1 /*
2  * awkgram.y --- yacc/bison parser
3  */
4 
5 /*
6  * Copyright (C) 1986, 1988, 1989, 1991-2021 the Free Software Foundation, Inc.
7  *
8  * This file is part of GAWK, the GNU implementation of the
9  * AWK Programming Language.
10  *
11  * GAWK is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 3 of the License, or
14  * (at your option) any later version.
15  *
16  * GAWK is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
24  */
25 
26 %{
27 #ifdef GAWKDEBUG
28 #define YYDEBUG 12
29 #endif
30 
31 #include "awk.h"
32 
33 #if defined(__STDC__) && __STDC__ < 1	/* VMS weirdness, maybe elsewhere */
34 #define signed /**/
35 #endif
36 
37 static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
38 #define  YYERROR_IS_DECLARED	1	/* for bison 3.8. sigh. */
39 static void error_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
40 static void lintwarn_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
41 static void warning_ln(int line, const char *m, ...) ATTRIBUTE_PRINTF_2;
42 static char *get_src_buf(void);
43 static int yylex(void);
44 int	yyparse(void);
45 static INSTRUCTION *snode(INSTRUCTION *subn, INSTRUCTION *op);
46 static char **check_params(char *fname, int pcount, INSTRUCTION *list);
47 static int install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist);
48 static NODE *mk_rexp(INSTRUCTION *exp);
49 static void param_sanity(INSTRUCTION *arglist);
50 static int parms_shadow(INSTRUCTION *pc, bool *shadow);
51 #ifndef NO_LINT
52 static int isnoeffect(OPCODE type);
53 #endif
54 static INSTRUCTION *make_assignable(INSTRUCTION *ip);
55 static void dumpintlstr(const char *str, size_t len);
56 static void dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2);
57 static bool include_source(INSTRUCTION *file, void **srcfile_p);
58 static bool load_library(INSTRUCTION *file, void **srcfile_p);
59 static void set_namespace(INSTRUCTION *ns, INSTRUCTION *comment);
60 static void next_sourcefile(void);
61 static char *tokexpand(void);
62 static NODE *set_profile_text(NODE *n, const char *str, size_t len);
63 static int check_qualified_special(char *token);
64 static char *qualify_name(const char *name, size_t len);
65 static INSTRUCTION *trailing_comment;
66 static INSTRUCTION *outer_comment;
67 static INSTRUCTION *interblock_comment;
68 static INSTRUCTION *pending_comment;
69 static INSTRUCTION *namespace_chain;
70 
71 #ifdef DEBUG_COMMENTS
72 static void
debug_print_comment_s(const char * name,INSTRUCTION * comment,int line)73 debug_print_comment_s(const char *name, INSTRUCTION *comment, int line)
74 {
75 	if (comment != NULL)
76 		fprintf(stderr, "%d: %s: <%.*s>\n", line, name,
77 				(int) (comment->memory->stlen - 1),
78 				comment->memory->stptr);
79 }
80 #define debug_print_comment(comment) \
81 	 debug_print_comment_s(# comment, comment, __LINE__)
82 #endif
83 
84 #define instruction(t)	bcalloc(t, 1, 0)
85 
86 static INSTRUCTION *mk_program(void);
87 static INSTRUCTION *append_rule(INSTRUCTION *pattern, INSTRUCTION *action);
88 static INSTRUCTION *mk_function(INSTRUCTION *fi, INSTRUCTION *def);
89 static INSTRUCTION *mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
90 		INSTRUCTION *elsep,	INSTRUCTION *false_branch);
91 static INSTRUCTION *mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1);
92 static INSTRUCTION *mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
93 		INSTRUCTION *incr, INSTRUCTION *body);
94 static void fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target);
95 static INSTRUCTION *mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op);
96 static INSTRUCTION *mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op);
97 static INSTRUCTION *mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op);
98 static INSTRUCTION *mk_getline(INSTRUCTION *op, INSTRUCTION *opt_var, INSTRUCTION *redir, int redirtype);
99 static int count_expressions(INSTRUCTION **list, bool isarg);
100 static INSTRUCTION *optimize_assignment(INSTRUCTION *exp);
101 static void add_lint(INSTRUCTION *list, LINTTYPE linttype);
102 
103 enum defref { FUNC_DEFINE, FUNC_USE, FUNC_EXT };
104 static void func_use(const char *name, enum defref how);
105 static void check_funcs(void);
106 
107 static ssize_t read_one_line(int fd, void *buffer, size_t count);
108 static int one_line_close(int fd);
109 static void merge_comments(INSTRUCTION *c1, INSTRUCTION *c2);
110 static INSTRUCTION *make_braced_statements(INSTRUCTION *lbrace, INSTRUCTION *stmts, INSTRUCTION *rbrace);
111 static void add_sign_to_num(NODE *n, char sign);
112 
113 static bool at_seen = false;
114 static bool want_source = false;
115 static bool want_regexp = false;	/* lexical scanning kludge */
116 static enum {
117 	FUNC_HEADER,
118 	FUNC_BODY,
119 	DONT_CHECK
120 } want_param_names = DONT_CHECK;	/* ditto */
121 static bool in_function;		/* parsing kludge */
122 static int rule = 0;
123 
124 const char *const ruletab[] = {
125 	"?",
126 	"BEGIN",
127 	"Rule",
128 	"END",
129 	"BEGINFILE",
130 	"ENDFILE",
131 };
132 
133 static bool in_print = false;	/* lexical scanning kludge for print */
134 static int in_parens = 0;	/* lexical scanning kludge for print */
135 static int sub_counter = 0;	/* array dimension counter for use in delete */
136 static char *lexptr;		/* pointer to next char during parsing */
137 static char *lexend;		/* end of buffer */
138 static char *lexptr_begin;	/* keep track of where we were for error msgs */
139 static char *lexeme;		/* beginning of lexeme for debugging */
140 static bool lexeof;		/* seen EOF for current source? */
141 static char *thisline = NULL;
142 static int in_braces = 0;	/* count braces for firstline, lastline in an 'action' */
143 static int lastline = 0;
144 static int firstline = 0;
145 static SRCFILE *sourcefile = NULL;	/* current program source */
146 static int lasttok = 0;
147 static bool eof_warned = false;	/* GLOBAL: want warning for each file */
148 static int break_allowed;	/* kludge for break */
149 static int continue_allowed;	/* kludge for continue */
150 
151 #define END_FILE	-1000
152 #define END_SRC  	-2000
153 
154 #define YYDEBUG_LEXER_TEXT (lexeme)
155 static char *tokstart = NULL;
156 static char *tok = NULL;
157 static char *tokend;
158 int errcount = 0;
159 
160 extern char *source;
161 extern int sourceline;
162 extern SRCFILE *srcfiles;
163 extern INSTRUCTION *rule_list;
164 extern int max_args;
165 extern NODE **args_array;
166 
167 const char awk_namespace[] = "awk";
168 const char *current_namespace = awk_namespace;
169 bool namespace_changed = false;
170 
171 static INSTRUCTION *rule_block[sizeof(ruletab)/sizeof(ruletab[0])];
172 
173 static INSTRUCTION *ip_rec;
174 static INSTRUCTION *ip_newfile;
175 static INSTRUCTION *ip_atexit = NULL;
176 static INSTRUCTION *ip_end;
177 static INSTRUCTION *ip_endfile;
178 static INSTRUCTION *ip_beginfile;
179 INSTRUCTION *main_beginfile;
180 static bool called_from_eval = false;
181 
182 static inline INSTRUCTION *list_create(INSTRUCTION *x);
183 static inline INSTRUCTION *list_append(INSTRUCTION *l, INSTRUCTION *x);
184 static inline INSTRUCTION *list_prepend(INSTRUCTION *l, INSTRUCTION *x);
185 static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2);
186 
187 extern double fmod(double x, double y);
188 
189 #define YYSTYPE INSTRUCTION *
190 %}
191 
192 %token FUNC_CALL NAME REGEXP FILENAME
193 %token YNUMBER YSTRING TYPED_REGEXP
194 %token RELOP IO_OUT IO_IN
195 %token ASSIGNOP ASSIGN MATCHOP CONCAT_OP
196 %token SUBSCRIPT
197 %token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
198 %token LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
199 %token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
200 %token LEX_BEGINFILE LEX_ENDFILE
201 %token LEX_GETLINE LEX_NEXTFILE
202 %token LEX_IN
203 %token LEX_AND LEX_OR INCREMENT DECREMENT
204 %token LEX_BUILTIN LEX_LENGTH
205 %token LEX_EOF
206 %token LEX_INCLUDE LEX_EVAL LEX_LOAD LEX_NAMESPACE
207 %token NEWLINE
208 
209 /* Lowest to highest */
210 %right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
211 %right '?' ':'
212 %left LEX_OR
213 %left LEX_AND
214 %left LEX_GETLINE
215 %nonassoc LEX_IN
216 %left FUNC_CALL LEX_BUILTIN LEX_LENGTH
217 %nonassoc ','
218 %left MATCHOP
219 %nonassoc RELOP '<' '>' IO_IN IO_OUT
220 %left CONCAT_OP
221 %left YSTRING YNUMBER TYPED_REGEXP
222 %left '+' '-'
223 %left '*' '/' '%'
224 %right '!' UNARY
225 %right '^'
226 %left INCREMENT DECREMENT
227 %left '$'
228 %left '(' ')'
229 %%
230 
231 program
232 	: /* empty */
233 	  { $$ = NULL; }
234 	| program rule
235 	  {
236 		rule = 0;
237 		yyerrok;
238 	  }
239 	| program nls
240 	  {
241 		if ($2 != NULL) {
242 			if ($1 == NULL)
243 				outer_comment = $2;
244 			else
245 				interblock_comment = $2;
246 		}
247 		$$ = $1;
248 	  }
249 	| program LEX_EOF
250 	  {
251 		next_sourcefile();
252 	  }
253 	| program error
254 	  {
255 		rule = 0;
256 		/*
257 		 * If errors, give up, don't produce an infinite
258 		 * stream of syntax error messages.
259 		 */
260   		/* yyerrok; */
261 	  }
262 	;
263 
264 rule
265 	: pattern action
266 	  {
267 		(void) append_rule($1, $2);
268 		if (pending_comment != NULL) {
269 			interblock_comment = pending_comment;
270 			pending_comment = NULL;
271 		}
272 	  }
273 	| pattern statement_term
274 	  {
275 		if (rule != Rule) {
276 			msg(_("%s blocks must have an action part"), ruletab[rule]);
277 			errcount++;
278 		} else if ($1 == NULL) {
279 			msg(_("each rule must have a pattern or an action part"));
280 			errcount++;
281 		} else {	/* pattern rule with non-empty pattern */
282 			if ($2 != NULL)
283 				list_append($1, $2);
284 			(void) append_rule($1, NULL);
285 		}
286 	  }
287 	| function_prologue action
288 	  {
289 		in_function = false;
290 		(void) mk_function($1, $2);
291 		want_param_names = DONT_CHECK;
292 		if (pending_comment != NULL) {
293 			interblock_comment = pending_comment;
294 			pending_comment = NULL;
295 		}
296 		yyerrok;
297 	  }
298 	| '@' LEX_INCLUDE source statement_term
299 	  {
300 		want_source = false;
301 		at_seen = false;
302 		if ($3 != NULL && $4 != NULL) {
303 			SRCFILE *s = (SRCFILE *) $3;
304 			s->comment = $4;
305 		}
306 		yyerrok;
307 	  }
308 	| '@' LEX_LOAD library statement_term
309 	  {
310 		want_source = false;
311 		at_seen = false;
312 		if ($3 != NULL && $4 != NULL) {
313 			SRCFILE *s = (SRCFILE *) $3;
314 			s->comment = $4;
315 		}
316 		yyerrok;
317 	  }
318 	| '@' LEX_NAMESPACE namespace statement_term
319 	  {
320 		want_source = false;
321 		at_seen = false;
322 
323 		// this frees $3 storage in all cases
324 		set_namespace($3, $4);
325 
326 		yyerrok;
327 	  }
328 	;
329 
330 source
331 	: FILENAME
332 	  {
333 		void *srcfile = NULL;
334 
335 		if (! include_source($1, & srcfile))
336 			YYABORT;
337 		efree($1->lextok);
338 		bcfree($1);
339 		$$ = (INSTRUCTION *) srcfile;
340 	  }
341 	| FILENAME error
342 	  { $$ = NULL; }
343 	| error
344 	  { $$ = NULL; }
345 	;
346 
347 library
348 	: FILENAME
349 	  {
350 		void *srcfile;
351 
352 		if (! load_library($1, & srcfile))
353 			YYABORT;
354 		efree($1->lextok);
355 		bcfree($1);
356 		$$ = (INSTRUCTION *) srcfile;
357 	  }
358 	| FILENAME error
359 	  { $$ = NULL; }
360 	| error
361 	  { $$ = NULL; }
362 	;
363 
364 namespace
365 	: FILENAME
366 	  { $$ = $1; }
367 	| FILENAME error
368 	  { $$ = NULL; }
369 	| error
370 	  { $$ = NULL; }
371 	;
372 
373 pattern
374 	: /* empty */
375 	  {
376 		rule = Rule;
377 		$$ = NULL;
378 	  }
379 	| exp
380 	  {
381 		rule = Rule;
382 	  }
383 
384 	| exp comma exp
385 	  {
386 		INSTRUCTION *tp;
387 
388 		add_lint($1, LINT_assign_in_cond);
389 		add_lint($3, LINT_assign_in_cond);
390 
391 		tp = instruction(Op_no_op);
392 		list_prepend($1, bcalloc(Op_line_range, !!do_pretty_print + 1, 0));
393 		$1->nexti->triggered = false;
394 		$1->nexti->target_jmp = $3->nexti;
395 
396 		list_append($1, instruction(Op_cond_pair));
397 		$1->lasti->line_range = $1->nexti;
398 		$1->lasti->target_jmp = tp;
399 
400 		list_append($3, instruction(Op_cond_pair));
401 		$3->lasti->line_range = $1->nexti;
402 		$3->lasti->target_jmp = tp;
403 		if (do_pretty_print) {
404 			($1->nexti + 1)->condpair_left = $1->lasti;
405 			($1->nexti + 1)->condpair_right = $3->lasti;
406 		}
407 		/* Put any comments in front of the range expression */
408 		if ($2 != NULL)
409 			$$ = list_append(list_merge(list_prepend($1, $2), $3), tp);
410 		else
411 			$$ = list_append(list_merge($1, $3), tp);
412 		rule = Rule;
413 	  }
414 	| LEX_BEGIN
415 	  {
416 		static int begin_seen = 0;
417 
418 		if (do_lint_old && ++begin_seen == 2)
419 			lintwarn_ln($1->source_line,
420 				_("old awk does not support multiple `BEGIN' or `END' rules"));
421 
422 		$1->in_rule = rule = BEGIN;
423 		$1->source_file = source;
424 		$$ = $1;
425 	  }
426 	| LEX_END
427 	  {
428 		static int end_seen = 0;
429 
430 		if (do_lint_old && ++end_seen == 2)
431 			lintwarn_ln($1->source_line,
432 				_("old awk does not support multiple `BEGIN' or `END' rules"));
433 
434 		$1->in_rule = rule = END;
435 		$1->source_file = source;
436 		$$ = $1;
437 	  }
438 	| LEX_BEGINFILE
439 	  {
440 		$1->in_rule = rule = BEGINFILE;
441 		$1->source_file = source;
442 		$$ = $1;
443 	  }
444 	| LEX_ENDFILE
445 	  {
446 		$1->in_rule = rule = ENDFILE;
447 		$1->source_file = source;
448 		$$ = $1;
449 	  }
450 	;
451 
452 action
453 	: l_brace statements r_brace opt_semi opt_nls
454 	  {
455 		INSTRUCTION *ip = make_braced_statements($1, $2, $3);
456 
457 		if ($3 != NULL && $5 != NULL) {
458 			merge_comments($3, $5);
459 			pending_comment = $3;
460 		} else if ($3 != NULL) {
461 			pending_comment = $3;
462 		} else if ($5 != NULL) {
463 			pending_comment = $5;
464 		}
465 
466 		$$ = ip;
467 	  }
468 	;
469 
470 func_name
471 	: NAME
472 	| FUNC_CALL
473 	  {
474 		const char *name = $1->lextok;
475 		char *qname = qualify_name(name, strlen(name));
476 
477 		if (qname != name) {
478 			efree((void *)name);
479 			$1->lextok = qname;
480 		}
481 		$$ = $1;
482 	  }
483 	| lex_builtin
484 	  {
485 		yyerror(_("`%s' is a built-in function, it cannot be redefined"),
486 					tokstart);
487 		YYABORT;
488 	  }
489 	| '@' LEX_EVAL
490 	  {
491 		$$ = $2;
492 		at_seen = false;
493 	  }
494 	;
495 
496 lex_builtin
497 	: LEX_BUILTIN
498 	| LEX_LENGTH
499 	;
500 
501 function_prologue
502 	: LEX_FUNCTION func_name '(' { want_param_names = FUNC_HEADER; } opt_param_list r_paren opt_nls
503 	  {
504 		INSTRUCTION *func_comment = NULL;
505 		// Merge any comments found in the parameter list with those
506 		// following the function header, associate the whole shebang
507 		// with the function as one block comment.
508 		if ($5 != NULL && $5->comment != NULL) {
509 			if ($7 != NULL) {
510 				merge_comments($5->comment, $7);
511 			}
512 			func_comment = $5->comment;
513 		} else if ($7 != NULL) {
514 			func_comment = $7;
515 		}
516 
517 		$1->source_file = source;
518 		$1->comment = func_comment;
519 		if (install_function($2->lextok, $1, $5) < 0)
520 			YYABORT;
521 		in_function = true;
522 		$2->lextok = NULL;
523 		bcfree($2);
524 		/* $5 already free'd in install_function */
525 		$$ = $1;
526 		want_param_names = FUNC_BODY;
527 	  }
528 	;
529 
530 regexp
531 	/*
532 	 * In this rule, want_regexp tells yylex that the next thing
533 	 * is a regexp so it should read up to the closing slash.
534 	 */
535 	: a_slash
536 		{ want_regexp = true; }
537 	  REGEXP	/* The terminating '/' is consumed by yylex(). */
538 		{
539 		  NODE *n, *exp;
540 		  char *re;
541 		  size_t len;
542 
543 		  re = $3->lextok;
544 		  $3->lextok = NULL;
545 		  len = strlen(re);
546 		  if (do_lint) {
547 			if (len == 0)
548 				lintwarn_ln($3->source_line,
549 					_("regexp constant `//' looks like a C++ comment, but is not"));
550 			else if (re[0] == '*' && re[len-1] == '*')
551 				/* possible C comment */
552 				lintwarn_ln($3->source_line,
553 					_("regexp constant `/%s/' looks like a C comment, but is not"), re);
554 		  }
555 
556 		  exp = make_str_node(re, len, ALREADY_MALLOCED);
557 		  n = make_regnode(Node_regex, exp);
558 		  if (n == NULL) {
559 			unref(exp);
560 			YYABORT;
561 		  }
562 		  $$ = $3;
563 		  $$->opcode = Op_match_rec;
564 		  $$->memory = n;
565 		}
566 	;
567 
568 typed_regexp
569 	: TYPED_REGEXP
570 		{
571 		  char *re;
572 		  size_t len;
573 
574 		  re = $1->lextok;
575 		  $1->lextok = NULL;
576 		  len = strlen(re);
577 
578 		  $$ = $1;
579 		  $$->opcode = Op_push_re;
580 		  $$->memory = make_typed_regex(re, len);
581 		}
582 
583 a_slash
584 	: '/'
585 	  { bcfree($1); }
586 	| SLASH_BEFORE_EQUAL
587 	;
588 
589 statements
590 	: /* empty */
591 	  { $$ = NULL; }
592 	| statements statement
593 	  {
594 		if ($2 == NULL) {
595 			$$ = $1;
596 		} else {
597 			add_lint($2, LINT_no_effect);
598 			if ($1 == NULL) {
599 				$$ = $2;
600 			} else {
601 				$$ = list_merge($1, $2);
602 			}
603 		}
604 
605 		if (trailing_comment != NULL) {
606 			$$ = list_append($$, trailing_comment);
607 			trailing_comment = NULL;
608 		}
609 
610 		yyerrok;
611 	  }
612 	| statements error
613 	  {	$$ = NULL; }
614 	;
615 
616 statement_term
617 	: nls		{ $$ = $1; }
618 	| semi opt_nls	{ $$ = $2; }
619 	;
620 
621 statement
622 	: semi opt_nls
623 	  {
624 		if ($2 != NULL) {
625 			INSTRUCTION *ip;
626 
627 			merge_comments($2, NULL);
628 			ip = list_create(instruction(Op_no_op));
629 			$$ = list_append(ip, $2);
630 		} else
631 			$$ = NULL;
632 	  }
633 	| l_brace statements r_brace
634 	  {
635 		trailing_comment = $3;	// NULL or comment
636 		$$ = make_braced_statements($1, $2, $3);
637 	  }
638 	| if_statement
639 	  {
640 		if (do_pretty_print)
641 			$$ = list_prepend($1, instruction(Op_exec_count));
642 		else
643 			$$ = $1;
644  	  }
645 	| LEX_SWITCH '(' exp r_paren opt_nls l_brace case_statements opt_nls r_brace
646 	  {
647 		INSTRUCTION *dflt, *curr = NULL, *cexp, *cstmt;
648 		INSTRUCTION *ip, *nextc, *tbreak;
649 		const char **case_values = NULL;
650 		int maxcount = 128;
651 		int case_count = 0;
652 		int i;
653 
654 		tbreak = instruction(Op_no_op);
655 		cstmt = list_create(tbreak);
656 		cexp = list_create(instruction(Op_pop));
657 		dflt = instruction(Op_jmp);
658 		dflt->target_jmp = tbreak;	/* if no case match and no explicit default */
659 
660 		if ($7 != NULL) {
661 			curr = $7->nexti;
662 			bcfree($7);	/* Op_list */
663 		}
664 		/*  else
665 			curr = NULL; */
666 
667 		for (; curr != NULL; curr = nextc) {
668 			INSTRUCTION *caseexp = curr->case_exp;
669 			INSTRUCTION *casestmt = curr->case_stmt;
670 
671 			nextc = curr->nexti;
672 			if (curr->opcode == Op_K_case) {
673 				if (caseexp->opcode == Op_push_i) {
674 					/* a constant scalar */
675 					char *caseval;
676 					caseval = force_string(caseexp->memory)->stptr;
677 					for (i = 0; i < case_count; i++) {
678 						if (strcmp(caseval, case_values[i]) == 0)
679 							error_ln(curr->source_line,
680 								_("duplicate case values in switch body: %s"), caseval);
681 					}
682 
683 					if (case_values == NULL)
684 						emalloc(case_values, const char **, sizeof(char *) * maxcount, "statement");
685 					else if (case_count >= maxcount) {
686 						maxcount += 128;
687 						erealloc(case_values, const char **, sizeof(char*) * maxcount, "statement");
688 					}
689 					case_values[case_count++] = caseval;
690 				} else {
691 					/* match a constant regex against switch expression. */
692 					(curr + 1)->match_exp = true;
693 				}
694 				curr->stmt_start = casestmt->nexti;
695 				curr->stmt_end	= casestmt->lasti;
696 				(void) list_prepend(cexp, curr);
697 				(void) list_prepend(cexp, caseexp);
698 			} else {
699 				if (dflt->target_jmp != tbreak)
700 					error_ln(curr->source_line,
701 						_("duplicate `default' detected in switch body"));
702 				else
703 					dflt->target_jmp = casestmt->nexti;
704 
705 				if (do_pretty_print) {
706 					curr->stmt_start = casestmt->nexti;
707 					curr->stmt_end = casestmt->lasti;
708 					(void) list_prepend(cexp, curr);
709 				} else
710 					bcfree(curr);
711 			}
712 
713 			cstmt = list_merge(casestmt, cstmt);
714 		}
715 
716 		if (case_values != NULL)
717 			efree(case_values);
718 
719 		ip = $3;
720 		if (do_pretty_print) {
721 			// first merge comments
722 			INSTRUCTION *head_comment = NULL;
723 
724 			if ($5 != NULL && $6 != NULL) {
725 				merge_comments($5, $6);
726 				head_comment = $5;
727 			} else if ($5 != NULL)
728 				head_comment = $5;
729 			else
730 				head_comment = $6;
731 
732 			$1->comment = head_comment;
733 
734 			(void) list_prepend(ip, $1);
735 			(void) list_prepend(ip, instruction(Op_exec_count));
736 			$1->target_break = tbreak;
737 			($1 + 1)->switch_start = cexp->nexti;
738 			($1 + 1)->switch_end = cexp->lasti;
739 			($1 + 1)->switch_end->comment = $9;
740 		}
741 		/* else
742 			$1 is NULL */
743 
744 		(void) list_append(cexp, dflt);
745 		(void) list_merge(ip, cexp);
746 		if ($8 != NULL)
747 			(void) list_append(cstmt, $8);
748 		$$ = list_merge(ip, cstmt);
749 
750 		break_allowed--;
751 		fix_break_continue(ip, tbreak, NULL);
752 	  }
753 	| LEX_WHILE '(' exp r_paren opt_nls statement
754 	  {
755 		/*
756 		 *    -----------------
757 		 * tc:
758 		 *         cond
759 		 *    -----------------
760 		 *    [Op_jmp_false tb   ]
761 		 *    -----------------
762 		 *         body
763 		 *    -----------------
764 		 *    [Op_jmp      tc    ]
765 		 * tb:[Op_no_op          ]
766 		 */
767 
768 		INSTRUCTION *ip, *tbreak, *tcont;
769 
770 		tbreak = instruction(Op_no_op);
771 		add_lint($3, LINT_assign_in_cond);
772 		tcont = $3->nexti;
773 		ip = list_append($3, instruction(Op_jmp_false));
774 		ip->lasti->target_jmp = tbreak;
775 
776 		if (do_pretty_print) {
777 			(void) list_append(ip, instruction(Op_exec_count));
778 			$1->target_break = tbreak;
779 			$1->target_continue = tcont;
780 			($1 + 1)->while_body = ip->lasti;
781 			(void) list_prepend(ip, $1);
782 		}
783 		/* else
784 			$1 is NULL */
785 
786 		if ($5 != NULL) {
787 			if ($6 == NULL)
788 				$6 = list_create(instruction(Op_no_op));
789 
790 			$5->memory->comment_type = BLOCK_COMMENT;
791 			$6 = list_prepend($6, $5);
792 		}
793 
794 		if ($6 != NULL)
795 			(void) list_merge(ip, $6);
796 		(void) list_append(ip, instruction(Op_jmp));
797 		ip->lasti->target_jmp = tcont;
798 		$$ = list_append(ip, tbreak);
799 
800 		break_allowed--;
801 		continue_allowed--;
802 		fix_break_continue(ip, tbreak, tcont);
803 	  }
804 	| LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
805 	  {
806 		/*
807 		 *    -----------------
808 		 * z:
809 		 *         body
810 		 *    -----------------
811 		 * tc:
812 		 *         cond
813 		 *    -----------------
814 		 *    [Op_jmp_true | z  ]
815 		 * tb:[Op_no_op         ]
816 		 */
817 
818 		INSTRUCTION *ip, *tbreak, *tcont;
819 
820 		tbreak = instruction(Op_no_op);
821 		tcont = $6->nexti;
822 		add_lint($6, LINT_assign_in_cond);
823 		if ($3 != NULL)
824 			ip = list_merge($3, $6);
825 		else
826 			ip = list_prepend($6, instruction(Op_no_op));
827 
828 		if ($2 != NULL)
829 			(void) list_prepend(ip, $2);
830 
831 		if (do_pretty_print)
832 			(void) list_prepend(ip, instruction(Op_exec_count));
833 
834 		(void) list_append(ip, instruction(Op_jmp_true));
835 		ip->lasti->target_jmp = ip->nexti;
836 		$$ = list_append(ip, tbreak);
837 
838 		break_allowed--;
839 		continue_allowed--;
840 		fix_break_continue(ip, tbreak, tcont);
841 
842 		if (do_pretty_print) {
843 			$1->target_break = tbreak;
844 			$1->target_continue = tcont;
845 			($1 + 1)->doloop_cond = tcont;
846 			$$ = list_prepend(ip, $1);
847 			bcfree($4);
848 			if ($8 != NULL)
849 				$1->comment = $8;
850 		}
851 		/* else
852 			$1 and $4 are NULLs */
853 	  }
854 	| LEX_FOR '(' NAME LEX_IN simple_variable r_paren opt_nls statement
855 	  {
856 		INSTRUCTION *ip;
857 		char *var_name = $3->lextok;
858 
859 		if ($8 != NULL
860 				&& $8->lasti->opcode == Op_K_delete
861 				&& $8->lasti->expr_count == 1
862 				&& $8->nexti->opcode == Op_push
863 				&& ($8->nexti->memory->type != Node_var || !($8->nexti->memory->var_update))
864 				&& strcmp($8->nexti->memory->vname, var_name) == 0
865 		) {
866 
867 		/*
868 		 * Efficiency hack.  Recognize the special case of
869 		 *
870 		 * 	for (iggy in foo)
871 		 * 		delete foo[iggy]
872 		 *
873 		 * and treat it as if it were
874 		 *
875 		 * 	delete foo
876 		 *
877 		 * Check that the body is a `delete a[i]' statement,
878 		 * and that both the loop var and array names match.
879 		 */
880 			NODE *arr = NULL;
881 
882 			ip = $8->nexti->nexti;
883 			if ($5->nexti->opcode == Op_push && $5->lasti == $5->nexti)
884 				arr = $5->nexti->memory;
885 			if (arr != NULL
886 					&& ip->opcode == Op_no_op
887 					&& ip->nexti->opcode == Op_push_array
888 					&& strcmp(ip->nexti->memory->vname, arr->vname) == 0
889 					&& ip->nexti->nexti == $8->lasti
890 			) {
891 				(void) make_assignable($8->nexti);
892 				$8->lasti->opcode = Op_K_delete_loop;
893 				$8->lasti->expr_count = 0;
894 				if ($1 != NULL)
895 					bcfree($1);
896 				efree(var_name);
897 				bcfree($3);
898 				bcfree($4);
899 				bcfree($5);
900 				if ($7 != NULL) {
901 					merge_comments($7, NULL);
902 					$8 = list_prepend($8, $7);
903 				}
904 				$$ = $8;
905 			} else
906 				goto regular_loop;
907 		} else {
908 			INSTRUCTION *tbreak, *tcont;
909 
910 			/*    [ Op_push_array a       ]
911 			 *    [ Op_arrayfor_init | ib ]
912 			 * ic:[ Op_arrayfor_incr | ib ]
913 			 *    [ Op_var_assign if any  ]
914 			 *
915 			 *              body
916 			 *
917 			 *    [Op_jmp | ic            ]
918 			 * ib:[Op_arrayfor_final      ]
919 			 */
920 regular_loop:
921 			ip = $5;
922 			ip->nexti->opcode = Op_push_array;
923 
924 			tbreak = instruction(Op_arrayfor_final);
925 			$4->opcode = Op_arrayfor_incr;
926 			$4->array_var = variable($3->source_line, var_name, Node_var);
927 			$4->target_jmp = tbreak;
928 			tcont = $4;
929 			$3->opcode = Op_arrayfor_init;
930 			$3->target_jmp = tbreak;
931 			(void) list_append(ip, $3);
932 
933 			if (do_pretty_print) {
934 				$1->opcode = Op_K_arrayfor;
935 				$1->target_continue = tcont;
936 				$1->target_break = tbreak;
937 				(void) list_append(ip, $1);
938 			}
939 			/* else
940 				$1 is NULL */
941 
942 			/* add update_FOO instruction if necessary */
943 			if ($4->array_var->type == Node_var && $4->array_var->var_update) {
944 				(void) list_append(ip, instruction(Op_var_update));
945 				ip->lasti->update_var = $4->array_var->var_update;
946 			}
947 			(void) list_append(ip, $4);
948 
949 			/* add set_FOO instruction if necessary */
950 			if ($4->array_var->type == Node_var && $4->array_var->var_assign) {
951 				(void) list_append(ip, instruction(Op_var_assign));
952 				ip->lasti->assign_var = $4->array_var->var_assign;
953 			}
954 
955 			if (do_pretty_print) {
956 				(void) list_append(ip, instruction(Op_exec_count));
957 				($1 + 1)->forloop_cond = $4;
958 				($1 + 1)->forloop_body = ip->lasti;
959 			}
960 
961 			if ($7 != NULL)
962 				merge_comments($7, NULL);
963 
964 			if ($8 != NULL) {
965 				if ($7 != NULL)
966 					$8 = list_prepend($8, $7);
967 				(void) list_merge(ip, $8);
968 			} else if ($7 != NULL)
969 				(void) list_append(ip, $7);
970 
971 			(void) list_append(ip, instruction(Op_jmp));
972 			ip->lasti->target_jmp = $4;
973 			$$ = list_append(ip, tbreak);
974 			fix_break_continue(ip, tbreak, tcont);
975 		}
976 
977 		break_allowed--;
978 		continue_allowed--;
979 	  }
980 	| LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
981 	  {
982 		if ($5 != NULL) {
983 			merge_comments($5, NULL);
984 			$1->comment = $5;
985 		}
986 		if ($8 != NULL) {
987 			merge_comments($8, NULL);
988 			if ($1->comment == NULL) {
989 				$8->memory->comment_type = FOR_COMMENT;
990 				$1->comment = $8;
991 			} else
992 				$1->comment->comment = $8;
993 		}
994 		if ($11 != NULL)
995 			$12 = list_prepend($12, $11);
996 		add_lint($6, LINT_assign_in_cond);
997 		$$ = mk_for_loop($1, $3, $6, $9, $12);
998 
999 		break_allowed--;
1000 		continue_allowed--;
1001 	  }
1002 	| LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
1003 	  {
1004 		if ($5 != NULL) {
1005 			merge_comments($5, NULL);
1006 			$1->comment = $5;
1007 		}
1008 		if ($7 != NULL) {
1009 			merge_comments($7, NULL);
1010 			if ($1->comment == NULL) {
1011 				$7->memory->comment_type = FOR_COMMENT;
1012 				$1->comment = $7;
1013 			} else
1014 				$1->comment->comment = $7;
1015 		}
1016 		if ($10 != NULL)
1017 			$11 = list_prepend($11, $10);
1018 		$$ = mk_for_loop($1, $3, (INSTRUCTION *) NULL, $8, $11);
1019 
1020 		break_allowed--;
1021 		continue_allowed--;
1022 	  }
1023 	| non_compound_stmt
1024 	  {
1025 		if (do_pretty_print)
1026 			$$ = list_prepend($1, instruction(Op_exec_count));
1027 		else
1028 			$$ = $1;
1029 	  }
1030 	;
1031 
1032 non_compound_stmt
1033 	: LEX_BREAK statement_term
1034 	  {
1035 		if (! break_allowed)
1036 			error_ln($1->source_line,
1037 				_("`break' is not allowed outside a loop or switch"));
1038 		$1->target_jmp = NULL;
1039 		$$ = list_create($1);
1040 		if ($2 != NULL)
1041 			$$ = list_append($$, $2);
1042 	  }
1043 	| LEX_CONTINUE statement_term
1044 	  {
1045 		if (! continue_allowed)
1046 			error_ln($1->source_line,
1047 				_("`continue' is not allowed outside a loop"));
1048 		$1->target_jmp = NULL;
1049 		$$ = list_create($1);
1050 		if ($2 != NULL)
1051 			$$ = list_append($$, $2);
1052 	  }
1053 	| LEX_NEXT statement_term
1054 	  {
1055 		/* if inside function (rule = 0), resolve context at run-time */
1056 		if (rule && rule != Rule)
1057 			error_ln($1->source_line,
1058 				_("`next' used in %s action"), ruletab[rule]);
1059 		$1->target_jmp = ip_rec;
1060 		$$ = list_create($1);
1061 		if ($2 != NULL)
1062 			$$ = list_append($$, $2);
1063 	  }
1064 	| LEX_NEXTFILE statement_term
1065 	  {
1066 		/* if inside function (rule = 0), resolve context at run-time */
1067 		if (rule == BEGIN || rule == END || rule == ENDFILE)
1068 			error_ln($1->source_line,
1069 				_("`nextfile' used in %s action"), ruletab[rule]);
1070 
1071 		$1->target_newfile = ip_newfile;
1072 		$1->target_endfile = ip_endfile;
1073 		$$ = list_create($1);
1074 		if ($2 != NULL)
1075 			$$ = list_append($$, $2);
1076 	  }
1077 	| LEX_EXIT opt_exp statement_term
1078 	  {
1079 		/* Initialize the two possible jump targets, the actual target
1080 		 * is resolved at run-time.
1081 		 */
1082 		$1->target_end = ip_end;	/* first instruction in end_block */
1083 		$1->target_atexit = ip_atexit;	/* cleanup and go home */
1084 
1085 		if ($2 == NULL) {
1086 			$$ = list_create($1);
1087 			(void) list_prepend($$, instruction(Op_push_i));
1088 			$$->nexti->memory = dupnode(Nnull_string);
1089 		} else
1090 			$$ = list_append($2, $1);
1091 		if ($3 != NULL)
1092 			$$ = list_append($$, $3);
1093 	  }
1094 	| LEX_RETURN
1095 	  {
1096 		if (! in_function)
1097 			yyerror(_("`return' used outside function context"));
1098 	  } opt_fcall_exp statement_term {
1099 		if (called_from_eval)
1100 			$1->opcode = Op_K_return_from_eval;
1101 
1102 		if ($3 == NULL) {
1103 			$$ = list_create($1);
1104 			(void) list_prepend($$, instruction(Op_push_i));
1105 			$$->nexti->memory = dupnode(Nnull_string);
1106 		} else
1107 			$$ = list_append($3, $1);
1108 		if ($4 != NULL)
1109 			$$ = list_append($$, $4);
1110 	  }
1111 	| simple_stmt statement_term
1112 	  {
1113 		if ($2 != NULL)
1114 			$$ = list_append($1, $2);
1115 		else
1116 			$$ = $1;
1117 	  }
1118 	;
1119 
1120 	/*
1121 	 * A simple_stmt exists to satisfy a constraint in the POSIX
1122 	 * grammar allowing them to occur as the 1st and 3rd parts
1123 	 * in a `for (...;...;...)' loop.  This is a historical oddity
1124 	 * inherited from Unix awk, not at all documented in the AK&W
1125 	 * awk book.  We support it, as this was reported as a bug.
1126 	 * We don't bother to document it though. So there.
1127 	 */
1128 simple_stmt
1129 	: print { in_print = true; in_parens = 0; } print_expression_list output_redir
1130 	  {
1131 		/*
1132 		 * Optimization: plain `print' has no expression list, so $3 is null.
1133 		 * If $3 is NULL or is a bytecode list for $0 use Op_K_print_rec,
1134 		 * which is faster for these two cases.
1135 		 */
1136 
1137 		if (do_optimize && $1->opcode == Op_K_print &&
1138 			($3 == NULL
1139 				|| ($3->lasti->opcode == Op_field_spec
1140 					&& $3->nexti->nexti->nexti == $3->lasti
1141 					&& $3->nexti->nexti->opcode == Op_push_i
1142 					&& $3->nexti->nexti->memory->type == Node_val)
1143 			)
1144 		) {
1145 			static bool warned = false;
1146 			/*   -----------------
1147 			 *      output_redir
1148 			 *    [ redirect exp ]
1149 			 *   -----------------
1150 			 *     expression_list
1151 			 *   ------------------
1152 			 *    [Op_K_print_rec | NULL | redir_type | expr_count]
1153 			 */
1154 
1155 			if ($3 != NULL) {
1156 				NODE *n = $3->nexti->nexti->memory;
1157 
1158 				if ((n->flags & (STRING|STRCUR)) != 0 || ! is_zero(n))
1159 					goto regular_print;
1160 
1161 				bcfree($3->lasti);			/* Op_field_spec */
1162 				unref(n);				/* Node_val */
1163 				bcfree($3->nexti->nexti);		/* Op_push_i */
1164 				bcfree($3->nexti);			/* Op_list */
1165 				bcfree($3);				/* Op_list */
1166 			} else {
1167 				if (do_lint && (rule == BEGIN || rule == END) && ! warned) {
1168 					warned = true;
1169 					lintwarn_ln($1->source_line,
1170 		_("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
1171 				}
1172 			}
1173 
1174 			$1->expr_count = 0;
1175 			$1->opcode = Op_K_print_rec;
1176 			if ($4 == NULL) {    /* no redircetion */
1177 				$1->redir_type = redirect_none;
1178 				$$ = list_create($1);
1179 			} else {
1180 				INSTRUCTION *ip;
1181 				ip = $4->nexti;
1182 				$1->redir_type = ip->redir_type;
1183 				$4->nexti = ip->nexti;
1184 				bcfree(ip);
1185 				$$ = list_append($4, $1);
1186 			}
1187 		} else {
1188 			/*   -----------------
1189 			 *    [ output_redir    ]
1190 			 *    [ redirect exp    ]
1191 			 *   -----------------
1192 			 *    [ expression_list ]
1193 			 *   ------------------
1194 			 *    [$1 | NULL | redir_type | expr_count]
1195 			 *
1196 			 */
1197 regular_print:
1198 			if ($4 == NULL) {		/* no redirection */
1199 				if ($3 == NULL)	{	/* print/printf without arg */
1200 					$1->expr_count = 0;
1201 					if ($1->opcode == Op_K_print)
1202 						$1->opcode = Op_K_print_rec;
1203 					$1->redir_type = redirect_none;
1204 					$$ = list_create($1);
1205 				} else {
1206 					INSTRUCTION *t = $3;
1207 					$1->expr_count = count_expressions(&t, false);
1208 					$1->redir_type = redirect_none;
1209 					$$ = list_append(t, $1);
1210 				}
1211 			} else {
1212 				INSTRUCTION *ip;
1213 				ip = $4->nexti;
1214 				$1->redir_type = ip->redir_type;
1215 				$4->nexti = ip->nexti;
1216 				bcfree(ip);
1217 				if ($3 == NULL) {
1218 					$1->expr_count = 0;
1219 					if ($1->opcode == Op_K_print)
1220 						$1->opcode = Op_K_print_rec;
1221 					$$ = list_append($4, $1);
1222 				} else {
1223 					INSTRUCTION *t = $3;
1224 					$1->expr_count = count_expressions(&t, false);
1225 					$$ = list_append(list_merge($4, t), $1);
1226 				}
1227 			}
1228 		}
1229 	  }
1230 
1231 	| LEX_DELETE NAME { sub_counter = 0; } delete_subscript_list
1232 	  {
1233 		char *arr = $2->lextok;
1234 
1235 		$2->opcode = Op_push_array;
1236 		$2->memory = variable($2->source_line, arr, Node_var_new);
1237 
1238 		if (! do_posix && ! do_traditional) {
1239 			if ($2->memory == symbol_table)
1240 				fatal(_("`delete' is not allowed with SYMTAB"));
1241 			else if ($2->memory == func_table)
1242 				fatal(_("`delete' is not allowed with FUNCTAB"));
1243 		}
1244 
1245 		if ($4 == NULL) {
1246 			/*
1247 			 * As of September 2012, POSIX has added support
1248 			 * for `delete array'. See:
1249 			 * http://austingroupbugs.net/view.php?id=544
1250 			 *
1251 			 * Thanks to Nathan Weeks for the initiative.
1252 			 *
1253 			 * Thus we no longer warn or check do_posix.
1254 			 * Also, since BWK awk supports it, we don't have to
1255 			 * check do_traditional either.
1256 			 */
1257 			$1->expr_count = 0;
1258 			$$ = list_append(list_create($2), $1);
1259 		} else {
1260 			$1->expr_count = sub_counter;
1261 			$$ = list_append(list_append($4, $2), $1);
1262 		}
1263 	  }
1264 	| LEX_DELETE '(' NAME ')'
1265 		  /*
1266 		   * this is for tawk compatibility. maybe the warnings
1267 		   * should always be done.
1268 		   */
1269 	  {
1270 		static bool warned = false;
1271 		char *arr = $3->lextok;
1272 
1273 		if (do_lint && ! warned) {
1274 			warned = true;
1275 			lintwarn_ln($1->source_line,
1276 				_("`delete(array)' is a non-portable tawk extension"));
1277 		}
1278 		if (do_traditional) {
1279 			error_ln($1->source_line,
1280 				_("`delete(array)' is a non-portable tawk extension"));
1281 		}
1282 		$3->memory = variable($3->source_line, arr, Node_var_new);
1283 		$3->opcode = Op_push_array;
1284 		$1->expr_count = 0;
1285 		$$ = list_append(list_create($3), $1);
1286 
1287 		if (! do_posix && ! do_traditional) {
1288 			if ($3->memory == symbol_table)
1289 				fatal(_("`delete' is not allowed with SYMTAB"));
1290 			else if ($3->memory == func_table)
1291 				fatal(_("`delete' is not allowed with FUNCTAB"));
1292 		}
1293 	  }
1294 	| exp
1295 	  {
1296 		$$ = optimize_assignment($1);
1297 	  }
1298 	;
1299 
1300 opt_simple_stmt
1301 	: /* empty */
1302 	  { $$ = NULL; }
1303 	| simple_stmt
1304 	  { $$ = $1; }
1305 	;
1306 
1307 case_statements
1308 	: /* empty */
1309 	  { $$ = NULL; }
1310 	| case_statements case_statement
1311 	  {
1312 		if ($1 == NULL)
1313 			$$ = list_create($2);
1314 		else
1315 			$$ = list_prepend($1, $2);
1316 	  }
1317 	| case_statements error
1318 	  { $$ = NULL; }
1319 	;
1320 
1321 case_statement
1322 	: LEX_CASE case_value colon opt_nls statements
1323 	  {
1324 		INSTRUCTION *casestmt = $5;
1325 		if ($5 == NULL)
1326 			casestmt = list_create(instruction(Op_no_op));
1327 		if (do_pretty_print)
1328 			(void) list_prepend(casestmt, instruction(Op_exec_count));
1329 		$1->case_exp = $2;
1330 		$1->case_stmt = casestmt;
1331 		$1->comment = $4;
1332 		bcfree($3);
1333 		$$ = $1;
1334 	  }
1335 	| LEX_DEFAULT colon opt_nls statements
1336 	  {
1337 		INSTRUCTION *casestmt = $4;
1338 		if ($4 == NULL)
1339 			casestmt = list_create(instruction(Op_no_op));
1340 		if (do_pretty_print)
1341 			(void) list_prepend(casestmt, instruction(Op_exec_count));
1342 		bcfree($2);
1343 		$1->case_stmt = casestmt;
1344 		$1->comment = $3;
1345 		$$ = $1;
1346 	  }
1347 	;
1348 
1349 case_value
1350 	: YNUMBER
1351 	  {	$$ = $1; }
1352 	| '-' YNUMBER    %prec UNARY
1353 	  {
1354 		NODE *n = $2->memory;
1355 		(void) force_number(n);
1356 		negate_num(n);
1357 		bcfree($1);
1358 		$$ = $2;
1359 	  }
1360 	| '+' YNUMBER    %prec UNARY
1361 	  {
1362 		NODE *n = $2->lasti->memory;
1363 		bcfree($1);
1364 		add_sign_to_num(n, '+');
1365 		$$ = $2;
1366 	  }
1367 	| YSTRING
1368 	  {	$$ = $1; }
1369 	| regexp
1370 	  {
1371 		if ($1->memory->type == Node_regex)
1372 			$1->opcode = Op_push_re;
1373 		else
1374 			$1->opcode = Op_push;
1375 		$$ = $1;
1376 	  }
1377 	| typed_regexp
1378 	  {
1379 		assert(($1->memory->flags & REGEX) == REGEX);
1380 		$1->opcode = Op_push_re;
1381 		$$ = $1;
1382 	  }
1383 	;
1384 
1385 print
1386 	: LEX_PRINT
1387 	  { $$ = $1; }
1388 	| LEX_PRINTF
1389 	  { $$ = $1; }
1390 	;
1391 
1392 	/*
1393 	 * Note: ``print(x)'' is already parsed by the first rule,
1394 	 * so there is no good in covering it by the second one too.
1395 	 */
1396 print_expression_list
1397 	: opt_expression_list
1398 	| '(' expression_list r_paren
1399 	  {
1400 		$$ = $2;
1401 	  }
1402 	;
1403 
1404 output_redir
1405 	: /* empty */
1406 	  {
1407 		in_print = false;
1408 		in_parens = 0;
1409 		$$ = NULL;
1410 	  }
1411 	| IO_OUT { in_print = false; in_parens = 0; } common_exp
1412 	  {
1413 		if ($1->redir_type == redirect_twoway
1414 		    	&& $3->lasti->opcode == Op_K_getline_redir
1415 		   	 	&& $3->lasti->redir_type == redirect_twoway)
1416 			yyerror(_("multistage two-way pipelines don't work"));
1417 		if (do_lint && $1->redir_type == redirect_output && $3->lasti->opcode == Op_concat)
1418 			lintwarn(_("concatenation as I/O `>' redirection target is ambiguous"));
1419 		$$ = list_prepend($3, $1);
1420 	  }
1421 	;
1422 
1423 if_statement
1424 	: LEX_IF '(' exp r_paren opt_nls statement
1425 	  {
1426 		if ($5 != NULL)
1427 			$1->comment = $5;
1428 		add_lint($3, LINT_assign_in_cond);
1429 		$$ = mk_condition($3, $1, $6, NULL, NULL);
1430 	  }
1431 	| LEX_IF '(' exp r_paren opt_nls statement
1432 	     LEX_ELSE opt_nls statement
1433 	  {
1434 		if ($5 != NULL)
1435 			$1->comment = $5;
1436 		if ($8 != NULL)
1437 			$7->comment = $8;
1438 		add_lint($3, LINT_assign_in_cond);
1439 		$$ = mk_condition($3, $1, $6, $7, $9);
1440 	  }
1441 	;
1442 
1443 nls
1444 	: NEWLINE
1445 	  {
1446 		$$ = $1;
1447 	  }
1448 	| nls NEWLINE
1449 	  {
1450 		if ($1 != NULL && $2 != NULL) {
1451 			if ($1->memory->comment_type == EOL_COMMENT) {
1452 				assert($2->memory->comment_type == BLOCK_COMMENT);
1453 				$1->comment = $2;	// chain them
1454 			} else {
1455 				merge_comments($1, $2);
1456 			}
1457 
1458 			$$ = $1;
1459 		} else if ($1 != NULL) {
1460 			$$ = $1;
1461 		} else if ($2 != NULL) {
1462 			$$ = $2;
1463 		} else
1464 			$$ = NULL;
1465 	  }
1466 	;
1467 
1468 opt_nls
1469 	: /* empty */
1470 	  { $$ = NULL; }
1471 	| nls
1472 	  { $$ = $1; }
1473 	;
1474 
1475 input_redir
1476 	: /* empty */
1477 	  { $$ = NULL; }
1478 	| '<' simp_exp
1479 	  {
1480 		bcfree($1);
1481 		$$ = $2;
1482 	  }
1483 	;
1484 
1485 opt_param_list
1486 	: /* empty */
1487 	  { $$ = NULL; }
1488 	| param_list
1489 	  { $$ = $1; }
1490 	;
1491 
1492 param_list
1493 	: NAME
1494 	  {
1495 		$1->param_count = 0;
1496 		$$ = list_create($1);
1497 	  }
1498 	| param_list comma NAME
1499 	  {
1500 		if ($1 != NULL && $3 != NULL) {
1501 			$3->param_count = $1->lasti->param_count + 1;
1502 			$$ = list_append($1, $3);
1503 			yyerrok;
1504 
1505 			// newlines are allowed after commas, catch any comments
1506 			if ($2 != NULL) {
1507 				if ($1->comment != NULL)
1508 					merge_comments($1->comment, $2);
1509 				else
1510 					$1->comment = $2;
1511 			}
1512 		} else
1513 			$$ = NULL;
1514 	  }
1515 	| error
1516 	  { $$ = NULL; }
1517 	| param_list error
1518 	  { $$ = $1; }
1519 	| param_list comma error
1520 	  { $$ = $1; }
1521 	;
1522 
1523 /* optional expression, as in for loop */
1524 opt_exp
1525 	: /* empty */
1526 	  { $$ = NULL; }
1527 	| exp
1528 	  { $$ = $1; }
1529 	;
1530 
1531 opt_expression_list
1532 	: /* empty */
1533 	  { $$ = NULL; }
1534 	| expression_list
1535 	  { $$ = $1; }
1536 	;
1537 
1538 expression_list
1539 	: exp
1540 	  {	$$ = mk_expression_list(NULL, $1); }
1541 	| expression_list comma exp
1542 	  {
1543 		if ($2 != NULL)
1544 			$1->lasti->comment = $2;
1545 		$$ = mk_expression_list($1, $3);
1546 		yyerrok;
1547 	  }
1548 	| error
1549 	  { $$ = NULL; }
1550 	| expression_list error
1551 	  {
1552 		/*
1553 		 * Returning the expression list instead of NULL lets
1554 		 * snode get a list of arguments that it can count.
1555 		 */
1556 		$$ = $1;
1557 	  }
1558 	| expression_list error exp
1559 	  {
1560 		/* Ditto */
1561 		$$ = mk_expression_list($1, $3);
1562 	  }
1563 	| expression_list comma error
1564 	  {
1565 		/* Ditto */
1566 		if ($2 != NULL)
1567 			$1->lasti->comment = $2;
1568 		$$ = $1;
1569 	  }
1570 	;
1571 
1572 opt_fcall_expression_list
1573 	: /* empty */
1574 	  { $$ = NULL; }
1575 	| fcall_expression_list
1576 	  { $$ = $1; }
1577 	;
1578 
1579 fcall_expression_list
1580 	: fcall_exp
1581 	  {	$$ = mk_expression_list(NULL, $1); }
1582 	| fcall_expression_list comma fcall_exp
1583 	  {
1584 		if ($2 != NULL)
1585 			$1->lasti->comment = $2;
1586 		$$ = mk_expression_list($1, $3);
1587 		yyerrok;
1588 	  }
1589 	| error
1590 	  { $$ = NULL; }
1591 	| fcall_expression_list error
1592 	  {
1593 		/*
1594 		 * Returning the expression list instead of NULL lets
1595 		 * snode get a list of arguments that it can count.
1596 		 */
1597 		$$ = $1;
1598 	  }
1599 	| fcall_expression_list error fcall_exp
1600 	  {
1601 		/* Ditto */
1602 		$$ = mk_expression_list($1, $3);
1603 	  }
1604 	| fcall_expression_list comma error
1605 	  {
1606 		/* Ditto */
1607 		if ($2 != NULL)
1608 			$1->comment = $2;
1609 		$$ = $1;
1610 	  }
1611 	;
1612 
1613 fcall_exp
1614 	: exp { $$ = $1; }
1615 	| typed_regexp { $$ = list_create($1); }
1616 	;
1617 
1618 opt_fcall_exp
1619 	: /* empty */
1620 	  { $$ = NULL; }
1621 	| fcall_exp { $$ = $1; }
1622 	;
1623 
1624 /* Expressions, not including the comma operator.  */
1625 exp
1626 	: variable assign_operator exp %prec ASSIGNOP
1627 	  {
1628 		if (do_lint && $3->lasti->opcode == Op_match_rec)
1629 			lintwarn_ln($2->source_line,
1630 				_("regular expression on right of assignment"));
1631 		$$ = mk_assignment($1, $3, $2);
1632 	  }
1633 	| variable ASSIGN typed_regexp %prec ASSIGNOP
1634 	  {
1635 		$$ = mk_assignment($1, list_create($3), $2);
1636 	  }
1637 	| exp LEX_AND exp
1638 	  {	$$ = mk_boolean($1, $3, $2); }
1639 	| exp LEX_OR exp
1640 	  {	$$ = mk_boolean($1, $3, $2); }
1641 	| exp MATCHOP typed_regexp
1642 	  {
1643 		if ($1->lasti->opcode == Op_match_rec)
1644 			warning_ln($2->source_line,
1645 				_("regular expression on left of `~' or `!~' operator"));
1646 
1647 		assert($3->opcode == Op_push_re
1648 			&& ($3->memory->flags & REGEX) != 0);
1649 		/* RHS is @/.../ */
1650 		$2->memory = $3->memory;
1651 		bcfree($3);
1652 		$$ = list_append($1, $2);
1653 	  }
1654 	| exp MATCHOP exp
1655 	  {
1656 		if ($1->lasti->opcode == Op_match_rec)
1657 			warning_ln($2->source_line,
1658 				_("regular expression on left of `~' or `!~' operator"));
1659 
1660 		if ($3->lasti == $3->nexti && $3->nexti->opcode == Op_match_rec) {
1661 			/* RHS is /.../ */
1662 			$2->memory = $3->nexti->memory;
1663 			bcfree($3->nexti);	/* Op_match_rec */
1664 			bcfree($3);			/* Op_list */
1665 			$$ = list_append($1, $2);
1666 		} else {
1667 			$2->memory = make_regnode(Node_dynregex, NULL);
1668 			$$ = list_append(list_merge($1, $3), $2);
1669 		}
1670 	  }
1671 	| exp LEX_IN simple_variable
1672 	  {
1673 		if (do_lint_old)
1674 			lintwarn_ln($2->source_line,
1675 				_("old awk does not support the keyword `in' except after `for'"));
1676 		$3->nexti->opcode = Op_push_array;
1677 		$2->opcode = Op_in_array;
1678 		$2->expr_count = 1;
1679 		$$ = list_append(list_merge($1, $3), $2);
1680 	  }
1681 	| exp a_relop exp %prec RELOP
1682 	  {
1683 		if (do_lint && $3->lasti->opcode == Op_match_rec)
1684 			lintwarn_ln($2->source_line,
1685 				_("regular expression on right of comparison"));
1686 		$$ = list_append(list_merge($1, $3), $2);
1687 	  }
1688 	| exp '?' exp ':' exp
1689 	  { $$ = mk_condition($1, $2, $3, $4, $5); }
1690 	| common_exp
1691 	  { $$ = $1; }
1692 	;
1693 
1694 assign_operator
1695 	: ASSIGN
1696 	  { $$ = $1; }
1697 	| ASSIGNOP
1698 	  { $$ = $1; }
1699 	| SLASH_BEFORE_EQUAL ASSIGN   /* `/=' */
1700 	  {
1701 		$2->opcode = Op_assign_quotient;
1702 		$$ = $2;
1703 	  }
1704 	;
1705 
1706 relop_or_less
1707 	: RELOP
1708 	  { $$ = $1; }
1709 	| '<'
1710 	  { $$ = $1; }
1711 	;
1712 
1713 a_relop
1714 	: relop_or_less
1715 	  { $$ = $1; }
1716 	| '>'
1717 	  { $$ = $1; }
1718 	;
1719 
1720 common_exp
1721 	: simp_exp
1722 	  { $$ = $1; }
1723 	| simp_exp_nc
1724 	  { $$ = $1; }
1725 	| common_exp simp_exp %prec CONCAT_OP
1726 	  {
1727 		int count = 2;
1728 		bool is_simple_var = false;
1729 
1730 		if ($1->lasti->opcode == Op_concat) {
1731 			/* multiple (> 2) adjacent strings optimization */
1732 			is_simple_var = ($1->lasti->concat_flag & CSVAR) != 0;
1733 			count = $1->lasti->expr_count + 1;
1734 			$1->lasti->opcode = Op_no_op;
1735 		} else {
1736 			is_simple_var = ($1->nexti->opcode == Op_push
1737 					&& $1->lasti == $1->nexti); /* first exp. is a simple
1738 					                             * variable?; kludge for use
1739 					                             * in Op_assign_concat.
1740 		 			                             */
1741 		}
1742 
1743 		if (do_optimize
1744 			&& $1->nexti == $1->lasti && $1->nexti->opcode == Op_push_i
1745 			&& $2->nexti == $2->lasti && $2->nexti->opcode == Op_push_i
1746 		) {
1747 			NODE *n1 = $1->nexti->memory;
1748 			NODE *n2 = $2->nexti->memory;
1749 			size_t nlen;
1750 
1751 			// 1.5 ""   # can't fold this if program mucks with CONVFMT.
1752 			// See test #12 in test/posix.awk.
1753 			// Also can't fold if one or the other is translatable.
1754 			if ((n1->flags & (NUMBER|NUMINT|INTLSTR)) != 0 || (n2->flags & (NUMBER|NUMINT|INTLSTR)) != 0)
1755 				goto plain_concat;
1756 
1757 			n1 = force_string(n1);
1758 			n2 = force_string(n2);
1759 			nlen = n1->stlen + n2->stlen;
1760 			erealloc(n1->stptr, char *, nlen + 1, "constant fold");
1761 			memcpy(n1->stptr + n1->stlen, n2->stptr, n2->stlen);
1762 			n1->stlen = nlen;
1763 			n1->stptr[nlen] = '\0';
1764 			n1->flags &= ~(NUMCUR|NUMBER|NUMINT);
1765 			n1->flags |= (STRING|STRCUR);
1766 			unref(n2);
1767 			bcfree($2->nexti);
1768 			bcfree($2);
1769 			$$ = $1;
1770 		} else {
1771 	plain_concat:
1772 			$$ = list_append(list_merge($1, $2), instruction(Op_concat));
1773 			$$->lasti->concat_flag = (is_simple_var ? CSVAR : 0);
1774 			$$->lasti->expr_count = count;
1775 			if (count > max_args)
1776 				max_args = count;
1777 		}
1778 	  }
1779 	;
1780 
1781 simp_exp
1782 	: non_post_simp_exp
1783 	/* Binary operators in order of decreasing precedence.  */
1784 	| simp_exp '^' simp_exp
1785 	  { $$ = mk_binary($1, $3, $2); }
1786 	| simp_exp '*' simp_exp
1787 	  { $$ = mk_binary($1, $3, $2); }
1788 	| simp_exp '/' simp_exp
1789 	  { $$ = mk_binary($1, $3, $2); }
1790 	| simp_exp '%' simp_exp
1791 	  { $$ = mk_binary($1, $3, $2); }
1792 	| simp_exp '+' simp_exp
1793 	  { $$ = mk_binary($1, $3, $2); }
1794 	| simp_exp '-' simp_exp
1795 	  { $$ = mk_binary($1, $3, $2); }
1796 	| LEX_GETLINE opt_variable input_redir
1797 	  {
1798 		/*
1799 		 * In BEGINFILE/ENDFILE, allow `getline [var] < file'
1800 		 */
1801 
1802 		if ((rule == BEGINFILE || rule == ENDFILE) && $3 == NULL)
1803 			error_ln($1->source_line,
1804 				 _("non-redirected `getline' invalid inside `%s' rule"), ruletab[rule]);
1805 		if (do_lint && rule == END && $3 == NULL)
1806 			lintwarn_ln($1->source_line,
1807 				_("non-redirected `getline' undefined inside END action"));
1808 		$$ = mk_getline($1, $2, $3, redirect_input);
1809 	  }
1810 	| variable INCREMENT
1811 	  {
1812 		$2->opcode = Op_postincrement;
1813 		$$ = mk_assignment($1, NULL, $2);
1814 	  }
1815 	| variable DECREMENT
1816 	  {
1817 		$2->opcode = Op_postdecrement;
1818 		$$ = mk_assignment($1, NULL, $2);
1819 	  }
1820 	| '(' expression_list r_paren LEX_IN simple_variable
1821 	  {
1822 		if (do_lint_old) {
1823 		    /* first one is warning so that second one comes out if warnings are fatal */
1824 		    warning_ln($4->source_line,
1825 				_("old awk does not support the keyword `in' except after `for'"));
1826 		    lintwarn_ln($4->source_line,
1827 				_("old awk does not support multidimensional arrays"));
1828 		}
1829 		$5->nexti->opcode = Op_push_array;
1830 		$4->opcode = Op_in_array;
1831 		if ($2 == NULL) {	/* error */
1832 			errcount++;
1833 			$4->expr_count = 0;
1834 			$$ = list_merge($5, $4);
1835 		} else {
1836 			INSTRUCTION *t = $2;
1837 			$4->expr_count = count_expressions(&t, false);
1838 			$$ = list_append(list_merge(t, $5), $4);
1839 		}
1840 	  }
1841 	;
1842 
1843 /* Expressions containing "| getline" lose the ability to be on the
1844    right-hand side of a concatenation. */
1845 simp_exp_nc
1846 	: common_exp IO_IN LEX_GETLINE opt_variable
1847 		{
1848 		  $$ = mk_getline($3, $4, $1, $2->redir_type);
1849 		  bcfree($2);
1850 		}
1851 	/* Binary operators in order of decreasing precedence.  */
1852 	| simp_exp_nc '^' simp_exp
1853 	  { $$ = mk_binary($1, $3, $2); }
1854 	| simp_exp_nc '*' simp_exp
1855 	  { $$ = mk_binary($1, $3, $2); }
1856 	| simp_exp_nc '/' simp_exp
1857 	  { $$ = mk_binary($1, $3, $2); }
1858 	| simp_exp_nc '%' simp_exp
1859 	  { $$ = mk_binary($1, $3, $2); }
1860 	| simp_exp_nc '+' simp_exp
1861 	  { $$ = mk_binary($1, $3, $2); }
1862 	| simp_exp_nc '-' simp_exp
1863 	  { $$ = mk_binary($1, $3, $2); }
1864 	;
1865 
1866 non_post_simp_exp
1867 	: regexp
1868 	  {
1869 		$$ = list_create($1);
1870 	  }
1871 	| '!' simp_exp %prec UNARY
1872 	  {
1873 		if ($2->opcode == Op_match_rec) {
1874 			$2->opcode = Op_nomatch;
1875 			$1->opcode = Op_push_i;
1876 			$1->memory = set_profile_text(make_number(0.0), "0", 1);
1877 			$$ = list_append(list_append(list_create($1),
1878 						instruction(Op_field_spec)), $2);
1879 		} else {
1880 			if (do_optimize && $2->nexti == $2->lasti
1881 					&& $2->nexti->opcode == Op_push_i
1882 					&& ($2->nexti->memory->flags & (MPFN|MPZN|INTLSTR)) == 0
1883 			) {
1884 				NODE *n = $2->nexti->memory;
1885 				if ((n->flags & STRING) != 0) {
1886 					n->numbr = (AWKNUM) (n->stlen == 0);
1887 					n->flags &= ~(STRCUR|STRING);
1888 					n->flags |= (NUMCUR|NUMBER);
1889 					efree(n->stptr);
1890 					n->stptr = NULL;
1891 					n->stlen = 0;
1892 				} else
1893 					n->numbr = (AWKNUM) (n->numbr == 0.0);
1894 				bcfree($1);
1895 				$$ = $2;
1896 			} else {
1897 				$1->opcode = Op_not;
1898 				add_lint($2, LINT_assign_in_cond);
1899 				$$ = list_append($2, $1);
1900 			}
1901 		}
1902 	   }
1903 	| '(' exp r_paren
1904 	  {
1905 		// Always include. Allows us to lint warn on
1906 		// print "foo" > "bar" 1
1907 		// but not warn on
1908 		// print "foo" > ("bar" 1)
1909 		$$ = list_append($2, bcalloc(Op_parens, 1, sourceline));
1910 	  }
1911 	| LEX_BUILTIN '(' opt_fcall_expression_list r_paren
1912 	  {
1913 		$$ = snode($3, $1);
1914 		if ($$ == NULL)
1915 			YYABORT;
1916 	  }
1917 	| LEX_LENGTH '(' opt_fcall_expression_list r_paren
1918 	  {
1919 		$$ = snode($3, $1);
1920 		if ($$ == NULL)
1921 			YYABORT;
1922 	  }
1923 	| LEX_LENGTH
1924 	  {
1925 		static bool warned = false;
1926 
1927 		if (do_lint && ! warned) {
1928 			warned = true;
1929 			lintwarn_ln($1->source_line,
1930 				_("call of `length' without parentheses is not portable"));
1931 		}
1932 		$$ = snode(NULL, $1);
1933 		if ($$ == NULL)
1934 			YYABORT;
1935 	  }
1936 	| func_call
1937 	| variable
1938 	| INCREMENT variable
1939 	  {
1940 		$1->opcode = Op_preincrement;
1941 		$$ = mk_assignment($2, NULL, $1);
1942 	  }
1943 	| DECREMENT variable
1944 	  {
1945 		$1->opcode = Op_predecrement;
1946 		$$ = mk_assignment($2, NULL, $1);
1947 	  }
1948 	| YNUMBER
1949 	  {
1950 		$$ = list_create($1);
1951 	  }
1952 	| YSTRING
1953 	  {
1954 		$$ = list_create($1);
1955 	  }
1956 	| '-' simp_exp    %prec UNARY
1957 	  {
1958 		if ($2->lasti->opcode == Op_push_i
1959 			&& ($2->lasti->memory->flags & STRING) == 0
1960 		) {
1961 			NODE *n = $2->lasti->memory;
1962 			(void) force_number(n);
1963 			negate_num(n);
1964 			$$ = $2;
1965 			bcfree($1);
1966 		} else {
1967 			$1->opcode = Op_unary_minus;
1968 			$$ = list_append($2, $1);
1969 		}
1970 	  }
1971 	| '+' simp_exp    %prec UNARY
1972 	  {
1973 		if ($2->lasti->opcode == Op_push_i
1974 			&& ($2->lasti->memory->flags & STRING) == 0
1975 			&& ($2->lasti->memory->flags & NUMCONSTSTR) != 0) {
1976 			NODE *n = $2->lasti->memory;
1977 			add_sign_to_num(n, '+');
1978 			$$ = $2;
1979 			bcfree($1);
1980 		} else {
1981 			/*
1982 			 * was: $$ = $2
1983 			 * POSIX semantics: force a conversion to numeric type
1984 			 */
1985 			$1->opcode = Op_unary_plus;
1986 			$$ = list_append($2, $1);
1987 		}
1988 	  }
1989 	;
1990 
1991 func_call
1992 	: direct_func_call
1993 	  {
1994 		func_use($1->lasti->func_name, FUNC_USE);
1995 		$$ = $1;
1996 	  }
1997 	| '@' direct_func_call
1998 	  {
1999 		/* indirect function call */
2000 		INSTRUCTION *f, *t;
2001 		char *name;
2002 		NODE *indirect_var;
2003 		static bool warned = false;
2004 		const char *msg = _("indirect function calls are a gawk extension");
2005 
2006 		if (do_traditional || do_posix)
2007 			yyerror("%s", msg);
2008 		else if (do_lint_extensions && ! warned) {
2009 			warned = true;
2010 			lintwarn("%s", msg);
2011 		}
2012 
2013 		f = $2->lasti;
2014 		f->opcode = Op_indirect_func_call;
2015 		name = estrdup(f->func_name, strlen(f->func_name));
2016 		if (is_std_var(name))
2017 			yyerror(_("cannot use special variable `%s' for indirect function call"), name);
2018 		indirect_var = variable(f->source_line, name, Node_var_new);
2019 		t = instruction(Op_push);
2020 		t->memory = indirect_var;
2021 
2022 		/* prepend indirect var instead of appending to arguments (opt_expression_list),
2023 		 * and pop it off in setup_frame (eval.c) (left to right evaluation order); Test case:
2024 		 *		f = "fun"
2025 		 *		@f(f="real_fun")
2026 		 */
2027 
2028 		$$ = list_prepend($2, t);
2029 		at_seen = false;
2030 	  }
2031 	;
2032 
2033 direct_func_call
2034 	: FUNC_CALL '(' opt_fcall_expression_list r_paren
2035 	  {
2036 		NODE *n;
2037 		char *name = $1->func_name;
2038 		char *qname = qualify_name(name, strlen(name));
2039 
2040 		if (qname != name) {
2041 			efree((char *) name);
2042 			$1->func_name = qname;
2043 		}
2044 
2045 		if (! at_seen) {
2046 			n = lookup($1->func_name);
2047 			if (n != NULL && n->type != Node_func
2048 			    && n->type != Node_ext_func) {
2049 				error_ln($1->source_line,
2050 					_("attempt to use non-function `%s' in function call"),
2051 						$1->func_name);
2052 			}
2053 		}
2054 
2055 		param_sanity($3);
2056 		$1->opcode = Op_func_call;
2057 		$1->func_body = NULL;
2058 		if ($3 == NULL) {	/* no argument or error */
2059 			($1 + 1)->expr_count = 0;
2060 			$$ = list_create($1);
2061 		} else {
2062 			INSTRUCTION *t = $3;
2063 			($1 + 1)->expr_count = count_expressions(&t, true);
2064 			$$ = list_append(t, $1);
2065 		}
2066 	  }
2067 	;
2068 
2069 opt_variable
2070 	: /* empty */
2071 	  { $$ = NULL; }
2072 	| variable
2073 	  { $$ = $1; }
2074 	;
2075 
2076 delete_subscript_list
2077 	: /* empty */
2078 	  { $$ = NULL; }
2079 	| delete_subscript SUBSCRIPT
2080 	  { $$ = $1; }
2081 	;
2082 
2083 delete_subscript
2084 	: delete_exp_list
2085 	  {	$$ = $1; }
2086 	| delete_subscript delete_exp_list
2087 	  {
2088 		$$ = list_merge($1, $2);
2089 	  }
2090 	;
2091 
2092 delete_exp_list
2093 	: bracketed_exp_list
2094 	  {
2095 		INSTRUCTION *ip = $1->lasti;
2096 		int count = ip->sub_count;	/* # of SUBSEP-seperated expressions */
2097 		if (count > 1) {
2098 			/* change Op_subscript or Op_sub_array to Op_concat */
2099 			ip->opcode = Op_concat;
2100 			ip->concat_flag = CSUBSEP;
2101 			ip->expr_count = count;
2102 		} else
2103 			ip->opcode = Op_no_op;
2104 		sub_counter++;	/* count # of dimensions */
2105 		$$ = $1;
2106 	  }
2107 	;
2108 
2109 bracketed_exp_list
2110 	: '[' expression_list ']'
2111   	  {
2112 		INSTRUCTION *t = $2;
2113 		if ($2 == NULL) {
2114 			error_ln($3->source_line,
2115 				_("invalid subscript expression"));
2116 			/* install Null string as subscript. */
2117 			t = list_create(instruction(Op_push_i));
2118 			t->nexti->memory = dupnode(Nnull_string);
2119 			$3->sub_count = 1;
2120 		} else
2121 			$3->sub_count = count_expressions(&t, false);
2122 		$$ = list_append(t, $3);
2123 	  }
2124 	;
2125 
2126 subscript
2127 	: bracketed_exp_list
2128 	  {	$$ = $1; }
2129 	| subscript bracketed_exp_list
2130 	  {
2131 		$$ = list_merge($1, $2);
2132 	  }
2133 	;
2134 
2135 subscript_list
2136 	: subscript SUBSCRIPT
2137 	  { $$ = $1; }
2138 	;
2139 
2140 simple_variable
2141 	: NAME
2142 	  {
2143 		$1->opcode = Op_push;
2144 		$1->memory = variable($1->source_line, $1->lextok, Node_var_new);
2145 		$$ = list_create($1);
2146 	  }
2147 	| NAME subscript_list
2148 	  {
2149 		char *arr = $1->lextok;
2150 
2151 		$1->memory = variable($1->source_line, arr, Node_var_array);
2152 		$1->opcode = Op_push_array;
2153 		$$ = list_prepend($2, $1);
2154 	  }
2155 	;
2156 
2157 variable
2158 	: simple_variable
2159 	  {
2160 		INSTRUCTION *ip = $1->nexti;
2161 		if (ip->opcode == Op_push
2162 			&& ip->memory->type == Node_var
2163 			&& ip->memory->var_update
2164 		) {
2165 			$$ = list_prepend($1, instruction(Op_var_update));
2166 			$$->nexti->update_var = ip->memory->var_update;
2167 		} else
2168 			$$ = $1;
2169 	  }
2170 	| '$' non_post_simp_exp opt_incdec
2171 	  {
2172 		$$ = list_append($2, $1);
2173 		if ($3 != NULL)
2174 			mk_assignment($2, NULL, $3);
2175 	  }
2176 	;
2177 
2178 opt_incdec
2179 	: INCREMENT
2180 	  {
2181 		$1->opcode = Op_postincrement;
2182 	  }
2183 	| DECREMENT
2184 	  {
2185 		$1->opcode = Op_postdecrement;
2186 	  }
2187 	| /* empty */
2188 	  { $$ = NULL; }
2189 	;
2190 
2191 l_brace
2192 	: '{' opt_nls { $$ = $2; }
2193 	;
2194 
2195 r_brace
2196 	: '}' opt_nls	{ $$ = $2; yyerrok; }
2197 	;
2198 
2199 r_paren
2200 	: ')' { yyerrok; }
2201 	;
2202 
2203 opt_semi
2204 	: /* empty */
2205 	  { $$ = NULL; }
2206 	| semi
2207 	;
2208 
2209 semi
2210 	: ';'	{ yyerrok; }
2211 	;
2212 
2213 colon
2214 	: ':'	{ $$ = $1; yyerrok; }
2215 	;
2216 
2217 comma
2218 	: ',' opt_nls	{ $$ = $2; yyerrok; }
2219 	;
2220 %%
2221 
2222 struct token {
2223 	const char *operator;	/* text to match */
2224 	OPCODE value;			/*  type */
2225 	int class;				/* lexical class */
2226 	unsigned flags;			/* # of args. allowed and compatability */
2227 #	define	ARGS	0xFF	/* 0, 1, 2, 3 args allowed (any combination */
2228 #	define	A(n)	(1<<(n))
2229 #	define	VERSION_MASK	0xFF00	/* old awk is zero */
2230 #	define	NOT_OLD		0x0100	/* feature not in old awk */
2231 #	define	NOT_POSIX	0x0200	/* feature not in POSIX */
2232 #	define	GAWKX		0x0400	/* gawk extension */
2233 #	define	BREAK		0x0800	/* break allowed inside */
2234 #	define	CONTINUE	0x1000	/* continue allowed inside */
2235 #	define	DEBUG_USE	0x2000	/* for use by developers */
2236 
2237 	NODE *(*ptr)(int);	/* function that implements this keyword */
2238 	NODE *(*ptr2)(int);	/* alternate arbitrary-precision function */
2239 };
2240 
2241 #ifdef USE_EBCDIC
2242 /* tokcompare --- lexicographically compare token names for sorting */
2243 
2244 static int
tokcompare(const void * l,const void * r)2245 tokcompare(const void *l, const void *r)
2246 {
2247 	struct token *lhs, *rhs;
2248 
2249 	lhs = (struct token *) l;
2250 	rhs = (struct token *) r;
2251 
2252 	return strcmp(lhs->operator, rhs->operator);
2253 }
2254 #endif
2255 
2256 /*
2257  * Tokentab is sorted ASCII ascending order, so it can be binary searched.
2258  * See check_special(), which sorts the table on EBCDIC systems.
2259  * Function pointers come from declarations in awk.h.
2260  */
2261 
2262 #ifdef HAVE_MPFR
2263 #define MPF(F) do_mpfr_##F
2264 #else
2265 #define MPF(F) 0
2266 #endif
2267 
2268 static const struct token tokentab[] = {
2269 {"BEGIN",	Op_rule,	 LEX_BEGIN,	0,		0,	0},
2270 {"BEGINFILE",	Op_rule,	 LEX_BEGINFILE,	GAWKX,		0,	0},
2271 {"END",		Op_rule,	 LEX_END,	0,		0,	0},
2272 {"ENDFILE",	Op_rule,	 LEX_ENDFILE,	GAWKX,		0,	0},
2273 #ifdef ARRAYDEBUG
2274 {"adump",	Op_builtin,    LEX_BUILTIN,	GAWKX|A(1)|A(2)|DEBUG_USE,	do_adump,	0},
2275 #endif
2276 {"and",		Op_builtin,    LEX_BUILTIN,	GAWKX,		do_and,	MPF(and)},
2277 {"asort",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2)|A(3),	do_asort,	0},
2278 {"asorti",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2)|A(3),	do_asorti,	0},
2279 {"atan2",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2),	do_atan2,	MPF(atan2)},
2280 {"bindtextdomain",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2),	do_bindtextdomain,	0},
2281 {"break",	Op_K_break,	 LEX_BREAK,	0,		0,	0},
2282 {"case",	Op_K_case,	 LEX_CASE,	GAWKX,		0,	0},
2283 {"close",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1)|A(2),	do_close,	0},
2284 {"compl",	Op_builtin,    LEX_BUILTIN,	GAWKX|A(1),	do_compl,	MPF(compl)},
2285 {"continue",	Op_K_continue, LEX_CONTINUE,	0,		0,	0},
2286 {"cos",		Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_cos,	MPF(cos)},
2287 {"dcgettext",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2)|A(3),	do_dcgettext,	0},
2288 {"dcngettext",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2)|A(3)|A(4)|A(5),	do_dcngettext,	0},
2289 {"default",	Op_K_default,	 LEX_DEFAULT,	GAWKX,		0,	0},
2290 {"delete",	Op_K_delete,	 LEX_DELETE,	NOT_OLD,	0,	0},
2291 {"do",		Op_K_do,	 LEX_DO,	NOT_OLD|BREAK|CONTINUE,	0,	0},
2292 {"else",	Op_K_else,	 LEX_ELSE,	0,		0,	0},
2293 {"eval",	Op_symbol,	 LEX_EVAL,	0,		0,	0},
2294 {"exit",	Op_K_exit,	 LEX_EXIT,	0,		0,	0},
2295 {"exp",		Op_builtin,	 LEX_BUILTIN,	A(1),		do_exp,	MPF(exp)},
2296 {"fflush",	Op_builtin,	 LEX_BUILTIN,	A(0)|A(1), do_fflush,	0},
2297 {"for",		Op_K_for,	 LEX_FOR,	BREAK|CONTINUE,	0,	0},
2298 {"func",	Op_func, 	LEX_FUNCTION,	NOT_POSIX|NOT_OLD,	0,	0},
2299 {"function",	Op_func, 	LEX_FUNCTION,	NOT_OLD,	0,	0},
2300 {"gensub",	Op_sub_builtin,	 LEX_BUILTIN,	GAWKX|A(3)|A(4), 0,	0},
2301 {"getline",	Op_K_getline_redir,	 LEX_GETLINE,	NOT_OLD,	0,	0},
2302 {"gsub",	Op_sub_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2)|A(3), 0,	0},
2303 {"if",		Op_K_if,	 LEX_IF,	0,		0,	0},
2304 {"in",		Op_symbol,	 LEX_IN,	0,		0,	0},
2305 {"include",	Op_symbol,	 LEX_INCLUDE,	GAWKX,	0,	0},
2306 {"index",	Op_builtin,	 LEX_BUILTIN,	A(2),		do_index,	0},
2307 {"int",		Op_builtin,	 LEX_BUILTIN,	A(1),		do_int,	MPF(int)},
2308 #ifdef SUPPLY_INTDIV
2309 {"intdiv0",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(3),	do_intdiv,	MPF(intdiv)},
2310 #endif
2311 {"isarray",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1),	do_isarray,	0},
2312 {"length",	Op_builtin,	 LEX_LENGTH,	A(0)|A(1),	do_length,	0},
2313 {"load",  	Op_symbol,	 LEX_LOAD,	GAWKX,		0,	0},
2314 {"log",		Op_builtin,	 LEX_BUILTIN,	A(1),		do_log,	MPF(log)},
2315 {"lshift",	Op_builtin,    LEX_BUILTIN,	GAWKX|A(2),	do_lshift,	MPF(lshift)},
2316 {"match",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2)|A(3), do_match,	0},
2317 {"mktime",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2), do_mktime, 0},
2318 {"namespace",  	Op_symbol,	 LEX_NAMESPACE,	GAWKX,		0,	0},
2319 {"next",	Op_K_next,	 LEX_NEXT,	0,		0,	0},
2320 {"nextfile",	Op_K_nextfile, LEX_NEXTFILE,	0,		0,	0},
2321 {"or",		Op_builtin,    LEX_BUILTIN,	GAWKX,		do_or,	MPF(or)},
2322 {"patsplit",	Op_builtin,    LEX_BUILTIN,	GAWKX|A(2)|A(3)|A(4), do_patsplit,	0},
2323 {"print",	Op_K_print,	 LEX_PRINT,	0,		0,	0},
2324 {"printf",	Op_K_printf,	 LEX_PRINTF,	0,		0,	0},
2325 {"rand",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(0),	do_rand,	MPF(rand)},
2326 {"return",	Op_K_return,	 LEX_RETURN,	NOT_OLD,	0,	0},
2327 {"rshift",	Op_builtin,    LEX_BUILTIN,	GAWKX|A(2),	do_rshift,	MPF(rshift)},
2328 {"sin",		Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_sin,	MPF(sin)},
2329 {"split",	Op_builtin,	 LEX_BUILTIN,	A(2)|A(3)|A(4),	do_split,	0},
2330 {"sprintf",	Op_builtin,	 LEX_BUILTIN,	0,		do_sprintf,	0},
2331 {"sqrt",	Op_builtin,	 LEX_BUILTIN,	A(1),		do_sqrt,	MPF(sqrt)},
2332 {"srand",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(0)|A(1), do_srand,	MPF(srand)},
2333 #if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
2334 {"stopme",	Op_builtin,	LEX_BUILTIN,	GAWKX|A(0)|DEBUG_USE,	stopme,		0},
2335 #endif
2336 {"strftime",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime,	0},
2337 {"strtonum",	Op_builtin,    LEX_BUILTIN,	GAWKX|A(1),	do_strtonum, MPF(strtonum)},
2338 {"sub",		Op_sub_builtin,	 LEX_BUILTIN,	NOT_OLD|A(2)|A(3), 0,	0},
2339 {"substr",	Op_builtin,	 LEX_BUILTIN,	A(2)|A(3),	do_substr,	0},
2340 {"switch",	Op_K_switch,	 LEX_SWITCH,	GAWKX|BREAK,	0,	0},
2341 {"system",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_system,	0},
2342 {"systime",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(0),	do_systime,	0},
2343 {"tolower",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_tolower,	0},
2344 {"toupper",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_toupper,	0},
2345 {"typeof",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1)|A(2), do_typeof,	0},
2346 {"while",	Op_K_while,	 LEX_WHILE,	BREAK|CONTINUE,	0,	0},
2347 {"xor",		Op_builtin,    LEX_BUILTIN,	GAWKX,		do_xor,	MPF(xor)},
2348 };
2349 
2350 /* Variable containing the current shift state.  */
2351 static mbstate_t cur_mbstate;
2352 /* Ring buffer containing current characters.  */
2353 #define MAX_CHAR_IN_RING_BUFFER 8
2354 #define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)
2355 static char cur_char_ring[RING_BUFFER_SIZE];
2356 /* Index for ring buffers.  */
2357 static int cur_ring_idx;
2358 /* This macro means that last nextc() return a singlebyte character
2359    or 1st byte of a multibyte character.  */
2360 #define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
2361 
2362 /* getfname --- return name of a builtin function (for pretty printing) */
2363 
2364 const char *
getfname(NODE * (* fptr)(int),bool prepend_awk)2365 getfname(NODE *(*fptr)(int), bool prepend_awk)
2366 {
2367 	int i, j;
2368 	static char buf[100];
2369 
2370 	j = sizeof(tokentab) / sizeof(tokentab[0]);
2371 	/* linear search, no other way to do it */
2372 	for (i = 0; i < j; i++) {
2373 		if (tokentab[i].ptr == fptr || tokentab[i].ptr2 == fptr) {
2374 			if (prepend_awk && (tokentab[i].flags & GAWKX) != 0) {
2375 				sprintf(buf, "awk::%s", tokentab[i].operator);
2376 				return buf;
2377 			}
2378 			return tokentab[i].operator;
2379 		}
2380 	}
2381 
2382 	return NULL;
2383 }
2384 
2385 /* negate_num --- negate a number in NODE */
2386 
2387 void
negate_num(NODE * n)2388 negate_num(NODE *n)
2389 {
2390 #ifdef HAVE_MPFR
2391 	int tval = 0;
2392 #endif
2393 
2394 	add_sign_to_num(n, '-');
2395 
2396 	if (! is_mpg_number(n)) {
2397 		n->numbr = -n->numbr;
2398 		return;
2399 	}
2400 
2401 #ifdef HAVE_MPFR
2402 	if (is_mpg_integer(n)) {
2403 		if (! is_zero(n)) {
2404 			mpz_neg(n->mpg_i, n->mpg_i);
2405 			return;
2406 		}
2407 
2408 		/*
2409 		 * 0 --> -0 conversion. Requires turning the MPG integer
2410 		 * into an MPFR float.
2411 		 */
2412 
2413 		mpz_clear(n->mpg_i);	/* release the integer storage */
2414 
2415 		/* Convert and fall through. */
2416 		tval = mpfr_set_d(n->mpg_numbr, 0.0, ROUND_MODE);
2417 		IEEE_FMT(n->mpg_numbr, tval);
2418 		n->flags &= ~MPZN;
2419 		n->flags |= MPFN;
2420 	}
2421 
2422 	/* mpfr float case */
2423 	tval = mpfr_neg(n->mpg_numbr, n->mpg_numbr, ROUND_MODE);
2424 	IEEE_FMT(n->mpg_numbr, tval);
2425 #endif
2426 }
2427 
2428 /* add_sign_to_num --- make a constant unary plus or minus for profiling */
2429 
2430 static void
add_sign_to_num(NODE * n,char sign)2431 add_sign_to_num(NODE *n, char sign)
2432 {
2433 	if ((n->flags & NUMCONSTSTR) != 0) {
2434 		char *s;
2435 
2436 		s = n->stptr;
2437 		memmove(& s[1], & s[0], n->stlen + 1);
2438 		s[0] = sign;
2439 		n->stlen++;
2440 	}
2441 }
2442 
2443 /* print_included_from --- print `Included from ..' file names and locations */
2444 
2445 static void
print_included_from()2446 print_included_from()
2447 {
2448 	int saveline, line;
2449 	SRCFILE *s;
2450 
2451 	/* suppress current file name, line # from `.. included from ..' msgs */
2452 	saveline = sourceline;
2453 	sourceline = 0;
2454 
2455 	for (s = sourcefile; s != NULL && s->stype == SRC_INC; ) {
2456 		s = s->next;
2457 		if (s == NULL || s->fd <= INVALID_HANDLE)
2458 			continue;
2459 		line = s->srclines;
2460 
2461 		/* if last token is NEWLINE, line number is off by 1. */
2462 		if (s->lasttok == NEWLINE)
2463 			line--;
2464 		msg("%s %s:%d%c",
2465 			s->prev == sourcefile ? "In file included from"
2466 					  : "                 from",
2467 			(s->stype == SRC_INC ||
2468 				 s->stype == SRC_FILE) ? s->src : "cmd. line",
2469 			line,
2470 			s->stype == SRC_INC ? ',' : ':'
2471 		);
2472 	}
2473 	sourceline = saveline;
2474 }
2475 
2476 /* warning_ln --- print a warning message with location */
2477 
2478 static void
warning_ln(int line,const char * mesg,...)2479 warning_ln(int line, const char *mesg, ...)
2480 {
2481 	va_list args;
2482 	int saveline;
2483 
2484 	saveline = sourceline;
2485 	sourceline = line;
2486 	print_included_from();
2487 	va_start(args, mesg);
2488 	err(false, _("warning: "), mesg, args);
2489 	va_end(args);
2490 	sourceline = saveline;
2491 }
2492 
2493 /* lintwarn_ln --- print a lint warning and location */
2494 
2495 static void
lintwarn_ln(int line,const char * mesg,...)2496 lintwarn_ln(int line, const char *mesg, ...)
2497 {
2498 	va_list args;
2499 	int saveline;
2500 
2501 	saveline = sourceline;
2502 	sourceline = line;
2503 	print_included_from();
2504 	va_start(args, mesg);
2505 	if (lintfunc == r_fatal)
2506 		err(true, _("fatal: "), mesg, args);
2507 	else
2508 		err(false, _("warning: "), mesg, args);
2509 	va_end(args);
2510 	sourceline = saveline;
2511 	if (lintfunc == r_fatal)
2512 		gawk_exit(EXIT_FATAL);
2513 }
2514 
2515 /* error_ln --- print an error message and location */
2516 
2517 static void
error_ln(int line,const char * m,...)2518 error_ln(int line, const char *m, ...)
2519 {
2520 	va_list args;
2521 	int saveline;
2522 
2523 	saveline = sourceline;
2524 	sourceline = line;
2525 	print_included_from();
2526 	errcount++;
2527 	va_start(args, m);
2528 	err(false, "error: ", m, args);
2529 	va_end(args);
2530 	sourceline = saveline;
2531 }
2532 
2533 /* yyerror --- print a syntax error message, show where */
2534 
2535 static void
yyerror(const char * m,...)2536 yyerror(const char *m, ...)
2537 {
2538 	va_list args;
2539 	const char *mesg = NULL;
2540 	char *bp, *cp;
2541 	char *scan;
2542 	char *buf;
2543 	int count;
2544 	static char end_of_file_line[] = "(END OF FILE)";
2545 	static char syntax_error[] = "syntax error";
2546 	static size_t syn_err_len = sizeof(syntax_error) - 1;
2547 	bool generic_error = (strncmp(m, syntax_error, syn_err_len) == 0);
2548 
2549 	print_included_from();
2550 
2551 	errcount++;
2552 	/* Find the current line in the input file */
2553 	if (lexptr && lexeme) {
2554 		if (thisline == NULL) {
2555 			cp = lexeme;
2556 			if (*cp == '\n') {
2557 				if (cp > lexptr_begin)
2558 					cp--;
2559 				mesg = _("unexpected newline or end of string");
2560 			}
2561 			for (; cp != lexptr_begin && *cp != '\n'; --cp)
2562 				continue;
2563 			if (*cp == '\n')
2564 				cp++;
2565 			thisline = cp;
2566 		}
2567 		/* NL isn't guaranteed */
2568 		bp = lexeme;
2569 		if (bp < thisline)
2570 			bp = thisline + 1;
2571 		while (bp < lexend && *bp && *bp != '\n')
2572 			bp++;
2573 	} else {
2574 		thisline = end_of_file_line;
2575 		bp = thisline + strlen(thisline);
2576 	}
2577 
2578 	if (lexeof && mesg == NULL && generic_error) {
2579 		msg("%s", end_of_file_line);
2580 		mesg = _("source files / command-line arguments must contain complete functions or rules");
2581 	} else
2582 		msg("%.*s", (int) (bp - thisline), thisline);
2583 
2584 	va_start(args, m);
2585 	if (mesg == NULL)
2586 		mesg = m;
2587 
2588 	count = strlen(mesg) + 1;
2589 	if (lexptr != NULL)
2590 		count += (lexeme - thisline) + 2;
2591 	ezalloc(buf, char *, count+1, "yyerror");
2592 
2593 	bp = buf;
2594 
2595 	if (lexptr != NULL) {
2596 		scan = thisline;
2597 		while (scan < lexeme)
2598 			if (*scan++ == '\t')
2599 				*bp++ = '\t';
2600 			else
2601 				*bp++ = ' ';
2602 		*bp++ = '^';
2603 		*bp++ = ' ';
2604 	}
2605 	strcpy(bp, mesg);
2606 	err(false, "", buf, args);
2607 	va_end(args);
2608 	efree(buf);
2609 }
2610 
2611 /* mk_program --- create a single list of instructions */
2612 
2613 static INSTRUCTION *
mk_program()2614 mk_program()
2615 {
2616 	INSTRUCTION *cp, *tmp;
2617 
2618 #define begin_block         rule_block[BEGIN]
2619 #define end_block           rule_block[END]
2620 #define prog_block          rule_block[Rule]
2621 #define beginfile_block     rule_block[BEGINFILE]
2622 #define endfile_block       rule_block[ENDFILE]
2623 
2624 	if (end_block == NULL)
2625 		end_block = list_create(ip_end);
2626 	else
2627 		(void) list_prepend(end_block, ip_end);
2628 
2629 	if (! in_main_context()) {
2630 		if (begin_block != NULL && prog_block != NULL)
2631 			cp = list_merge(begin_block, prog_block);
2632 		else
2633 			cp = (begin_block != NULL) ? begin_block : prog_block;
2634 
2635 		if (cp != NULL)
2636 			(void) list_merge(cp, end_block);
2637 		else
2638 			cp = end_block;
2639 
2640 		(void) list_append(cp, instruction(Op_stop));
2641 		goto out;
2642 	}
2643 
2644 	if (endfile_block == NULL)
2645 		endfile_block = list_create(ip_endfile);
2646 	else {
2647 		ip_rec->has_endfile = true;
2648 		(void) list_prepend(endfile_block, ip_endfile);
2649 	}
2650 
2651 	if (beginfile_block == NULL)
2652 		beginfile_block = list_create(ip_beginfile);
2653 	else
2654 		(void) list_prepend(beginfile_block, ip_beginfile);
2655 
2656 	if (prog_block == NULL) {
2657 		if (end_block->nexti == end_block->lasti
2658 				&& beginfile_block->nexti == beginfile_block->lasti
2659 				&& endfile_block->nexti == endfile_block->lasti
2660 		) {
2661 			/* no pattern-action and (real) end, beginfile or endfile blocks */
2662 			bcfree(ip_rec);
2663 			bcfree(ip_newfile);
2664 			ip_rec = ip_newfile = NULL;
2665 
2666 			list_append(beginfile_block, instruction(Op_after_beginfile));
2667 			(void) list_append(endfile_block, instruction(Op_after_endfile));
2668 
2669 			if (begin_block == NULL)     /* no program at all */
2670 				cp = end_block;
2671 			else
2672 				cp = list_merge(begin_block, end_block);
2673 
2674 			if (interblock_comment != NULL) {
2675 				(void) list_append(cp, interblock_comment);
2676 				interblock_comment = NULL;
2677 			}
2678 
2679 			(void) list_append(cp, ip_atexit);
2680 			(void) list_append(cp, instruction(Op_stop));
2681 
2682 			/* append beginfile_block and endfile_block for sole use
2683 			 * in getline without redirection (Op_K_getline).
2684 			 */
2685 
2686 			(void) list_merge(cp, beginfile_block);
2687 			(void) list_merge(cp, endfile_block);
2688 
2689 			if (outer_comment != NULL) {
2690 				cp = list_merge(list_create(outer_comment), cp);
2691 				outer_comment = NULL;
2692 			}
2693 
2694 			if (interblock_comment != NULL) {
2695 				(void) list_append(cp, interblock_comment);
2696 				interblock_comment = NULL;
2697 			}
2698 
2699 			goto out;
2700 
2701 		} else {
2702 			/* install a do-nothing prog block */
2703 			prog_block = list_create(instruction(Op_no_op));
2704 		}
2705 	}
2706 
2707 	(void) list_append(endfile_block, instruction(Op_after_endfile));
2708 	(void) list_prepend(prog_block, ip_rec);
2709 	(void) list_append(prog_block, instruction(Op_jmp));
2710 	prog_block->lasti->target_jmp = ip_rec;
2711 
2712 	list_append(beginfile_block, instruction(Op_after_beginfile));
2713 
2714 	cp = list_merge(beginfile_block, prog_block);
2715 	(void) list_prepend(cp, ip_newfile);
2716 	(void) list_merge(cp, endfile_block);
2717 	(void) list_merge(cp, end_block);
2718 	if (begin_block != NULL)
2719 		cp = list_merge(begin_block, cp);
2720 
2721 	if (outer_comment != NULL) {
2722 		cp = list_merge(list_create(outer_comment), cp);
2723 		outer_comment = NULL;
2724 	}
2725 
2726 	if (interblock_comment != NULL) {
2727 		(void) list_append(cp, interblock_comment);
2728 		interblock_comment = NULL;
2729 	}
2730 
2731 	(void) list_append(cp, ip_atexit);
2732 	(void) list_append(cp, instruction(Op_stop));
2733 
2734 out:
2735 	/* delete the Op_list, not needed */
2736 	tmp = cp->nexti;
2737 	bcfree(cp);
2738 	return tmp;
2739 
2740 #undef begin_block
2741 #undef end_block
2742 #undef prog_block
2743 #undef beginfile_block
2744 #undef endfile_block
2745 }
2746 
2747 /* parse_program --- read in the program and convert into a list of instructions */
2748 
2749 int
parse_program(INSTRUCTION ** pcode,bool from_eval)2750 parse_program(INSTRUCTION **pcode, bool from_eval)
2751 {
2752 	int ret;
2753 
2754 	called_from_eval = from_eval;
2755 
2756 	/* pre-create non-local jump targets
2757 	 * ip_end (Op_no_op) -- used as jump target for `exit'
2758 	 * outside an END block.
2759 	 */
2760 	ip_end = instruction(Op_no_op);
2761 
2762 	if (! in_main_context())
2763 		ip_newfile = ip_rec = ip_atexit = ip_beginfile = ip_endfile = NULL;
2764 	else {
2765 		ip_endfile = instruction(Op_no_op);
2766 		main_beginfile = ip_beginfile = instruction(Op_no_op);
2767 		ip_rec = instruction(Op_get_record); /* target for `next', also ip_newfile */
2768 		ip_newfile = bcalloc(Op_newfile, 2, 0); /* target for `nextfile' */
2769 		ip_newfile->target_jmp = ip_end;
2770 		ip_newfile->target_endfile = ip_endfile;
2771 		(ip_newfile + 1)->target_get_record = ip_rec;
2772 		ip_rec->target_newfile = ip_newfile;
2773 		ip_atexit = instruction(Op_atexit);	/* target for `exit' in END block */
2774 	}
2775 
2776 	for (sourcefile = srcfiles->next; sourcefile->stype == SRC_EXTLIB;
2777 			sourcefile = sourcefile->next)
2778 		;
2779 
2780 	lexeof = false;
2781 	lexptr = NULL;
2782 	lasttok = 0;
2783 	memset(rule_block, 0, sizeof(rule_block));
2784 	errcount = 0;
2785 	tok = tokstart != NULL ? tokstart : tokexpand();
2786 
2787 	ret = yyparse();
2788 	*pcode = mk_program();
2789 
2790 	/* avoid false source indications */
2791 	source = NULL;
2792 	sourceline = 0;
2793 	if (ret == 0)	/* avoid spurious warning if parser aborted with YYABORT */
2794 		check_funcs();
2795 
2796 	if (do_posix && ! check_param_names())
2797 		errcount++;
2798 
2799 	if (args_array == NULL)
2800 		emalloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");
2801 	else
2802 		erealloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");
2803 
2804 	return (ret || errcount);
2805 }
2806 
2807 /* free_srcfile --- free a SRCFILE struct */
2808 
2809 void
free_srcfile(SRCFILE * thisfile)2810 free_srcfile(SRCFILE *thisfile)
2811 {
2812 	efree(thisfile->src);
2813 	efree(thisfile);
2814 }
2815 
2816 /* do_add_srcfile --- add one item to srcfiles */
2817 
2818 static SRCFILE *
do_add_srcfile(enum srctype stype,char * src,char * path,SRCFILE * thisfile)2819 do_add_srcfile(enum srctype stype, char *src, char *path, SRCFILE *thisfile)
2820 {
2821 	SRCFILE *s;
2822 
2823 	ezalloc(s, SRCFILE *, sizeof(SRCFILE), "do_add_srcfile");
2824 	s->src = estrdup(src, strlen(src));
2825 	s->fullpath = path;
2826 	s->stype = stype;
2827 	s->fd = INVALID_HANDLE;
2828 	s->next = thisfile;
2829 	s->prev = thisfile->prev;
2830 	thisfile->prev->next = s;
2831 	thisfile->prev = s;
2832 	return s;
2833 }
2834 
2835 /* add_srcfile --- add one item to srcfiles after checking if
2836  *				a source file exists and not already in list.
2837  */
2838 
2839 SRCFILE *
add_srcfile(enum srctype stype,char * src,SRCFILE * thisfile,bool * already_included,int * errcode)2840 add_srcfile(enum srctype stype, char *src, SRCFILE *thisfile, bool *already_included, int *errcode)
2841 {
2842 	SRCFILE *s;
2843 	struct stat sbuf;
2844 	char *path;
2845 	int errno_val = 0;
2846 
2847 	if (already_included)
2848 		*already_included = false;
2849 	if (errcode)
2850 		*errcode = 0;
2851 	if (stype == SRC_CMDLINE || stype == SRC_STDIN)
2852 		return do_add_srcfile(stype, src, NULL, thisfile);
2853 
2854 	path = find_source(src, & sbuf, & errno_val, stype == SRC_EXTLIB);
2855 	if (path == NULL) {
2856 		if (errcode) {
2857 			*errcode = errno_val;
2858 			return NULL;
2859 		}
2860 		/* use full messages to ease translation */
2861 		fatal(stype != SRC_EXTLIB
2862 			? _("cannot open source file `%s' for reading: %s")
2863 			: _("cannot open shared library `%s' for reading: %s"),
2864 				src,
2865 				errno_val ? strerror(errno_val) : _("reason unknown"));
2866 	}
2867 
2868 	/* N.B. We do not eliminate duplicate SRC_FILE (-f) programs. */
2869 	for (s = srcfiles->next; s != srcfiles; s = s->next) {
2870 		if ((s->stype == SRC_FILE || s->stype == SRC_INC || s->stype == SRC_EXTLIB) && files_are_same(path, s)) {
2871 			if (stype == SRC_INC || stype == SRC_EXTLIB) {
2872 				/* eliminate duplicates */
2873 				if ((stype == SRC_INC) && (s->stype == SRC_FILE))
2874 					fatal(_("cannot include `%s' and use it as a program file"), src);
2875 
2876 				if (do_lint) {
2877 					int line = sourceline;
2878 					/* Kludge: the line number may be off for `@include file'.
2879 					 * Since, this function is also used for '-f file' in main.c,
2880 					 * sourceline > 1 check ensures that the call is at
2881 					 * parse time.
2882 					 */
2883 					if (sourceline > 1 && lasttok == NEWLINE)
2884 						line--;
2885 					lintwarn_ln(line,
2886 						    stype != SRC_EXTLIB
2887 						      ? _("already included source file `%s'")
2888 						      : _("already loaded shared library `%s'"),
2889 						    src);
2890 				}
2891 				efree(path);
2892 				if (already_included)
2893 					*already_included = true;
2894 				return NULL;
2895 			} else {
2896 				/* duplicates are allowed for -f */
2897 				if (s->stype == SRC_INC)
2898 					fatal(_("cannot include `%s' and use it as a program file"), src);
2899 				/* no need to scan for further matches, since
2900 				 * they must be of homogeneous type */
2901 				break;
2902 			}
2903 		}
2904 	}
2905 
2906 	s = do_add_srcfile(stype, src, path, thisfile);
2907 	s->sbuf = sbuf;
2908 	s->mtime = sbuf.st_mtime;
2909 	return s;
2910 }
2911 
2912 /* include_source --- read program from source included using `@include' */
2913 
2914 static bool
include_source(INSTRUCTION * file,void ** srcfile_p)2915 include_source(INSTRUCTION *file, void **srcfile_p)
2916 {
2917 	SRCFILE *s;
2918 	char *src = file->lextok;
2919 	int errcode;
2920 	bool already_included;
2921 
2922 	*srcfile_p = NULL;
2923 
2924 	if (do_traditional || do_posix) {
2925 		error_ln(file->source_line, _("@include is a gawk extension"));
2926 		return false;
2927 	}
2928 
2929 	if (strlen(src) == 0) {
2930 		if (do_lint)
2931 			lintwarn_ln(file->source_line, _("empty filename after @include"));
2932 		return true;
2933 	}
2934 
2935 	s = add_srcfile(SRC_INC, src, sourcefile, &already_included, &errcode);
2936 	if (s == NULL) {
2937 		if (already_included)
2938 			return true;
2939 		error_ln(file->source_line,
2940 			_("cannot open source file `%s' for reading: %s"),
2941 			src, errcode ? strerror(errcode) : _("reason unknown"));
2942 		return false;
2943 	}
2944 
2945 	/* save scanner state for the current sourcefile */
2946 	sourcefile->srclines = sourceline;
2947 	sourcefile->lexptr = lexptr;
2948 	sourcefile->lexend = lexend;
2949 	sourcefile->lexptr_begin = lexptr_begin;
2950 	sourcefile->lexeme = lexeme;
2951 	sourcefile->lasttok = lasttok;
2952 	sourcefile->namespace = current_namespace;
2953 
2954 	/* included file becomes the current source */
2955 	sourcefile = s;
2956 	lexptr = NULL;
2957 	sourceline = 0;
2958 	source = NULL;
2959 	lasttok = 0;
2960 	lexeof = false;
2961 	eof_warned = false;
2962 	current_namespace = awk_namespace;
2963 	*srcfile_p = (void *) s;
2964 	return true;
2965 }
2966 
2967 /* load_library --- load a shared library */
2968 
2969 static bool
load_library(INSTRUCTION * file,void ** srcfile_p)2970 load_library(INSTRUCTION *file, void **srcfile_p)
2971 {
2972 	SRCFILE *s;
2973 	char *src = file->lextok;
2974 	int errcode;
2975 	bool already_included;
2976 
2977 	*srcfile_p = NULL;
2978 
2979 	if (do_traditional || do_posix) {
2980 		error_ln(file->source_line, _("@load is a gawk extension"));
2981 		return false;
2982 	}
2983 
2984 
2985 	if (strlen(src) == 0) {
2986 		if (do_lint)
2987 			lintwarn_ln(file->source_line, _("empty filename after @load"));
2988 		return true;
2989 	}
2990 
2991 	if (do_pretty_print && ! do_profile) {
2992 		// create a fake one, don't try to open the file
2993 		s = do_add_srcfile(SRC_EXTLIB, src, src, sourcefile);
2994 	} else {
2995 		s = add_srcfile(SRC_EXTLIB, src, sourcefile, &already_included, &errcode);
2996 		if (s == NULL) {
2997 			if (already_included)
2998 				return true;
2999 			error_ln(file->source_line,
3000 				_("cannot open shared library `%s' for reading: %s"),
3001 				src, errcode ? strerror(errcode) : _("reason unknown"));
3002 			return false;
3003 		}
3004 
3005 		load_ext(s->fullpath);
3006 	}
3007 
3008 	*srcfile_p = (void *) s;
3009 	return true;
3010 }
3011 
3012 /* next_sourcefile --- read program from the next source in srcfiles */
3013 
3014 static void
next_sourcefile()3015 next_sourcefile()
3016 {
3017 	static int (*closefunc)(int fd) = NULL;
3018 
3019 	if (closefunc == NULL) {
3020 		char *cp = getenv("AWKREADFUNC");
3021 
3022 		/* If necessary, one day, test value for different functions.  */
3023 		if (cp == NULL)
3024 			closefunc = close;
3025 		else
3026 			closefunc = one_line_close;
3027 	}
3028 
3029 	/*
3030 	 * This won't be true if there's an invalid character in
3031 	 * the source file or source string (e.g., user typo).
3032 	 * Previous versions of gawk did not core dump in such a
3033 	 * case.
3034 	 *
3035 	 * assert(lexeof == true);
3036 	 */
3037 
3038 	lexeof = false;
3039 	eof_warned = false;
3040 	sourcefile->srclines = sourceline;	/* total no of lines in current file */
3041 	if (sourcefile->fd > INVALID_HANDLE) {
3042 		if (sourcefile->fd != fileno(stdin))  /* safety */
3043 			(*closefunc)(sourcefile->fd);
3044 		sourcefile->fd = INVALID_HANDLE;
3045 	}
3046 	if (sourcefile->buf != NULL) {
3047 		efree(sourcefile->buf);
3048 		sourcefile->buf = NULL;
3049 		sourcefile->lexptr_begin = NULL;
3050 	}
3051 
3052 	while ((sourcefile = sourcefile->next) != NULL) {
3053 		if (sourcefile == srcfiles)
3054 			return;
3055 		if (sourcefile->stype != SRC_EXTLIB)
3056 			break;
3057 	}
3058 
3059 	if (sourcefile->lexptr_begin != NULL) {
3060 		/* resume reading from already opened file (postponed to process '@include') */
3061 		lexptr = sourcefile->lexptr;
3062 		lexend = sourcefile->lexend;
3063 		lasttok = sourcefile->lasttok;
3064 		lexptr_begin = sourcefile->lexptr_begin;
3065 		lexeme = sourcefile->lexeme;
3066 		sourceline = sourcefile->srclines;
3067 		source = sourcefile->src;
3068 		set_current_namespace(sourcefile->namespace);
3069 	} else {
3070 		lexptr = NULL;
3071 		sourceline = 0;
3072 		source = NULL;
3073 		lasttok = 0;
3074 		set_current_namespace(awk_namespace);
3075 	}
3076 }
3077 
3078 /* get_src_buf --- read the next buffer of source program */
3079 
3080 static char *
get_src_buf()3081 get_src_buf()
3082 {
3083 	int n;
3084 	char *scan;
3085 	bool newfile;
3086 	int savelen;
3087 	struct stat sbuf;
3088 
3089 	/*
3090 	 * No argument prototype on readfunc on purpose,
3091 	 * avoids problems with some ancient systems where
3092 	 * the types of arguments to read() aren't up to date.
3093 	 */
3094 	static ssize_t (*readfunc)(int, void *, size_t) = NULL;
3095 
3096 	if (readfunc == NULL) {
3097 		char *cp = getenv("AWKREADFUNC");
3098 
3099 		/* If necessary, one day, test value for different functions.  */
3100 		if (cp == NULL)
3101 			/*
3102 			 * cast is to remove warnings on systems with
3103 			 * different return types for read.
3104 			 */
3105 			readfunc = ( ssize_t(*)(int, void *, size_t) ) read;
3106 		else
3107 			readfunc = read_one_line;
3108 	}
3109 
3110 	newfile = false;
3111 	if (sourcefile == srcfiles)
3112 		return NULL;
3113 
3114 	if (sourcefile->stype == SRC_CMDLINE) {
3115 		if (sourcefile->bufsize == 0) {
3116 			sourcefile->bufsize = strlen(sourcefile->src);
3117 			lexptr = lexptr_begin = lexeme = sourcefile->src;
3118 			lexend = lexptr + sourcefile->bufsize;
3119 			sourceline = 1;
3120 			if (sourcefile->bufsize == 0) {
3121 				/*
3122 				 * Yet Another Special case:
3123 				 *	gawk '' /path/name
3124 				 * Sigh.
3125 				 */
3126 				static bool warned = false;
3127 
3128 				if (do_lint && ! warned) {
3129 					warned = true;
3130 					lintwarn(_("empty program text on command line"));
3131 				}
3132 				lexeof = true;
3133 			}
3134 		} else if (sourcefile->buf == NULL  && *(lexptr-1) != '\n') {
3135 			/*
3136 			 * The following goop is to ensure that the source
3137 			 * ends with a newline and that the entire current
3138 			 * line is available for error messages.
3139 			 */
3140 			int offset;
3141 			char *buf;
3142 
3143 			offset = lexptr - lexeme;
3144 			for (scan = lexeme; scan > lexptr_begin; scan--)
3145 				if (*scan == '\n') {
3146 					scan++;
3147 					break;
3148 				}
3149 			savelen = lexptr - scan;
3150 			emalloc(buf, char *, savelen + 1, "get_src_buf");
3151 			memcpy(buf, scan, savelen);
3152 			thisline = buf;
3153 			lexptr = buf + savelen;
3154 			*lexptr = '\n';
3155 			lexeme = lexptr - offset;
3156 			lexptr_begin = buf;
3157 			lexend = lexptr + 1;
3158 			sourcefile->buf = buf;
3159 		} else
3160 			lexeof = true;
3161 		return lexptr;
3162 	}
3163 
3164 	if (sourcefile->fd <= INVALID_HANDLE) {
3165 		int fd;
3166 		int l;
3167 
3168 		source = sourcefile->src;
3169 		if (source == NULL)
3170 			return NULL;
3171 		fd = srcopen(sourcefile);
3172 		if (fd <= INVALID_HANDLE) {
3173 			char *in;
3174 
3175 			/* suppress file name and line no. in error mesg */
3176 			in = source;
3177 			source = NULL;
3178 			error(_("cannot open source file `%s' for reading: %s"),
3179 				in, strerror(errno));
3180 			errcount++;
3181 			lexeof = true;
3182 			return sourcefile->src;
3183 		}
3184 
3185 		sourcefile->fd = fd;
3186 		l = optimal_bufsize(fd, &sbuf);
3187 		/*
3188 		 * Make sure that something silly like
3189 		 * 	AWKBUFSIZE=8 make check
3190 		 * works ok.
3191 		 */
3192 #define A_DECENT_BUFFER_SIZE	128
3193 		if (l < A_DECENT_BUFFER_SIZE)
3194 			l = A_DECENT_BUFFER_SIZE;
3195 #undef A_DECENT_BUFFER_SIZE
3196 		sourcefile->bufsize = l;
3197 		newfile = true;
3198 		emalloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
3199 		memset(sourcefile->buf, '\0', sourcefile->bufsize);	// keep valgrind happy
3200 		lexptr = lexptr_begin = lexeme = sourcefile->buf;
3201 		savelen = 0;
3202 		sourceline = 1;
3203 		thisline = NULL;
3204 	} else {
3205 		/*
3206 		 * Here, we retain the current source line in the beginning of the buffer.
3207 		 */
3208 		int offset;
3209 		for (scan = lexeme; scan > lexptr_begin; scan--)
3210 			if (*scan == '\n') {
3211 				scan++;
3212 				break;
3213 			}
3214 
3215 		savelen = lexptr - scan;
3216 		offset = lexptr - lexeme;
3217 
3218 		if (savelen > 0) {
3219 			/*
3220 			 * Need to make sure we have room left for reading new text;
3221 			 * grow the buffer (by doubling, an arbitrary choice), if the retained line
3222 			 * takes up more than a certain percentage (50%, again an arbitrary figure)
3223 			 * of the available space.
3224 			 */
3225 
3226 			if (savelen > sourcefile->bufsize / 2) { /* long line or token  */
3227 				sourcefile->bufsize *= 2;
3228 				erealloc(sourcefile->buf, char *, sourcefile->bufsize, "get_src_buf");
3229 				scan = sourcefile->buf + (scan - lexptr_begin);
3230 				lexptr_begin = sourcefile->buf;
3231 			}
3232 
3233 			thisline = lexptr_begin;
3234 			memmove(thisline, scan, savelen);
3235 			lexptr = thisline + savelen;
3236 			lexeme = lexptr - offset;
3237 		} else {
3238 			savelen = 0;
3239 			lexptr = lexeme = lexptr_begin;
3240 			thisline = NULL;
3241 		}
3242 	}
3243 
3244 	n = (*readfunc)(sourcefile->fd, lexptr, sourcefile->bufsize - savelen);
3245 	if (n == -1) {
3246 		error(_("cannot read source file `%s': %s"),
3247 				source, strerror(errno));
3248 		errcount++;
3249 		lexeof = true;
3250 	} else {
3251 		lexend = lexptr + n;
3252 		if (n == 0) {
3253 			static bool warned = false;
3254 			if (do_lint && newfile && ! warned) {
3255 				warned = true;
3256 				sourceline = 0;
3257 				lintwarn(_("source file `%s' is empty"), source);
3258 			}
3259 			lexeof = true;
3260 		}
3261 	}
3262 	return sourcefile->buf;
3263 }
3264 
3265 /* tokadd --- add a character to the token buffer */
3266 
3267 #define	tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
3268 
3269 /* tokexpand --- grow the token buffer */
3270 
3271 static char *
tokexpand()3272 tokexpand()
3273 {
3274 	static size_t toksize;
3275 	int tokoffset;
3276 
3277 	if (tokstart != NULL) {
3278 		tokoffset = tok - tokstart;
3279 		toksize *= 2;
3280 		erealloc(tokstart, char *, toksize, "tokexpand");
3281 		tok = tokstart + tokoffset;
3282 	} else {
3283 		toksize = 60;
3284 		emalloc(tokstart, char *, toksize, "tokexpand");
3285 		tok = tokstart;
3286 	}
3287 	tokend = tokstart + toksize;
3288 	return tok;
3289 }
3290 
3291 /* check_bad_char --- fatal if c isn't allowed in gawk source code */
3292 
3293 /*
3294  * The error message was inspired by someone who decided to put
3295  * a physical \0 byte into the source code to see what would
3296  * happen and then filed a bug report about it.  Sigh.
3297  */
3298 
3299 static void
check_bad_char(int c)3300 check_bad_char(int c)
3301 {
3302 	/* allow escapes. needed for autoconf. bleah. */
3303 	switch (c) {
3304 	case '\a':
3305 	case '\b':
3306 	case '\f':
3307 	case '\n':
3308 	case '\r':
3309 	case '\t':
3310 		return;
3311 	default:
3312 		break;
3313 	}
3314 
3315 	if (iscntrl(c) && ! isspace(c))
3316 		// This is a PEBKAC error, but we'll be nice and not say so.
3317 		fatal(_("error: invalid character '\\%03o' in source code"), c & 0xFF);
3318 }
3319 
3320 /* nextc --- get the next input character */
3321 
3322 // For namespaces, -e chunks must be syntactic units.
3323 #define NO_CONTINUE_SOURCE_STRINGS	1
3324 
3325 static int
nextc(bool check_for_bad)3326 nextc(bool check_for_bad)
3327 {
3328 	if (gawk_mb_cur_max > 1) {
3329 again:
3330 #ifdef NO_CONTINUE_SOURCE_STRINGS
3331 		if (lexeof)
3332 			return END_FILE;
3333 #else
3334 		if (lexeof) {
3335 			if (sourcefile->next == srcfiles)
3336 				return END_FILE;
3337 			else
3338 				next_sourcefile();
3339 		}
3340 #endif
3341 		if (lexptr == NULL || lexptr >= lexend) {
3342 			if (get_src_buf())
3343 				goto again;
3344 			return END_SRC;
3345 		}
3346 
3347 		/* Update the buffer index.  */
3348 		cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 :
3349 			cur_ring_idx + 1;
3350 
3351 		/* Did we already check the current character?  */
3352 		if (cur_char_ring[cur_ring_idx] == 0) {
3353 			/* No, we need to check the next character on the buffer.  */
3354 			int idx, work_ring_idx = cur_ring_idx;
3355 			mbstate_t tmp_state;
3356 			size_t mbclen;
3357 
3358 			for (idx = 0; lexptr + idx < lexend; idx++) {
3359 				tmp_state = cur_mbstate;
3360 				mbclen = mbrlen(lexptr, idx + 1, &tmp_state);
3361 
3362 				if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) {
3363 					/* It is a singlebyte character, non-complete multibyte
3364 					   character or EOF.  We treat it as a singlebyte
3365 					   character.  */
3366 					cur_char_ring[work_ring_idx] = 1;
3367 					break;
3368 				} else if (mbclen == (size_t)-2) {
3369 					/* It is not a complete multibyte character.  */
3370 					cur_char_ring[work_ring_idx] = idx + 1;
3371 				} else {
3372 					/* mbclen > 1 */
3373 					cur_char_ring[work_ring_idx] = mbclen;
3374 					break;
3375 				}
3376 				work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
3377 					0 : work_ring_idx + 1;
3378 			}
3379 			cur_mbstate = tmp_state;
3380 
3381 			/* Put a mark on the position on which we write next character.  */
3382 			work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
3383 				0 : work_ring_idx + 1;
3384 			cur_char_ring[work_ring_idx] = 0;
3385 		}
3386 		if (check_for_bad || *lexptr == '\0')
3387 			check_bad_char(*lexptr);
3388 
3389 		return (int) (unsigned char) *lexptr++;
3390 	} else {
3391 		do {
3392 #ifdef NO_CONTINUE_SOURCE_STRINGS
3393 			if (lexeof)
3394 				return END_FILE;
3395 #else
3396 			if (lexeof) {
3397 				if (sourcefile->next == srcfiles)
3398 					return END_FILE;
3399 				else
3400 					next_sourcefile();
3401 			}
3402 #endif
3403 			if (lexptr && lexptr < lexend) {
3404 				if (check_for_bad || *lexptr == '\0')
3405 					check_bad_char(*lexptr);
3406 				return ((int) (unsigned char) *lexptr++);
3407 			}
3408 		} while (get_src_buf());
3409 		return END_SRC;
3410 	}
3411 }
3412 #undef NO_CONTINUE_SOURCE_STRINGS
3413 
3414 /* pushback --- push a character back on the input */
3415 
3416 static inline void
pushback(void)3417 pushback(void)
3418 {
3419 	if (gawk_mb_cur_max > 1)
3420 		cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
3421 			cur_ring_idx - 1;
3422 	(! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
3423 }
3424 
3425 /*
3426  * get_comment --- collect comment text.
3427  * 	Flag = EOL_COMMENT for end-of-line comments.
3428  * 	Flag = BLOCK_COMMENT for self-contained comments.
3429  */
3430 
3431 static int
get_comment(enum commenttype flag,INSTRUCTION ** comment_instruction)3432 get_comment(enum commenttype flag, INSTRUCTION **comment_instruction)
3433 {
3434 	int c;
3435 	int sl;
3436 	char *p1;
3437 	char *p2;
3438 
3439 	tok = tokstart;
3440 	tokadd('#');
3441 	sl = sourceline;
3442 
3443 	while (true) {
3444 		while ((c = nextc(false)) != '\n' && c != END_FILE) {
3445 			/* ignore \r characters */
3446 			if (c != '\r')
3447 				tokadd(c);
3448 		}
3449 		if (flag == EOL_COMMENT) {
3450 			/* comment at end of line.  */
3451 			if (c == '\n')
3452 				tokadd(c);
3453 			break;
3454 		}
3455 		if (c == '\n') {
3456 			tokadd(c);
3457 			sourceline++;
3458 			do {
3459 				c = nextc(false);
3460 				if (c == '\n') {
3461 					sourceline++;
3462 					tokadd(c);
3463 				}
3464 			} while (c != END_FILE && isspace(c));
3465 			if (c == END_FILE)
3466 				break;
3467 			else if (c != '#') {
3468 				pushback();
3469 				sourceline--;
3470 				break;
3471 			} else
3472 				tokadd(c);
3473 		} else
3474 			break;
3475 	}
3476 
3477 	/* remove any trailing blank lines (consecutive \n) from comment */
3478 	p1 = tok - 1;
3479 	p2 = tok - 2;
3480 	while (*p1 == '\n' && *p2 == '\n') {
3481 		p1--;
3482 		p2--;
3483 		tok--;
3484 	}
3485 
3486 	(*comment_instruction) = bcalloc(Op_comment, 1, sl);
3487 	(*comment_instruction)->source_file = source;
3488 	(*comment_instruction)->memory = make_str_node(tokstart, tok - tokstart, 0);
3489 	(*comment_instruction)->memory->comment_type = flag;
3490 
3491 	return c;
3492 }
3493 
3494 /* allow_newline --- allow newline after &&, ||, ? and : */
3495 
3496 static void
allow_newline(INSTRUCTION ** new_comment)3497 allow_newline(INSTRUCTION **new_comment)
3498 {
3499 	int c;
3500 
3501 	for (;;) {
3502 		c = nextc(true);
3503 		if (c == END_FILE) {
3504 			pushback();
3505 			break;
3506 		}
3507 		if (c == '#') {
3508 			if (do_pretty_print && ! do_profile) {
3509 				/* collect comment byte code iff doing pretty print but not profiling.  */
3510 				c = get_comment(EOL_COMMENT, new_comment);
3511 			} else {
3512 				while ((c = nextc(false)) != '\n' && c != END_FILE)
3513 					continue;
3514 			}
3515 			if (c == END_FILE) {
3516 				pushback();
3517 				break;
3518 			}
3519 		}
3520 		if (c == '\n')
3521 			sourceline++;
3522 		if (! isspace(c)) {
3523 			pushback();
3524 			break;
3525 		}
3526 	}
3527 }
3528 
3529 /* newline_eof --- return newline or EOF as needed and adjust variables */
3530 
3531 /*
3532  * This routine used to be a macro, however GCC 4.6.2 warned about
3533  * the result of a computation not being used.  Converting to a function
3534  * removes the warnings.
3535  */
3536 
3537 static int
newline_eof()3538 newline_eof()
3539 {
3540 	/* NB: a newline at end does not start a source line. */
3541 	if (lasttok != NEWLINE) {
3542                 pushback();
3543 		if (do_lint && ! eof_warned) {
3544         		lintwarn(_("source file does not end in newline"));
3545 			eof_warned = true;
3546 		}
3547 		sourceline++;
3548 		return NEWLINE;
3549 	}
3550 
3551 	sourceline--;
3552 	eof_warned = false;
3553 	return LEX_EOF;
3554 }
3555 
3556 /* yylex --- Read the input and turn it into tokens. */
3557 
3558 static int
3559 #ifdef USE_EBCDIC
yylex_ebcdic(void)3560 yylex_ebcdic(void)
3561 #else
3562 yylex(void)
3563 #endif
3564 {
3565 	int c;
3566 	bool seen_e = false;		/* These are for numbers */
3567 	bool seen_point = false;
3568 	bool esc_seen;		/* for literal strings */
3569 	int mid;
3570 	int base;
3571 	static bool did_newline = false;
3572 	char *tokkey;
3573 	bool inhex = false;
3574 	bool intlstr = false;
3575 	AWKNUM d;
3576 	bool collecting_typed_regexp = false;
3577 	static int qm_col_count = 0;
3578 
3579 #define GET_INSTRUCTION(op) bcalloc(op, 1, sourceline)
3580 
3581 #define NEWLINE_EOF newline_eof()
3582 
3583 	yylval = (INSTRUCTION *) NULL;
3584 	if (lasttok == SUBSCRIPT) {
3585 		lasttok = 0;
3586 		return SUBSCRIPT;
3587 	}
3588 
3589 	if (lasttok == LEX_EOF)		/* error earlier in current source, must give up !! */
3590 		return 0;
3591 
3592 	c = nextc(! want_regexp);
3593 	if (c == END_SRC)
3594 		return 0;
3595 	if (c == END_FILE)
3596 		return lasttok = NEWLINE_EOF;
3597 	pushback();
3598 
3599 #if defined __EMX__
3600 	/*
3601 	 * added for OS/2's extproc feature of cmd.exe
3602 	 * (like #! in BSD sh)
3603 	 */
3604 	if (strncasecmp(lexptr, "extproc ", 8) == 0) {
3605 		while (*lexptr && *lexptr != '\n')
3606 			lexptr++;
3607 	}
3608 #endif
3609 
3610 	lexeme = lexptr;
3611 	thisline = NULL;
3612 
3613 collect_regexp:
3614 	if (want_regexp) {
3615 		int in_brack = 0;	/* count brackets, [[:alnum:]] allowed */
3616 		int b_index = -1;
3617 		int cur_index = 0;
3618 
3619 		/*
3620 		 * Here is what's ok with brackets:
3621 		 *
3622 		 * [..[..] []] [^]] [.../...]
3623 		 * [...\[...] [...\]...] [...\/...]
3624 		 *
3625 		 * (Remember that all of the above are inside /.../)
3626 		 *
3627 		 * The code for \ handles \[, \] and \/.
3628 		 *
3629 		 * Otherwise, track the first open [ position, and if
3630 		 * an embedded ] occurs, allow it to pass through
3631 		 * if it's right after the first [ or after [^.
3632 		 *
3633 		 * Whew!
3634 		 */
3635 
3636 		want_regexp = false;
3637 		tok = tokstart;
3638 		for (;;) {
3639 			c = nextc(false);
3640 
3641 			cur_index = tok - tokstart;
3642 			if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
3643 			case '[':
3644 				if (nextc(false) == ':' || in_brack == 0) {
3645 					in_brack++;
3646 					if (in_brack == 1)
3647 						b_index = tok - tokstart;
3648 				}
3649 				pushback();
3650 				break;
3651 			case ']':
3652 				if (in_brack > 0
3653 				    && (cur_index == b_index + 1
3654 					|| (cur_index == b_index + 2 && tok[-1] == '^')))
3655 					; /* do nothing */
3656 				else {
3657 					in_brack--;
3658 					if (in_brack == 0)
3659 						b_index = -1;
3660 				}
3661 				break;
3662 			case '\\':
3663 				if ((c = nextc(false)) == END_FILE) {
3664 					pushback();
3665 					yyerror(_("unterminated regexp ends with `\\' at end of file"));
3666 					goto end_regexp; /* kludge */
3667 				}
3668 				if (c == '\r')	/* allow MS-DOS files. bleah */
3669 					c = nextc(true);
3670 				if (c == '\n') {
3671 					sourceline++;
3672 					continue;
3673 				} else {
3674 					tokadd('\\');
3675 					tokadd(c);
3676 					continue;
3677 				}
3678 				break;
3679 			case '/':	/* end of the regexp */
3680 				if (in_brack > 0)
3681 					break;
3682 end_regexp:
3683 				yylval = GET_INSTRUCTION(Op_token);
3684 				yylval->lextok = estrdup(tokstart, tok - tokstart);
3685 				if (do_lint) {
3686 					int peek = nextc(true);
3687 
3688 					pushback();
3689 					if (peek == 'i' || peek == 's') {
3690 						if (source)
3691 							lintwarn(
3692 						_("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"),
3693 								source, sourceline, peek);
3694 						else
3695 							lintwarn(
3696 						_("tawk regex modifier `/.../%c' doesn't work in gawk"),
3697 								peek);
3698 					}
3699 				}
3700 				if (collecting_typed_regexp) {
3701 					collecting_typed_regexp = false;
3702 					lasttok = TYPED_REGEXP;
3703 				} else
3704 					lasttok = REGEXP;
3705 
3706 				return lasttok;
3707 			case '\n':
3708 				pushback();
3709 				yyerror(_("unterminated regexp"));
3710 				goto end_regexp;	/* kludge */
3711 			case END_FILE:
3712 				pushback();
3713 				yyerror(_("unterminated regexp at end of file"));
3714 				goto end_regexp;	/* kludge */
3715 			}
3716 			tokadd(c);
3717 		}
3718 	}
3719 retry:
3720 
3721 	/* skipping \r is a hack, but windows is just too pervasive. sigh. */
3722 	while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
3723 		continue;
3724 
3725 	lexeme = lexptr ? lexptr - 1 : lexptr;
3726 	thisline = NULL;
3727 	tok = tokstart;
3728 
3729 	if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
3730 	switch (c) {
3731 	case END_SRC:
3732 		return 0;
3733 
3734 	case END_FILE:
3735 		return lasttok = NEWLINE_EOF;
3736 
3737 	case '\n':
3738 		sourceline++;
3739 		return lasttok = NEWLINE;
3740 
3741 	case '#':		/* it's a comment */
3742 		yylval = NULL;
3743 		if (do_pretty_print && ! do_profile) {
3744 			/*
3745 			 * Collect comment byte code iff doing pretty print
3746 			 * but not profiling.
3747 			 */
3748 			INSTRUCTION *new_comment;
3749 
3750 			if (lasttok == NEWLINE || lasttok == 0)
3751 				c = get_comment(BLOCK_COMMENT, & new_comment);
3752 			else
3753 				c = get_comment(EOL_COMMENT, & new_comment);
3754 
3755 			yylval = new_comment;
3756 
3757 			if (c == END_FILE) {
3758 				pushback();
3759 				return lasttok = NEWLINE;
3760 			}
3761 		} else {
3762 			while ((c = nextc(false)) != '\n') {
3763 				if (c == END_FILE)
3764 					return lasttok = NEWLINE_EOF;
3765 			}
3766 		}
3767 		sourceline++;
3768 		return lasttok = NEWLINE;
3769 
3770 	case '@':
3771 		c = nextc(true);
3772 		if (c == '/') {
3773 			want_regexp = true;
3774 			collecting_typed_regexp = true;
3775 			goto collect_regexp;
3776 		}
3777 		pushback();
3778 		at_seen = true;
3779 		return lasttok = '@';
3780 
3781 	case '\\':
3782 #ifdef RELAXED_CONTINUATION
3783 		/*
3784 		 * This code purports to allow comments and/or whitespace
3785 		 * after the `\' at the end of a line used for continuation.
3786 		 * Use it at your own risk. We think it's a bad idea, which
3787 		 * is why it's not on by default.
3788 		 */
3789 		yylval = NULL;
3790 		if (! do_traditional) {
3791 			INSTRUCTION *new_comment;
3792 
3793 			/* strip trailing white-space and/or comment */
3794 			while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
3795 				continue;
3796 			if (c == '#') {
3797 				static bool warned = false;
3798 
3799 				if (do_lint && ! warned) {
3800 					warned = true;
3801 					lintwarn(
3802 		_("use of `\\ #...' line continuation is not portable"));
3803 				}
3804 				if (do_pretty_print && ! do_profile) {
3805 					c = get_comment(EOL_COMMENT, & new_comment);
3806 					yylval = new_comment;
3807 					return lasttok = c;
3808 				} else {
3809 					while ((c = nextc(false)) != '\n')
3810 						if (c == END_FILE)
3811 							break;
3812 				}
3813 			}
3814 			pushback();
3815 		}
3816 #endif /* RELAXED_CONTINUATION */
3817 		c = nextc(true);
3818 		if (c == '\r')	/* allow MS-DOS files. bleah */
3819 			c = nextc(true);
3820 		if (c == '\n') {
3821 			sourceline++;
3822 			goto retry;
3823 		} else {
3824 			yyerror(_("backslash not last character on line"));
3825 			return lasttok = LEX_EOF;
3826 		}
3827 		break;
3828 
3829 	case '?':
3830 		qm_col_count++;
3831 		// fall through
3832 	case ':':
3833 		yylval = GET_INSTRUCTION(Op_cond_exp);
3834 		if (qm_col_count > 0) {
3835 			if (! do_posix) {
3836 				INSTRUCTION *new_comment = NULL;
3837 				allow_newline(& new_comment);
3838 				yylval->comment = new_comment;
3839 			}
3840 			if (c == ':')
3841 				qm_col_count--;
3842 		}
3843 		return lasttok = c;
3844 
3845 		/*
3846 		 * in_parens is undefined unless we are parsing a print
3847 		 * statement (in_print), but why bother with a check?
3848 		 */
3849 	case ')':
3850 		in_parens--;
3851 		return lasttok = c;
3852 
3853 	case '(':
3854 		in_parens++;
3855 		return lasttok = c;
3856 	case '$':
3857 		yylval = GET_INSTRUCTION(Op_field_spec);
3858 		return lasttok = c;
3859 	case '{':
3860 		if (++in_braces == 1)
3861 			firstline = sourceline;
3862 	case ';':
3863 	case ',':
3864 	case '[':
3865 			return lasttok = c;
3866 	case ']':
3867 		c = nextc(true);
3868 		pushback();
3869 		if (c == '[') {
3870 			if (do_traditional)
3871 				fatal(_("multidimensional arrays are a gawk extension"));
3872 			if (do_lint_extensions)
3873 				lintwarn(_("multidimensional arrays are a gawk extension"));
3874 			yylval = GET_INSTRUCTION(Op_sub_array);
3875 			lasttok = ']';
3876 		} else {
3877 			yylval = GET_INSTRUCTION(Op_subscript);
3878 			lasttok = SUBSCRIPT;	/* end of subscripts */
3879 		}
3880 		return ']';
3881 
3882 	case '*':
3883 		if ((c = nextc(true)) == '=') {
3884 			yylval = GET_INSTRUCTION(Op_assign_times);
3885 			return lasttok = ASSIGNOP;
3886 		} else if (do_posix) {
3887 			pushback();
3888 			yylval = GET_INSTRUCTION(Op_times);
3889 			return lasttok = '*';
3890 		} else if (c == '*') {
3891 			/* make ** and **= aliases for ^ and ^= */
3892 			static bool did_warn_op = false, did_warn_assgn = false;
3893 
3894 			if (nextc(true) == '=') {
3895 				if (! did_warn_assgn) {
3896 					did_warn_assgn = true;
3897 					if (do_lint)
3898 						lintwarn(_("POSIX does not allow operator `%s'"), "**=");
3899 					if (do_lint_old)
3900 						lintwarn(_("operator `%s' is not supported in old awk"), "**=");
3901 				}
3902 				yylval = GET_INSTRUCTION(Op_assign_exp);
3903 				return ASSIGNOP;
3904 			} else {
3905 				pushback();
3906 				if (! did_warn_op) {
3907 					did_warn_op = true;
3908 					if (do_lint)
3909 						lintwarn(_("POSIX does not allow operator `%s'"), "**");
3910 					if (do_lint_old)
3911 						lintwarn(_("operator `%s' is not supported in old awk"), "**");
3912 				}
3913 				yylval = GET_INSTRUCTION(Op_exp);
3914 				return lasttok = '^';
3915 			}
3916 		}
3917 		pushback();
3918 		yylval = GET_INSTRUCTION(Op_times);
3919 		return lasttok = '*';
3920 
3921 	case '/':
3922 		if (nextc(false) == '=') {
3923 			pushback();
3924 			return lasttok = SLASH_BEFORE_EQUAL;
3925 		}
3926 		pushback();
3927 		yylval = GET_INSTRUCTION(Op_quotient);
3928 		return lasttok = '/';
3929 
3930 	case '%':
3931 		if (nextc(true) == '=') {
3932 			yylval = GET_INSTRUCTION(Op_assign_mod);
3933 			return lasttok = ASSIGNOP;
3934 		}
3935 		pushback();
3936 		yylval = GET_INSTRUCTION(Op_mod);
3937 		return lasttok = '%';
3938 
3939 	case '^':
3940 	{
3941 		static bool did_warn_op = false, did_warn_assgn = false;
3942 
3943 		if (nextc(true) == '=') {
3944 			if (do_lint_old && ! did_warn_assgn) {
3945 				did_warn_assgn = true;
3946 				lintwarn(_("operator `%s' is not supported in old awk"), "^=");
3947 			}
3948 			yylval = GET_INSTRUCTION(Op_assign_exp);
3949 			return lasttok = ASSIGNOP;
3950 		}
3951 		pushback();
3952 		if (do_lint_old && ! did_warn_op) {
3953 			did_warn_op = true;
3954 			lintwarn(_("operator `%s' is not supported in old awk"), "^");
3955 		}
3956 		yylval = GET_INSTRUCTION(Op_exp);
3957 		return lasttok = '^';
3958 	}
3959 
3960 	case '+':
3961 		if ((c = nextc(true)) == '=') {
3962 			yylval = GET_INSTRUCTION(Op_assign_plus);
3963 			return lasttok = ASSIGNOP;
3964 		}
3965 		if (c == '+') {
3966 			yylval = GET_INSTRUCTION(Op_symbol);
3967 			return lasttok = INCREMENT;
3968 		}
3969 		pushback();
3970 		yylval = GET_INSTRUCTION(Op_plus);
3971 		return lasttok = '+';
3972 
3973 	case '!':
3974 		if ((c = nextc(true)) == '=') {
3975 			yylval = GET_INSTRUCTION(Op_notequal);
3976 			return lasttok = RELOP;
3977 		}
3978 		if (c == '~') {
3979 			yylval = GET_INSTRUCTION(Op_nomatch);
3980 			return lasttok = MATCHOP;
3981 		}
3982 		pushback();
3983 		yylval = GET_INSTRUCTION(Op_symbol);
3984 		return lasttok = '!';
3985 
3986 	case '<':
3987 		if (nextc(true) == '=') {
3988 			yylval = GET_INSTRUCTION(Op_leq);
3989 			return lasttok = RELOP;
3990 		}
3991 		yylval = GET_INSTRUCTION(Op_less);
3992 		pushback();
3993 		return lasttok = '<';
3994 
3995 	case '=':
3996 		if (nextc(true) == '=') {
3997 			yylval = GET_INSTRUCTION(Op_equal);
3998 			return lasttok = RELOP;
3999 		}
4000 		yylval = GET_INSTRUCTION(Op_assign);
4001 		pushback();
4002 		return lasttok = ASSIGN;
4003 
4004 	case '>':
4005 		if ((c = nextc(true)) == '=') {
4006 			yylval = GET_INSTRUCTION(Op_geq);
4007 			return lasttok = RELOP;
4008 		} else if (c == '>') {
4009 			yylval = GET_INSTRUCTION(Op_symbol);
4010 			yylval->redir_type = redirect_append;
4011 			return lasttok = IO_OUT;
4012 		}
4013 		pushback();
4014 		if (in_print && in_parens == 0) {
4015 			yylval = GET_INSTRUCTION(Op_symbol);
4016 			yylval->redir_type = redirect_output;
4017 			return lasttok = IO_OUT;
4018 		}
4019 		yylval = GET_INSTRUCTION(Op_greater);
4020 		return lasttok = '>';
4021 
4022 	case '~':
4023 		yylval = GET_INSTRUCTION(Op_match);
4024 		return lasttok = MATCHOP;
4025 
4026 	case '}':
4027 		/*
4028 		 * Added did newline stuff.  Easier than
4029 		 * hacking the grammar.
4030 		 */
4031 		if (did_newline) {
4032 			did_newline = false;
4033 			if (--in_braces == 0)
4034 				lastline = sourceline;
4035 			return lasttok = c;
4036 		}
4037 		did_newline = true;
4038 		--lexptr;	/* pick up } next time */
4039 		return lasttok = NEWLINE;
4040 
4041 	case '"':
4042 	string:
4043 		esc_seen = false;
4044 		/*
4045 		 * Allow any kind of junk in quoted string,
4046 		 * so pass false to nextc().
4047 		 */
4048 		while ((c = nextc(false)) != '"') {
4049 			if (c == '\n') {
4050 				pushback();
4051 				yyerror(_("unterminated string"));
4052 				return lasttok = LEX_EOF;
4053 			}
4054 			if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
4055 			    c == '\\') {
4056 				c = nextc(true);
4057 				if (c == '\r')	/* allow MS-DOS files. bleah */
4058 					c = nextc(true);
4059 				if (c == '\n') {
4060 					if (do_posix)
4061 						fatal(_("POSIX does not allow physical newlines in string values"));
4062 					else if (do_lint)
4063 						lintwarn(_("backslash string continuation is not portable"));
4064 					sourceline++;
4065 					continue;
4066 				}
4067 				esc_seen = true;
4068 				if (! want_source || c != '"')
4069 					tokadd('\\');
4070 			}
4071 			if (c == END_FILE) {
4072 				pushback();
4073 				yyerror(_("unterminated string"));
4074 				return lasttok = LEX_EOF;
4075 			}
4076 			tokadd(c);
4077 		}
4078 		yylval = GET_INSTRUCTION(Op_token);
4079 		if (want_source) {
4080 			yylval->lextok = estrdup(tokstart, tok - tokstart);
4081 			return lasttok = FILENAME;
4082 		}
4083 
4084 		yylval->opcode = Op_push_i;
4085 		yylval->memory = make_str_node(tokstart,
4086 					tok - tokstart, esc_seen ? SCAN : 0);
4087 		if (intlstr) {
4088 			yylval->memory->flags |= INTLSTR;
4089 			intlstr = false;
4090 			if (do_intl)
4091 				dumpintlstr(yylval->memory->stptr, yylval->memory->stlen);
4092 		}
4093 		return lasttok = YSTRING;
4094 
4095 	case '-':
4096 		if ((c = nextc(true)) == '=') {
4097 			yylval = GET_INSTRUCTION(Op_assign_minus);
4098 			return lasttok = ASSIGNOP;
4099 		}
4100 		if (c == '-') {
4101 			yylval = GET_INSTRUCTION(Op_symbol);
4102 			return lasttok = DECREMENT;
4103 		}
4104 		pushback();
4105 		yylval = GET_INSTRUCTION(Op_minus);
4106 		return lasttok = '-';
4107 
4108 	case '.':
4109 		c = nextc(true);
4110 		pushback();
4111 		if (! isdigit(c))
4112 			return lasttok = '.';
4113 		else
4114 			c = '.';
4115 		/* FALL THROUGH */
4116 	case '0':
4117 	case '1':
4118 	case '2':
4119 	case '3':
4120 	case '4':
4121 	case '5':
4122 	case '6':
4123 	case '7':
4124 	case '8':
4125 	case '9':
4126 		/* It's a number */
4127 		for (;;) {
4128 			bool gotnumber = false;
4129 
4130 			tokadd(c);
4131 			switch (c) {
4132 			case 'x':
4133 			case 'X':
4134 				if (do_traditional)
4135 					goto done;
4136 				if (tok == tokstart + 2) {
4137 					int peek = nextc(true);
4138 
4139 					if (isxdigit(peek)) {
4140 						inhex = true;
4141 						pushback();	/* following digit */
4142 					} else {
4143 						pushback();	/* x or X */
4144 						goto done;
4145 					}
4146 				}
4147 				break;
4148 			case '.':
4149 				/* period ends exponent part of floating point number */
4150 				if (seen_point || seen_e) {
4151 					gotnumber = true;
4152 					break;
4153 				}
4154 				seen_point = true;
4155 				break;
4156 			case 'e':
4157 			case 'E':
4158 				if (inhex)
4159 					break;
4160 				if (seen_e) {
4161 					gotnumber = true;
4162 					break;
4163 				}
4164 				seen_e = true;
4165 				if ((c = nextc(true)) == '-' || c == '+') {
4166 					int c2 = nextc(true);
4167 
4168 					if (isdigit(c2)) {
4169 						tokadd(c);
4170 						tokadd(c2);
4171 					} else {
4172 						pushback();	/* non-digit after + or - */
4173 						pushback();	/* + or - */
4174 						pushback();	/* e or E */
4175 					}
4176 				} else if (! isdigit(c)) {
4177 					pushback();	/* character after e or E */
4178 					pushback();	/* e or E */
4179 				} else {
4180 					pushback();	/* digit */
4181 				}
4182 				break;
4183 			case 'a':
4184 			case 'A':
4185 			case 'b':
4186 			case 'B':
4187 			case 'c':
4188 			case 'C':
4189 			case 'D':
4190 			case 'd':
4191 			case 'f':
4192 			case 'F':
4193 				if (do_traditional || ! inhex)
4194 					goto done;
4195 				/* fall through */
4196 			case '0':
4197 			case '1':
4198 			case '2':
4199 			case '3':
4200 			case '4':
4201 			case '5':
4202 			case '6':
4203 			case '7':
4204 			case '8':
4205 			case '9':
4206 				break;
4207 			default:
4208 			done:
4209 				gotnumber = true;
4210 			}
4211 			if (gotnumber)
4212 				break;
4213 			c = nextc(true);
4214 		}
4215 		pushback();
4216 
4217 		tokadd('\0');
4218 		yylval = GET_INSTRUCTION(Op_push_i);
4219 
4220 		base = 10;
4221 		if (! do_traditional) {
4222 			base = get_numbase(tokstart, strlen(tokstart)-1, false);
4223 			if (do_lint) {
4224 				if (base == 8)
4225 					lintwarn("numeric constant `%.*s' treated as octal",
4226 						(int) strlen(tokstart)-1, tokstart);
4227 				else if (base == 16)
4228 					lintwarn("numeric constant `%.*s' treated as hexadecimal",
4229 						(int) strlen(tokstart)-1, tokstart);
4230 			}
4231 		}
4232 
4233 #ifdef HAVE_MPFR
4234 		if (do_mpfr) {
4235 			NODE *r;
4236 
4237 			if (! seen_point && ! seen_e) {
4238 				r = mpg_integer();
4239 				mpg_strtoui(r->mpg_i, tokstart, strlen(tokstart), NULL, base);
4240 				errno = 0;
4241 			} else {
4242 				int tval;
4243 				r = mpg_float();
4244 				tval = mpfr_strtofr(r->mpg_numbr, tokstart, NULL, base, ROUND_MODE);
4245 				errno = 0;
4246 				IEEE_FMT(r->mpg_numbr, tval);
4247 			}
4248 			yylval->memory = set_profile_text(r, tokstart, strlen(tokstart)-1);
4249 			return lasttok = YNUMBER;
4250 		}
4251 #endif
4252 		if (base != 10)
4253 			d = nondec2awknum(tokstart, strlen(tokstart)-1, NULL);
4254 		else
4255 			d = atof(tokstart);
4256 		yylval->memory = set_profile_text(make_number(d), tokstart, strlen(tokstart) - 1);
4257 		if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d)
4258 			yylval->memory->flags |= NUMINT;
4259 		return lasttok = YNUMBER;
4260 
4261 	case '&':
4262 		if ((c = nextc(true)) == '&') {
4263 			yylval = GET_INSTRUCTION(Op_and);
4264 			INSTRUCTION *new_comment = NULL;
4265 			allow_newline(& new_comment);
4266 			yylval->comment = new_comment;
4267 
4268 			return lasttok = LEX_AND;
4269 		}
4270 		pushback();
4271 		yylval = GET_INSTRUCTION(Op_symbol);
4272 		return lasttok = '&';
4273 
4274 	case '|':
4275 		if ((c = nextc(true)) == '|') {
4276 			yylval = GET_INSTRUCTION(Op_or);
4277 			INSTRUCTION *new_comment = NULL;
4278 			allow_newline(& new_comment);
4279 			yylval->comment = new_comment;
4280 
4281 			return lasttok = LEX_OR;
4282 		} else if (! do_traditional && c == '&') {
4283 			yylval = GET_INSTRUCTION(Op_symbol);
4284 			yylval->redir_type = redirect_twoway;
4285 
4286 			return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
4287 		}
4288 		pushback();
4289 		if (in_print && in_parens == 0) {
4290 			yylval = GET_INSTRUCTION(Op_symbol);
4291 			yylval->redir_type = redirect_pipe;
4292 			return lasttok = IO_OUT;
4293 		} else {
4294 			yylval = GET_INSTRUCTION(Op_symbol);
4295 			yylval->redir_type = redirect_pipein;
4296 			return lasttok = IO_IN;
4297 		}
4298 	}
4299 
4300 	if (! is_letter(c)) {
4301 		yyerror(_("invalid char '%c' in expression"), c);
4302 		return lasttok = LEX_EOF;
4303 	}
4304 
4305 	/*
4306 	 * Lots of fog here.  Consider:
4307 	 *
4308 	 * print "xyzzy"$_"foo"
4309 	 *
4310 	 * Without the check for ` lasttok != '$' ', this is parsed as
4311 	 *
4312 	 * print "xxyzz" $(_"foo")
4313 	 *
4314 	 * With the check, it is "correctly" parsed as three
4315 	 * string concatenations.  Sigh.  This seems to be
4316 	 * "more correct", but this is definitely one of those
4317 	 * occasions where the interactions are funny.
4318 	 */
4319 	if (! do_traditional && c == '_' && lasttok != '$') {
4320 		if ((c = nextc(true)) == '"') {
4321 			intlstr = true;
4322 			goto string;
4323 		}
4324 		pushback();
4325 		c = '_';
4326 	}
4327 
4328 	/* it's some type of name-type-thing.  Find its length. */
4329 	tok = tokstart;
4330 	while (c != END_FILE && is_identchar(c)) {
4331 		tokadd(c);
4332 		c = nextc(true);
4333 
4334 		if (! do_traditional && c == ':') {
4335 			int peek = nextc(true);
4336 
4337 			if (peek == ':') {	// saw identifier::
4338 				tokadd(c);
4339 				tokadd(c);
4340 				c = nextc(true);
4341 			} else
4342 				pushback();
4343 				// then continue around the loop, c == ':'
4344 		}
4345 	}
4346 	tokadd('\0');
4347 	pushback();
4348 
4349 	(void) validate_qualified_name(tokstart);
4350 
4351 	/* See if it is a special token. */
4352 	if ((mid = check_qualified_special(tokstart)) >= 0) {
4353 		static int warntab[sizeof(tokentab) / sizeof(tokentab[0])];
4354 		int class = tokentab[mid].class;
4355 
4356 		switch (class) {
4357 		case LEX_EVAL:
4358 		case LEX_INCLUDE:
4359 		case LEX_LOAD:
4360 		case LEX_NAMESPACE:
4361 			if (lasttok != '@')
4362 				goto out;
4363 		default:
4364 			break;
4365 		}
4366 
4367 		/* allow parameter names to shadow the names of gawk extension built-ins */
4368 		if ((tokentab[mid].flags & GAWKX) != 0) {
4369 			NODE *f;
4370 
4371 			switch (want_param_names) {
4372 			case FUNC_HEADER:
4373 				/* in header, defining parameter names */
4374 				goto out;
4375 			case FUNC_BODY:
4376 				/* in body, name must be in symbol table for it to be a parameter */
4377 				if ((f = lookup(tokstart)) != NULL) {
4378 					if (f->type == Node_builtin_func)
4379 						break;
4380 					else
4381 						goto out;
4382 				}
4383 				/* else
4384 					fall through */
4385 			case DONT_CHECK:
4386 				/* regular code */
4387 				break;
4388 			default:
4389 				cant_happen();
4390 				break;
4391 			}
4392 		}
4393 
4394 		if (do_lint) {
4395 			if (do_lint_extensions && (tokentab[mid].flags & GAWKX) != 0 && (warntab[mid] & GAWKX) == 0) {
4396 				lintwarn(_("`%s' is a gawk extension"),
4397 					tokentab[mid].operator);
4398 				warntab[mid] |= GAWKX;
4399 			}
4400 			if ((tokentab[mid].flags & NOT_POSIX) != 0 && (warntab[mid] & NOT_POSIX) == 0) {
4401 				lintwarn(_("POSIX does not allow `%s'"),
4402 					tokentab[mid].operator);
4403 				warntab[mid] |= NOT_POSIX;
4404 			}
4405 		}
4406 		if (do_lint_old && (tokentab[mid].flags & NOT_OLD) != 0
4407 				 && (warntab[mid] & NOT_OLD) == 0
4408 		) {
4409 			lintwarn(_("`%s' is not supported in old awk"),
4410 					tokentab[mid].operator);
4411 			warntab[mid] |= NOT_OLD;
4412 		}
4413 
4414 		if ((tokentab[mid].flags & BREAK) != 0)
4415 			break_allowed++;
4416 		if ((tokentab[mid].flags & CONTINUE) != 0)
4417 			continue_allowed++;
4418 
4419 		switch (class) {
4420 		case LEX_NAMESPACE:
4421 		case LEX_INCLUDE:
4422 		case LEX_LOAD:
4423 			want_source = true;
4424 			break;
4425 		case LEX_EVAL:
4426 			if (in_main_context())
4427 				goto out;
4428 			emalloc(tokkey, char *, tok - tokstart + 1, "yylex");
4429 			tokkey[0] = '@';
4430 			memcpy(tokkey + 1, tokstart, tok - tokstart);
4431 			yylval = GET_INSTRUCTION(Op_token);
4432 			yylval->lextok = tokkey;
4433 			break;
4434 
4435 		case LEX_FUNCTION:
4436 		case LEX_BEGIN:
4437 		case LEX_END:
4438 		case LEX_BEGINFILE:
4439 		case LEX_ENDFILE:
4440 			yylval = bcalloc(tokentab[mid].value, 4, sourceline);
4441 			break;
4442 
4443 		case LEX_FOR:
4444 		case LEX_WHILE:
4445 		case LEX_DO:
4446 		case LEX_SWITCH:
4447 			if (! do_pretty_print)
4448 				return lasttok = class;
4449 			/* fall through */
4450 		case LEX_CASE:
4451 			yylval = bcalloc(tokentab[mid].value, 2, sourceline);
4452 			break;
4453 
4454 		/*
4455 		 * These must be checked here, due to the LALR nature of the parser,
4456 		 * the rules for continue and break may not be reduced until after
4457 		 * a token that increments the xxx_allowed varibles is seen. Bleah.
4458 		 */
4459 		case LEX_CONTINUE:
4460 			if (! continue_allowed) {
4461 				error_ln(sourceline,
4462 					_("`continue' is not allowed outside a loop"));
4463 				errcount++;
4464 			}
4465 			goto make_instruction;
4466 
4467 		case LEX_BREAK:
4468 			if (! break_allowed) {
4469 				error_ln(sourceline,
4470 					_("`break' is not allowed outside a loop or switch"));
4471 				errcount++;
4472 			}
4473 			goto make_instruction;
4474 
4475 		default:
4476 make_instruction:
4477 			yylval = GET_INSTRUCTION(tokentab[mid].value);
4478 			if (class == LEX_BUILTIN || class == LEX_LENGTH)
4479 				yylval->builtin_idx = mid;
4480 			break;
4481 		}
4482 		return lasttok = class;
4483 	}
4484 out:
4485 	if (want_param_names == FUNC_HEADER)
4486 		tokkey = estrdup(tokstart, tok - tokstart - 1);
4487 	else
4488 		tokkey = qualify_name(tokstart, tok - tokstart - 1);
4489 
4490 	if (*lexptr == '(') {
4491 		yylval = bcalloc(Op_token, 2, sourceline);
4492 		yylval->lextok = tokkey;
4493 		return lasttok = FUNC_CALL;
4494 	} else {
4495 		static bool goto_warned = false;
4496 
4497 		yylval = GET_INSTRUCTION(Op_token);
4498 		yylval->lextok = tokkey;
4499 
4500 #define SMART_ALECK	1
4501 		if (SMART_ALECK
4502 		    && do_lint
4503 		    && ! goto_warned
4504 		    && tolower(tokkey[0]) == 'g'
4505 		    && strcasecmp(tokkey, "goto") == 0) {
4506 			goto_warned = true;
4507 			lintwarn(_("`goto' considered harmful!"));
4508 		}
4509 		return lasttok = NAME;
4510 	}
4511 
4512 #undef GET_INSTRUCTION
4513 #undef NEWLINE_EOF
4514 }
4515 
4516 /* It's EBCDIC in a Bison grammar, run for the hills!
4517 
4518    Or, convert single-character tokens coming out of yylex() from EBCDIC to
4519    ASCII values on-the-fly so that the parse tables need not be regenerated
4520    for EBCDIC systems.  */
4521 #ifdef USE_EBCDIC
4522 static int
yylex(void)4523 yylex(void)
4524 {
4525 	static char etoa_xlate[256];
4526 	static bool do_etoa_init = true;
4527 	int tok;
4528 
4529 	if (do_etoa_init)
4530 	{
4531 		for (tok = 0; tok < 256; tok++)
4532 			etoa_xlate[tok] = (char) tok;
4533 #ifdef HAVE___ETOA_L
4534 		/* IBM helpfully provides this function.  */
4535 		__etoa_l(etoa_xlate, sizeof(etoa_xlate));
4536 #else
4537 # error "An EBCDIC-to-ASCII translation function is needed for this system"
4538 #endif
4539 		do_etoa_init = false;
4540 	}
4541 
4542 	tok = yylex_ebcdic();
4543 
4544 	if (tok >= 0 && tok <= 0xFF)
4545 		tok = etoa_xlate[tok];
4546 
4547 	return tok;
4548 }
4549 #endif /* USE_EBCDIC */
4550 
4551 /* snode --- instructions for builtin functions. Checks for arg. count
4552              and supplies defaults where possible. */
4553 
4554 static INSTRUCTION *
snode(INSTRUCTION * subn,INSTRUCTION * r)4555 snode(INSTRUCTION *subn, INSTRUCTION *r)
4556 {
4557 	INSTRUCTION *arg;
4558 	INSTRUCTION *ip;
4559 	NODE *n;
4560 	int nexp = 0;
4561 	int args_allowed;
4562 	int idx = r->builtin_idx;
4563 
4564 	if (subn != NULL) {
4565 		INSTRUCTION *tp;
4566 		for (tp = subn->nexti; tp; tp = tp->nexti) {
4567 			tp = tp->lasti;
4568 			nexp++;
4569 		}
4570 		assert(nexp > 0);
4571 	}
4572 
4573 	/* check against how many args. are allowed for this builtin */
4574 	args_allowed = tokentab[idx].flags & ARGS;
4575 	if (args_allowed && (args_allowed & A(nexp)) == 0) {
4576 		yyerror(_("%d is invalid as number of arguments for %s"),
4577 				nexp, tokentab[idx].operator);
4578 		return NULL;
4579 	}
4580 
4581 	/* special processing for sub, gsub and gensub */
4582 
4583 	if (tokentab[idx].value == Op_sub_builtin) {
4584 		const char *operator = tokentab[idx].operator;
4585 
4586 		r->sub_flags = 0;
4587 
4588 		arg = subn->nexti;		/* first arg list */
4589 		(void) mk_rexp(arg);
4590 
4591 		if (strcmp(operator, "gensub") != 0) {
4592 			/* sub and gsub */
4593 
4594 			if (strcmp(operator, "gsub") == 0)
4595 				r->sub_flags |= GSUB;
4596 
4597 			arg = arg->lasti->nexti;	/* 2nd arg list */
4598 			if (nexp == 2) {
4599 				INSTRUCTION *expr;
4600 
4601 				expr = list_create(instruction(Op_push_i));
4602 				expr->nexti->memory = set_profile_text(make_number(0.0), "0", 1);
4603 				(void) mk_expression_list(subn,
4604 						list_append(expr, instruction(Op_field_spec)));
4605 			}
4606 
4607 			arg = arg->lasti->nexti; 	/* third arg list */
4608 			ip = arg->lasti;
4609 			if (ip->opcode == Op_push_i) {
4610 				if (do_lint)
4611 					lintwarn(_("%s: string literal as last argument of substitute has no effect"),
4612 						operator);
4613 				r->sub_flags |=	LITERAL;
4614 			} else {
4615 				if (make_assignable(ip) == NULL)
4616 					yyerror(_("%s third parameter is not a changeable object"),
4617 						operator);
4618 				else
4619 					ip->do_reference = true;
4620 			}
4621 
4622 			r->expr_count = count_expressions(&subn, false);
4623 			ip = subn->lasti;
4624 
4625 			(void) list_append(subn, r);
4626 
4627 			/* add after_assign code */
4628 			if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
4629 				(void) list_append(subn, instruction(Op_var_assign));
4630 				subn->lasti->assign_ctxt = Op_sub_builtin;
4631 				subn->lasti->assign_var = ip->memory->var_assign;
4632 			} else if (ip->opcode == Op_field_spec_lhs) {
4633 				(void) list_append(subn, instruction(Op_field_assign));
4634 				subn->lasti->assign_ctxt = Op_sub_builtin;
4635 				subn->lasti->field_assign = (Func_ptr) 0;
4636 				ip->target_assign = subn->lasti;
4637 			} else if (ip->opcode == Op_subscript_lhs) {
4638 				(void) list_append(subn, instruction(Op_subscript_assign));
4639 				subn->lasti->assign_ctxt = Op_sub_builtin;
4640 			}
4641 
4642 			return subn;
4643 
4644 		} else {
4645 			/* gensub */
4646 
4647 			r->sub_flags |= GENSUB;
4648 			if (nexp == 3) {
4649 				ip = instruction(Op_push_i);
4650 				ip->memory = set_profile_text(make_number(0.0), "0", 1);
4651 				(void) mk_expression_list(subn,
4652 						list_append(list_create(ip), instruction(Op_field_spec)));
4653 			}
4654 
4655 			r->expr_count = count_expressions(&subn, false);
4656 			return list_append(subn, r);
4657 		}
4658 	}
4659 
4660 #ifdef HAVE_MPFR
4661 	/* N.B.: If necessary, add special processing for alternate builtin, below */
4662 	if (do_mpfr && tokentab[idx].ptr2)
4663 		r->builtin =  tokentab[idx].ptr2;
4664 	else
4665 #endif
4666 		r->builtin = tokentab[idx].ptr;
4667 
4668 	/* special case processing for a few builtins */
4669 
4670 	if (r->builtin == do_length) {
4671 		if (nexp == 0) {
4672 		    /* no args. Use $0 */
4673 
4674 			INSTRUCTION *list;
4675 			r->expr_count = 1;
4676 			list = list_create(r);
4677 			(void) list_prepend(list, instruction(Op_field_spec));
4678 			(void) list_prepend(list, instruction(Op_push_i));
4679 			list->nexti->memory = set_profile_text(make_number(0.0), "0", 1);
4680 			return list;
4681 		} else {
4682 			arg = subn->nexti;
4683 			if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
4684 				arg->nexti->opcode = Op_push_arg;	/* argument may be array */
4685  		}
4686 	} else if (r->builtin == do_isarray) {
4687 		arg = subn->nexti;
4688 		if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
4689 			arg->nexti->opcode = Op_push_arg_untyped;	/* argument may be untyped */
4690 	} else if (r->builtin == do_typeof) {
4691 		arg = subn->nexti;
4692 		if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
4693 			arg->nexti->opcode = Op_push_arg_untyped;	/* argument may be untyped */
4694 		if (nexp == 2) {	/* 2nd argument there */
4695 			arg = subn->nexti->lasti->nexti;	/* 2nd arg list */
4696 			ip = arg->lasti;
4697 			if (ip->opcode == Op_push)
4698 				ip->opcode = Op_push_array;
4699 		}
4700 #ifdef SUPPLY_INTDIV
4701 	} else if (r->builtin == do_intdiv
4702 #ifdef HAVE_MPFR
4703 		   || r->builtin == MPF(intdiv)
4704 #endif
4705 			) {
4706 		arg = subn->nexti->lasti->nexti->lasti->nexti;	/* 3rd arg list */
4707 		ip = arg->lasti;
4708 		if (ip->opcode == Op_push)
4709 			ip->opcode = Op_push_array;
4710 #endif /* SUPPLY_INTDIV */
4711 	} else if (r->builtin == do_match) {
4712 		static bool warned = false;
4713 
4714 		arg = subn->nexti->lasti->nexti;	/* 2nd arg list */
4715 		(void) mk_rexp(arg);
4716 
4717 		if (nexp == 3) {	/* 3rd argument there */
4718 			if (do_lint_extensions && ! warned) {
4719 				warned = true;
4720 				lintwarn(_("match: third argument is a gawk extension"));
4721 			}
4722 			if (do_traditional) {
4723 				yyerror(_("match: third argument is a gawk extension"));
4724 				return NULL;
4725 			}
4726 
4727 			arg = arg->lasti->nexti; 	/* third arg list */
4728 			ip = arg->lasti;
4729 			if (/*ip == arg->nexti  && */ ip->opcode == Op_push)
4730 				ip->opcode = Op_push_array;
4731 		}
4732 	} else if (r->builtin == do_split) {
4733 		arg = subn->nexti->lasti->nexti;	/* 2nd arg list */
4734 		ip = arg->lasti;
4735 		if (ip->opcode == Op_push)
4736 			ip->opcode = Op_push_array;
4737 		if (nexp == 2) {
4738 			INSTRUCTION *expr;
4739 			expr = list_create(instruction(Op_push));
4740 			expr->nexti->memory = FS_node;
4741 			(void) mk_expression_list(subn, expr);
4742 		}
4743 		arg = arg->lasti->nexti;
4744 		n = mk_rexp(arg);
4745 		if (nexp == 2)
4746 			n->re_flags |= FS_DFLT;
4747 		if (nexp == 4) {
4748 			arg = arg->lasti->nexti;
4749 			ip = arg->lasti;
4750 			if (ip->opcode == Op_push)
4751 				ip->opcode = Op_push_array;
4752 		}
4753 	} else if (r->builtin == do_patsplit) {
4754 		arg = subn->nexti->lasti->nexti;	/* 2nd arg list */
4755 		ip = arg->lasti;
4756 		if (ip->opcode == Op_push)
4757 			ip->opcode = Op_push_array;
4758 		if (nexp == 2) {
4759 			INSTRUCTION *expr;
4760 			expr = list_create(instruction(Op_push));
4761 			expr->nexti->memory = FPAT_node;
4762 			(void) mk_expression_list(subn, expr);
4763 		}
4764 		arg = arg->lasti->nexti;
4765 		n = mk_rexp(arg);
4766 		if (nexp == 4) {
4767 			arg = arg->lasti->nexti;
4768 			ip = arg->lasti;
4769 			if (ip->opcode == Op_push)
4770 				ip->opcode = Op_push_array;
4771 		}
4772 	} else if (r->builtin == do_close) {
4773 		static bool warned = false;
4774 		if (nexp == 2) {
4775 			if (do_lint_extensions && ! warned) {
4776 				warned = true;
4777 				lintwarn(_("close: second argument is a gawk extension"));
4778 			}
4779 			if (do_traditional) {
4780 				yyerror(_("close: second argument is a gawk extension"));
4781 				return NULL;
4782 			}
4783 		}
4784 	} else if (do_intl					/* --gen-po */
4785 			&& r->builtin == do_dcgettext		/* dcgettext(...) */
4786 			&& subn->nexti->lasti->opcode == Op_push_i	/* 1st arg is constant */
4787 			&& (subn->nexti->lasti->memory->flags & STRING) != 0) {	/* it's a string constant */
4788 		/* ala xgettext, dcgettext("some string" ...) dumps the string */
4789 		NODE *str = subn->nexti->lasti->memory;
4790 
4791 		if ((str->flags & INTLSTR) != 0)
4792 			warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
4793 			/* don't dump it, the lexer already did */
4794 		else
4795 			dumpintlstr(str->stptr, str->stlen);
4796 	} else if (do_intl					/* --gen-po */
4797 			&& r->builtin == do_dcngettext		/* dcngettext(...) */
4798 			&& subn->nexti->lasti->opcode == Op_push_i	/* 1st arg is constant */
4799 			&& (subn->nexti->lasti->memory->flags & STRING) != 0	/* it's a string constant */
4800 			&& subn->nexti->lasti->nexti->lasti->opcode == Op_push_i	/* 2nd arg is constant too */
4801 			&& (subn->nexti->lasti->nexti->lasti->memory->flags & STRING) != 0) {	/* it's a string constant */
4802 		/* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
4803 		NODE *str1 = subn->nexti->lasti->memory;
4804 		NODE *str2 = subn->nexti->lasti->nexti->lasti->memory;
4805 
4806 		if (((str1->flags | str2->flags) & INTLSTR) != 0)
4807 			warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
4808 		else
4809 			dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
4810 	} else if (r->builtin == do_asort || r->builtin == do_asorti) {
4811 		arg = subn->nexti;	/* 1st arg list */
4812 		ip = arg->lasti;
4813 		if (ip->opcode == Op_push)
4814 			ip->opcode = Op_push_array;
4815 		if (nexp >= 2) {
4816 			arg = ip->nexti;
4817 			ip = arg->lasti;
4818 			if (ip->opcode == Op_push)
4819 				ip->opcode = Op_push_array;
4820 		}
4821 	}
4822 	else if (r->builtin == do_index) {
4823 		arg = subn->nexti->lasti->nexti;	/* 2nd arg list */
4824 		ip = arg->lasti;
4825 		if (ip->opcode == Op_match_rec || ip->opcode == Op_push_re)
4826 			fatal(_("index: regexp constant as second argument is not allowed"));
4827 	}
4828 #ifdef ARRAYDEBUG
4829 	else if (r->builtin == do_adump) {
4830 		ip = subn->nexti->lasti;
4831 		if (ip->opcode == Op_push)
4832 			ip->opcode = Op_push_array;
4833 	}
4834 #endif
4835 
4836 	if (subn != NULL) {
4837 		r->expr_count = count_expressions(&subn, false);
4838 		return list_append(subn, r);
4839 	}
4840 
4841 	r->expr_count = 0;
4842 	return list_create(r);
4843 }
4844 
4845 
4846 /* parms_shadow --- check if parameters shadow globals */
4847 
4848 static int
parms_shadow(INSTRUCTION * pc,bool * shadow)4849 parms_shadow(INSTRUCTION *pc, bool *shadow)
4850 {
4851 	int pcount, i;
4852 	bool ret = false;
4853 	NODE *func, *fp;
4854 	char *fname;
4855 
4856 	func = pc->func_body;
4857 	fname = func->vname;
4858 	fp = func->fparms;
4859 
4860 #if 0	/* can't happen, already exited if error ? */
4861 	if (fname == NULL || func == NULL)	/* error earlier */
4862 		return false;
4863 #endif
4864 
4865 	pcount = func->param_cnt;
4866 
4867 	if (pcount == 0)		/* no args, no problem */
4868 		return 0;
4869 
4870 	source = pc->source_file;
4871 	sourceline = pc->source_line;
4872 	/*
4873 	 * Use warning() and not lintwarn() so that can warn
4874 	 * about all shadowed parameters.
4875 	 */
4876 	for (i = 0; i < pcount; i++) {
4877 		if (lookup(fp[i].param) != NULL) {
4878 			warning(
4879 	_("function `%s': parameter `%s' shadows global variable"),
4880 					fname, fp[i].param);
4881 			ret = true;
4882 		}
4883 	}
4884 
4885 	*shadow |= ret;
4886 	return 0;
4887 }
4888 
4889 /* valinfo --- dump var info */
4890 
4891 void
valinfo(NODE * n,Func_print print_func,FILE * fp)4892 valinfo(NODE *n, Func_print print_func, FILE *fp)
4893 {
4894 	if (n == Nnull_string)
4895 		print_func(fp, "uninitialized scalar\n");
4896 	else if ((n->flags & REGEX) != 0)
4897 		print_func(fp, "@/%.*s/\n", n->stlen, n->stptr);
4898 	else if ((n->flags & STRING) != 0) {
4899 		pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', false);
4900 		print_func(fp, "\n");
4901 	} else if ((n->flags & NUMBER) != 0) {
4902 #ifdef HAVE_MPFR
4903 		if (is_mpg_float(n))
4904 			print_func(fp, "%s\n", mpg_fmt("%.17R*g", ROUND_MODE, n->mpg_numbr));
4905 		else if (is_mpg_integer(n))
4906 			print_func(fp, "%s\n", mpg_fmt("%Zd", n->mpg_i));
4907 		else
4908 #endif
4909 		print_func(fp, "%.17g\n", n->numbr);
4910 	} else
4911 		print_func(fp, "?? flags %s\n", flags2str(n->flags));
4912 }
4913 
4914 
4915 /* dump_vars --- dump the symbol table */
4916 
4917 void
dump_vars(const char * fname)4918 dump_vars(const char *fname)
4919 {
4920 	FILE *fp;
4921 	NODE **vars;
4922 
4923 	if (fname == NULL)
4924 		fp = stderr;
4925 	else if (strcmp(fname, "-") == 0)
4926 		fp = stdout;
4927 	else if ((fp = fopen(fname, "w")) == NULL) {
4928 		warning(_("could not open `%s' for writing: %s"), fname, strerror(errno));
4929 		warning(_("sending variable list to standard error"));
4930 		fp = stderr;
4931 	}
4932 
4933 	vars = variable_list();
4934 	print_vars(vars, fprintf, fp);
4935 	efree(vars);
4936 	if (fp != stdout && fp != stderr && fclose(fp) != 0)
4937 		warning(_("%s: close failed: %s"), fname, strerror(errno));
4938 }
4939 
4940 /* dump_funcs --- print all functions */
4941 
4942 void
dump_funcs()4943 dump_funcs()
4944 {
4945 	NODE **funcs;
4946 	funcs = function_list(true);
4947 	(void) foreach_func(funcs, (int (*)(INSTRUCTION *, void *)) pp_func, (void *) 0);
4948 	efree(funcs);
4949 }
4950 
4951 
4952 /* shadow_funcs --- check all functions for parameters that shadow globals */
4953 
4954 void
shadow_funcs()4955 shadow_funcs()
4956 {
4957 	static int calls = 0;
4958 	bool shadow = false;
4959 	NODE **funcs;
4960 
4961 	if (calls++ != 0)
4962 		fatal(_("shadow_funcs() called twice!"));
4963 
4964 	funcs = function_list(true);
4965 	(void) foreach_func(funcs, (int (*)(INSTRUCTION *, void *)) parms_shadow, & shadow);
4966 	efree(funcs);
4967 
4968 	/* End with fatal if the user requested it.  */
4969 	if (shadow && lintfunc == r_fatal)
4970 		lintwarn(_("there were shadowed variables"));
4971 }
4972 
4973 
4974 /* mk_function --- finalize function definition node; remove parameters
4975  *	out of the symbol table.
4976  */
4977 
4978 static INSTRUCTION *
mk_function(INSTRUCTION * fi,INSTRUCTION * def)4979 mk_function(INSTRUCTION *fi, INSTRUCTION *def)
4980 {
4981 	NODE *thisfunc;
4982 
4983 	thisfunc = fi->func_body;
4984 	assert(thisfunc != NULL);
4985 
4986 	/* add any pre-function comment to start of action for profile.c  */
4987 
4988 	if (interblock_comment != NULL) {
4989 		interblock_comment->source_line = 0;
4990 		merge_comments(interblock_comment, fi->comment);
4991 		fi->comment = interblock_comment;
4992 		interblock_comment = NULL;
4993 	}
4994 
4995 	/*
4996 	 * Add an implicit return at end;
4997 	 * also used by 'return' command in debugger
4998 	 */
4999 
5000 	(void) list_append(def, instruction(Op_push_i));
5001 	def->lasti->memory = dupnode(Nnull_string);
5002 	(void) list_append(def, instruction(Op_K_return));
5003 
5004 	if (trailing_comment != NULL) {
5005 		(void) list_append(def, trailing_comment);
5006 		trailing_comment = NULL;
5007 	}
5008 
5009 	if (do_pretty_print) {
5010 		fi[3].nexti = namespace_chain;
5011 		namespace_chain = NULL;
5012 		(void) list_prepend(def, instruction(Op_exec_count));
5013 	}
5014 
5015 	/* fi->opcode = Op_func */
5016 	(fi + 1)->firsti = def->nexti;
5017 	(fi + 1)->lasti = def->lasti;
5018 	(fi + 2)->first_line = fi->source_line;
5019 	(fi + 2)->last_line = lastline;
5020 	fi->nexti = def->nexti;
5021 	bcfree(def);
5022 
5023 	(void) list_append(rule_list, fi + 1);	/* debugging */
5024 
5025 	/* update lint table info */
5026 	func_use(thisfunc->vname, FUNC_DEFINE);
5027 
5028 	/* remove params from symbol table */
5029 	remove_params(thisfunc);
5030 	return fi;
5031 }
5032 
5033 /*
5034  * install_function:
5035  * install function name in the symbol table.
5036  * Extra work, build up and install a list of the parameter names.
5037  */
5038 
5039 static int
install_function(char * fname,INSTRUCTION * fi,INSTRUCTION * plist)5040 install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist)
5041 {
5042 	NODE *r, *f;
5043 	int pcount = 0;
5044 
5045 	r = lookup(fname);
5046 	if (r != NULL) {
5047 		error_ln(fi->source_line, _("function name `%s' previously defined"), fname);
5048 		return -1;
5049 	}
5050 
5051 	if (plist != NULL)
5052 		pcount = plist->lasti->param_count + 1;
5053 	f = install_symbol(fname, Node_func);
5054 	if (f->vname != fname) {
5055 		// DON'T free fname, it's done later
5056 		fname = f->vname;
5057 	}
5058 
5059 	fi->func_body = f;
5060 	f->param_cnt = pcount;
5061 	f->code_ptr = fi;
5062 	f->fparms = NULL;
5063 	if (pcount > 0) {
5064 		char **pnames;
5065 		pnames = check_params(fname, pcount, plist);	/* frees plist */
5066 		f->fparms = make_params(pnames, pcount);
5067 		efree(pnames);
5068 		install_params(f);
5069 	}
5070 	return 0;
5071 }
5072 
5073 
5074 /* check_params --- build a list of function parameter names after
5075  *	making sure that the names are valid and there are no duplicates.
5076  */
5077 
5078 static char **
check_params(char * fname,int pcount,INSTRUCTION * list)5079 check_params(char *fname, int pcount, INSTRUCTION *list)
5080 {
5081 	INSTRUCTION *p, *np;
5082 	int i, j;
5083 	char *name;
5084 	char **pnames;
5085 
5086 	assert(pcount > 0);
5087 
5088 	emalloc(pnames, char **, pcount * sizeof(char *), "check_params");
5089 
5090 	for (i = 0, p = list->nexti; p != NULL; i++, p = np) {
5091 		np = p->nexti;
5092 		name = p->lextok;
5093 		p->lextok = NULL;
5094 
5095 		if (strcmp(name, fname) == 0) {
5096 			/* check for function foo(foo) { ... }.  bleah. */
5097 			error_ln(p->source_line,
5098 				_("function `%s': cannot use function name as parameter name"), fname);
5099 		} else if (is_std_var(name)) {
5100 			error_ln(p->source_line,
5101 				_("function `%s': cannot use special variable `%s' as a function parameter"),
5102 					fname, name);
5103 		} else if (strchr(name, ':') != NULL)
5104 			error_ln(p->source_line,
5105 				_("function `%s': parameter `%s' cannot contain a namespace"),
5106 					fname, name);
5107 
5108 		/* check for duplicate parameters */
5109 		for (j = 0; j < i; j++) {
5110 			if (strcmp(name, pnames[j]) == 0) {
5111 				error_ln(p->source_line,
5112 					_("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
5113 					fname, i + 1, name, j + 1);
5114 			}
5115 		}
5116 
5117 		pnames[i] = name;
5118 		bcfree(p);
5119 	}
5120 	bcfree(list);
5121 
5122 	return pnames;
5123 }
5124 
5125 
5126 #ifdef HASHSIZE
5127 undef HASHSIZE
5128 #endif
5129 #define HASHSIZE 1021
5130 
5131 static struct fdesc {
5132 	char *name;
5133 	short used;
5134 	short defined;
5135 	short extension;
5136 	struct fdesc *next;
5137 } *ftable[HASHSIZE];
5138 
5139 /* func_use --- track uses and definitions of functions */
5140 
5141 static void
func_use(const char * name,enum defref how)5142 func_use(const char *name, enum defref how)
5143 {
5144 	struct fdesc *fp;
5145 	int len;
5146 	int ind;
5147 
5148 	len = strlen(name);
5149 	ind = hash(name, len, HASHSIZE, NULL);
5150 
5151 	for (fp = ftable[ind]; fp != NULL; fp = fp->next)
5152 		if (strcmp(fp->name, name) == 0)
5153 			goto update_value;
5154 
5155 	/* not in the table, fall through to allocate a new one */
5156 
5157 	ezalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
5158 	emalloc(fp->name, char *, len + 1, "func_use");
5159 	strcpy(fp->name, name);
5160 	fp->next = ftable[ind];
5161 	ftable[ind] = fp;
5162 
5163 update_value:
5164 	if (how == FUNC_DEFINE)
5165 		fp->defined++;
5166 	else if (how == FUNC_EXT) {
5167 		fp->defined++;
5168 		fp->extension++;
5169 	} else
5170 		fp->used++;
5171 }
5172 
5173 /* track_ext_func --- add an extension function to the table */
5174 
5175 void
track_ext_func(const char * name)5176 track_ext_func(const char *name)
5177 {
5178 	func_use(name, FUNC_EXT);
5179 }
5180 
5181 /* check_funcs --- verify functions that are called but not defined */
5182 
5183 static void
check_funcs()5184 check_funcs()
5185 {
5186 	struct fdesc *fp, *next;
5187 	int i;
5188 
5189 	if (! in_main_context())
5190 		goto free_mem;
5191 
5192 	for (i = 0; i < HASHSIZE; i++) {
5193 		for (fp = ftable[i]; fp != NULL; fp = fp->next) {
5194 			if (do_lint && ! fp->extension) {
5195 				/*
5196 				 * Making this not a lint check and
5197 				 * incrementing * errcount breaks old code.
5198 				 * Sigh.
5199 				 */
5200 				if (fp->defined == 0)
5201 					lintwarn(_("function `%s' called but never defined"),
5202 						fp->name);
5203 
5204 				if (fp->used == 0)
5205 					lintwarn(_("function `%s' defined but never called directly"),
5206 						fp->name);
5207 			}
5208 		}
5209 	}
5210 
5211 free_mem:
5212 	/* now let's free all the memory */
5213 	for (i = 0; i < HASHSIZE; i++) {
5214 		for (fp = ftable[i]; fp != NULL; fp = next) {
5215 			next = fp->next;
5216 			efree(fp->name);
5217 			efree(fp);
5218 		}
5219 		ftable[i] = NULL;
5220 	}
5221 }
5222 
5223 /* param_sanity --- look for parameters that are regexp constants */
5224 
5225 static void
param_sanity(INSTRUCTION * arglist)5226 param_sanity(INSTRUCTION *arglist)
5227 {
5228 	INSTRUCTION *argl, *arg;
5229 	int i = 1;
5230 
5231 	if (arglist == NULL)
5232 		return;
5233 	for (argl = arglist->nexti; argl; ) {
5234 		arg = argl->lasti;
5235 		if (arg->opcode == Op_match_rec)
5236 			warning_ln(arg->source_line,
5237 				_("regexp constant for parameter #%d yields boolean value"), i);
5238 		argl = arg->nexti;
5239 		i++;
5240 	}
5241 }
5242 
5243 /* variable --- make sure NAME is in the symbol table */
5244 
5245 NODE *
variable(int location,char * name,NODETYPE type)5246 variable(int location, char *name, NODETYPE type)
5247 {
5248 	NODE *r;
5249 
5250 	if ((r = lookup(name)) != NULL) {
5251 		if (r->type == Node_func || r->type == Node_ext_func )
5252 			error_ln(location, _("function `%s' called with space between name and `(',\nor used as a variable or an array"),
5253 				r->vname);
5254 	} else {
5255 		/* not found */
5256 		return install_symbol(name, type);
5257 	}
5258 	efree(name);
5259 	return r;
5260 }
5261 
5262 /* make_regnode --- make a regular expression node */
5263 
5264 NODE *
make_regnode(NODETYPE type,NODE * exp)5265 make_regnode(NODETYPE type, NODE *exp)
5266 {
5267 	NODE *n;
5268 
5269 	assert(type == Node_regex || type == Node_dynregex);
5270 	getnode(n);
5271 	memset(n, 0, sizeof(NODE));
5272 	n->type = type;
5273 	n->re_cnt = 1;
5274 
5275 	if (type == Node_regex) {
5276 		n->re_reg[0] = make_regexp(exp->stptr, exp->stlen, false, true, false);
5277 		if (n->re_reg[0] == NULL) {
5278 			freenode(n);
5279 			return NULL;
5280 		}
5281 		n->re_exp = exp;
5282 		n->re_flags = CONSTANT;
5283 	}
5284 	return n;
5285 }
5286 
5287 
5288 /* mk_rexp --- make a regular expression constant */
5289 
5290 static NODE *
mk_rexp(INSTRUCTION * list)5291 mk_rexp(INSTRUCTION *list)
5292 {
5293 	INSTRUCTION *ip;
5294 
5295 	ip = list->nexti;
5296 	if (ip == list->lasti && ip->opcode == Op_match_rec)
5297 		ip->opcode = Op_push_re;
5298 	else if (ip == list->lasti && ip->opcode == Op_push_re)
5299 		; /* do nothing --- @/.../ */
5300 	else {
5301 		ip = instruction(Op_push_re);
5302 		ip->memory = make_regnode(Node_dynregex, NULL);
5303 		ip->nexti = list->lasti->nexti;
5304 		list->lasti->nexti = ip;
5305 		list->lasti = ip;
5306 	}
5307 	return ip->memory;
5308 }
5309 
5310 #ifndef NO_LINT
5311 /* isnoeffect --- when used as a statement, has no side effects */
5312 
5313 static int
isnoeffect(OPCODE type)5314 isnoeffect(OPCODE type)
5315 {
5316 	switch (type) {
5317 	case Op_times:
5318 	case Op_times_i:
5319 	case Op_quotient:
5320 	case Op_quotient_i:
5321 	case Op_mod:
5322 	case Op_mod_i:
5323 	case Op_plus:
5324 	case Op_plus_i:
5325 	case Op_minus:
5326 	case Op_minus_i:
5327 	case Op_subscript:
5328 	case Op_concat:
5329 	case Op_exp:
5330 	case Op_exp_i:
5331 	case Op_unary_minus:
5332 	case Op_field_spec:
5333 	case Op_and_final:
5334 	case Op_or_final:
5335 	case Op_equal:
5336 	case Op_notequal:
5337 	case Op_less:
5338 	case Op_greater:
5339 	case Op_leq:
5340 	case Op_geq:
5341 	case Op_match:
5342 	case Op_nomatch:
5343 	case Op_match_rec:
5344 	case Op_not:
5345 	case Op_in_array:
5346 		return true;
5347 	// Additional opcodes that can be part of an expression
5348 	// that has no effect:
5349 	case Op_and:
5350 	case Op_or:
5351 	case Op_push:
5352 	case Op_push_i:
5353 	case Op_push_array:
5354 	case Op_pop:
5355 	case Op_lint_plus:
5356 		return true;
5357 	default:
5358 		break;	/* keeps gcc -Wall happy */
5359 	}
5360 
5361 	return false;
5362 }
5363 #endif /* NO_LINT */
5364 
5365 
5366 /* make_assignable --- make this operand an assignable one if posiible */
5367 
5368 static INSTRUCTION *
make_assignable(INSTRUCTION * ip)5369 make_assignable(INSTRUCTION *ip)
5370 {
5371 	switch (ip->opcode) {
5372 	case Op_push:
5373 		ip->opcode = Op_push_lhs;
5374 		return ip;
5375 	case Op_field_spec:
5376 		ip->opcode = Op_field_spec_lhs;
5377 		return ip;
5378 	case Op_subscript:
5379 		ip->opcode = Op_subscript_lhs;
5380 		return ip;
5381 	default:
5382 		break;	/* keeps gcc -Wall happy */
5383 	}
5384 	return NULL;
5385 }
5386 
5387 /* stopme --- for debugging */
5388 
5389 NODE *
stopme(int nargs ATTRIBUTE_UNUSED)5390 stopme(int nargs ATTRIBUTE_UNUSED)
5391 {
5392 	return make_number(0.0);
5393 }
5394 
5395 /* dumpintlstr --- write out an initial .po file entry for the string */
5396 
5397 static void
dumpintlstr(const char * str,size_t len)5398 dumpintlstr(const char *str, size_t len)
5399 {
5400 	char *cp;
5401 
5402 	/* See the GNU gettext distribution for details on the file format */
5403 
5404 	if (source != NULL) {
5405 		/* ala the gettext sources, remove leading `./'s */
5406 		for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
5407 			continue;
5408 		printf("#: %s:%d\n", cp, sourceline);
5409 	}
5410 
5411 	printf("msgid ");
5412 	pp_string_fp(fprintf, stdout, str, len, '"', true);
5413 	putchar('\n');
5414 	printf("msgstr \"\"\n\n");
5415 	fflush(stdout);
5416 }
5417 
5418 /* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */
5419 
5420 static void
dumpintlstr2(const char * str1,size_t len1,const char * str2,size_t len2)5421 dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2)
5422 {
5423 	char *cp;
5424 
5425 	/* See the GNU gettext distribution for details on the file format */
5426 
5427 	if (source != NULL) {
5428 		/* ala the gettext sources, remove leading `./'s */
5429 		for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
5430 			continue;
5431 		printf("#: %s:%d\n", cp, sourceline);
5432 	}
5433 
5434 	printf("msgid ");
5435 	pp_string_fp(fprintf, stdout, str1, len1, '"', true);
5436 	putchar('\n');
5437 	printf("msgid_plural ");
5438 	pp_string_fp(fprintf, stdout, str2, len2, '"', true);
5439 	putchar('\n');
5440 	printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
5441 	fflush(stdout);
5442 }
5443 
5444 /* mk_binary --- instructions for binary operators */
5445 
5446 static INSTRUCTION *
mk_binary(INSTRUCTION * s1,INSTRUCTION * s2,INSTRUCTION * op)5447 mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op)
5448 {
5449 	INSTRUCTION *ip1,*ip2, *lint_plus;
5450 	AWKNUM res;
5451 
5452 	ip2 = s2->nexti;
5453 	if (s2->lasti == ip2 && ip2->opcode == Op_push_i) {
5454 	/* do any numeric constant folding */
5455 		ip1 = s1->nexti;
5456 		if (do_optimize
5457 				&& ip1 == s1->lasti && ip1->opcode == Op_push_i
5458 				&& (ip1->memory->flags & (MPFN|MPZN|STRCUR|STRING)) == 0
5459 				&& (ip2->memory->flags & (MPFN|MPZN|STRCUR|STRING)) == 0
5460 		) {
5461 			NODE *n1 = ip1->memory, *n2 = ip2->memory;
5462 			res = force_number(n1)->numbr;
5463 			(void) force_number(n2);
5464 			switch (op->opcode) {
5465 			case Op_times:
5466 				res *= n2->numbr;
5467 				break;
5468 			case Op_quotient:
5469 				if (n2->numbr == 0.0) {
5470 					/* don't fatalize, allow parsing rest of the input */
5471 					error_ln(op->source_line, _("division by zero attempted"));
5472 					goto regular;
5473 				}
5474 
5475 				res /= n2->numbr;
5476 				break;
5477 			case Op_mod:
5478 				if (n2->numbr == 0.0) {
5479 					/* don't fatalize, allow parsing rest of the input */
5480 					error_ln(op->source_line, _("division by zero attempted in `%%'"));
5481 					goto regular;
5482 				}
5483 #ifdef HAVE_FMOD
5484 				res = fmod(res, n2->numbr);
5485 #else	/* ! HAVE_FMOD */
5486 				(void) modf(res / n2->numbr, &res);
5487 				res = n1->numbr - res * n2->numbr;
5488 #endif	/* ! HAVE_FMOD */
5489 				break;
5490 			case Op_plus:
5491 				res += n2->numbr;
5492 				break;
5493 			case Op_minus:
5494 				res -= n2->numbr;
5495 				break;
5496 			case Op_exp:
5497 				res = calc_exp(res, n2->numbr);
5498 				break;
5499 			default:
5500 				goto regular;
5501 			}
5502 
5503 			op->opcode = Op_push_i;
5504 			// We don't need to call set_profile_text() here since
5505 			// optimizing is disabled when doing pretty printing.
5506 			op->memory = make_number(res);
5507 			unref(n1);
5508 			unref(n2);
5509 			bcfree(ip1);
5510 			bcfree(ip2);
5511 			bcfree(s1);
5512 			bcfree(s2);
5513 			return list_create(op);
5514 		} else {
5515 		/* do basic arithmetic optimisation */
5516 		/* convert (Op_push_i Node_val) + (Op_plus) to (Op_plus_i Node_val) */
5517 			switch (op->opcode) {
5518 			case Op_times:
5519 				op->opcode = Op_times_i;
5520 				break;
5521 			case Op_quotient:
5522 				op->opcode = Op_quotient_i;
5523 				break;
5524 			case Op_mod:
5525 				op->opcode = Op_mod_i;
5526 				break;
5527 			case Op_plus:
5528 				if (do_lint)
5529 					goto regular;
5530 				op->opcode = Op_plus_i;
5531 				break;
5532 			case Op_minus:
5533 				op->opcode = Op_minus_i;
5534 				break;
5535 			case Op_exp:
5536 				op->opcode = Op_exp_i;
5537 				break;
5538 			default:
5539 				goto regular;
5540 			}
5541 
5542 			op->memory = ip2->memory;
5543 			bcfree(ip2);
5544 			bcfree(s2);	/* Op_list */
5545 			return list_append(s1, op);
5546 		}
5547 	}
5548 
5549 regular:
5550 	/* append lists s1, s2 and add `op' bytecode */
5551 	(void) list_merge(s1, s2);
5552 	if (do_lint && op->opcode == Op_plus) {
5553 		lint_plus = instruction(Op_lint_plus);
5554 		(void) list_append(s1, lint_plus);
5555 	}
5556 	return list_append(s1, op);
5557 }
5558 
5559 /* mk_boolean --- instructions for boolean and, or */
5560 
5561 static INSTRUCTION *
mk_boolean(INSTRUCTION * left,INSTRUCTION * right,INSTRUCTION * op)5562 mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op)
5563 {
5564 	INSTRUCTION *tp;
5565 	OPCODE opc, final_opc;
5566 
5567 	opc = op->opcode;		/* Op_and or Op_or */
5568 	final_opc = (opc == Op_or) ? Op_or_final : Op_and_final;
5569 
5570 	add_lint(right, LINT_assign_in_cond);
5571 
5572 	tp = left->lasti;
5573 
5574 	if (tp->opcode != final_opc) {	/* x || y */
5575 		list_append(right, instruction(final_opc));
5576 		add_lint(left, LINT_assign_in_cond);
5577 		(void) list_append(left, op);
5578 		left->lasti->target_jmp = right->lasti;
5579 
5580 		/* NB: target_stmt points to previous Op_and(Op_or) in a chain;
5581 		 *     target_stmt only used in the parser (see below).
5582 		 */
5583 
5584 		left->lasti->target_stmt = left->lasti;
5585 		right->lasti->target_stmt = left->lasti;
5586 	} else {		/* optimization for x || y || z || ... */
5587 		INSTRUCTION *ip;
5588 
5589 		op->opcode = final_opc;
5590 		(void) list_append(right, op);
5591 		op->target_stmt = tp;
5592 		tp->opcode = opc;
5593 		tp->target_jmp = op;
5594 
5595 		/* update jump targets */
5596 		for (ip = tp->target_stmt; ; ip = ip->target_stmt) {
5597 			assert(ip->opcode == opc);
5598 			assert(ip->target_jmp == tp);
5599 			/* if (ip->opcode == opc &&  ip->target_jmp == tp) */
5600 			ip->target_jmp = op;
5601 			if (ip->target_stmt == ip)
5602 				break;
5603 		}
5604 	}
5605 
5606 	return list_merge(left, right);
5607 }
5608 
5609 /* mk_condition --- if-else and conditional */
5610 
5611 static INSTRUCTION *
mk_condition(INSTRUCTION * cond,INSTRUCTION * ifp,INSTRUCTION * true_branch,INSTRUCTION * elsep,INSTRUCTION * false_branch)5612 mk_condition(INSTRUCTION *cond, INSTRUCTION *ifp, INSTRUCTION *true_branch,
5613 		INSTRUCTION *elsep, INSTRUCTION *false_branch)
5614 {
5615 	/*
5616 	 *    ----------------
5617 	 *       cond
5618 	 *    ----------------
5619 	 * t: [Op_jmp_false f ]
5620 	 *    ----------------
5621 	 *       true_branch
5622 	 *
5623 	 *    ----------------
5624 	 *    [Op_jmp y]
5625 	 *    ----------------
5626 	 * f:
5627 	 *      false_branch
5628 	 *    ----------------
5629 	 * y: [Op_no_op]
5630 	 *    ----------------
5631 	 */
5632 
5633 	INSTRUCTION *ip;
5634 	bool setup_else_part = true;
5635 
5636 	if (false_branch == NULL) {
5637 		false_branch = list_create(instruction(Op_no_op));
5638 		if (elsep == NULL) {		/* else { } */
5639 			setup_else_part = false;
5640 		}
5641 	} else {
5642 		/* assert(elsep != NULL); */
5643 
5644 		/* avoid a series of no_op's: if .. else if .. else if .. */
5645 		if (false_branch->lasti->opcode != Op_no_op)
5646 			(void) list_append(false_branch, instruction(Op_no_op));
5647 	}
5648 
5649 	if (setup_else_part) {
5650 		if (do_pretty_print) {
5651 			(void) list_prepend(false_branch, elsep);
5652 			false_branch->nexti->branch_end = false_branch->lasti;
5653 			(void) list_prepend(false_branch, instruction(Op_exec_count));
5654 		} else
5655 			bcfree(elsep);
5656 	}
5657 
5658 	(void) list_prepend(false_branch, instruction(Op_jmp));
5659 	false_branch->nexti->target_jmp = false_branch->lasti;
5660 
5661 	add_lint(cond, LINT_assign_in_cond);
5662 	ip = list_append(cond, instruction(Op_jmp_false));
5663 	ip->lasti->target_jmp = false_branch->nexti->nexti;
5664 
5665 	if (do_pretty_print) {
5666 		(void) list_prepend(ip, ifp);
5667 		(void) list_append(ip, instruction(Op_exec_count));
5668 		ip->nexti->branch_if = ip->lasti;
5669 		ip->nexti->branch_else = false_branch->nexti;
5670 	} else
5671 		bcfree(ifp);
5672 
5673 	if (true_branch != NULL)
5674 		list_merge(ip, true_branch);
5675 	return list_merge(ip, false_branch);
5676 }
5677 
5678 enum defline { FIRST_LINE, LAST_LINE };
5679 
5680 /* find_line -- find the first(last) line in a list of (pattern) instructions */
5681 
5682 static int
find_line(INSTRUCTION * pattern,enum defline what)5683 find_line(INSTRUCTION *pattern, enum defline what)
5684 {
5685 	INSTRUCTION *ip;
5686 	int lineno = 0;
5687 
5688 	for (ip = pattern->nexti; ip; ip = ip->nexti) {
5689 		if (what == LAST_LINE) {
5690 			if (ip->source_line > lineno)
5691 				lineno = ip->source_line;
5692 		} else {	/* FIRST_LINE */
5693 			if (ip->source_line > 0
5694 					&& (lineno == 0 || ip->source_line < lineno))
5695 				lineno = ip->source_line;
5696 		}
5697 		if (ip == pattern->lasti)
5698 			break;
5699 	}
5700 	assert(lineno > 0);
5701 	return lineno;
5702 }
5703 
5704 /* append_rule --- pattern-action instructions */
5705 
5706 static INSTRUCTION *
append_rule(INSTRUCTION * pattern,INSTRUCTION * action)5707 append_rule(INSTRUCTION *pattern, INSTRUCTION *action)
5708 {
5709 	/*
5710 	 *    ----------------
5711 	 *       pattern
5712 	 *    ----------------
5713 	 *    [Op_jmp_false f ]
5714 	 *    ----------------
5715 	 *       action
5716 	 *    ----------------
5717 	 * f: [Op_no_op       ]
5718 	 *    ----------------
5719 	 */
5720 
5721 	INSTRUCTION *rp;
5722 	INSTRUCTION *tp;
5723 	INSTRUCTION *ip;
5724 
5725 	if (rule != Rule) {
5726 		rp = pattern;
5727 		if (do_pretty_print) {
5728 			rp[3].nexti = namespace_chain;
5729 			namespace_chain = NULL;
5730 			(void) list_append(action, instruction(Op_no_op));
5731 		}
5732 		(rp + 1)->firsti = action->nexti;
5733 		(rp + 1)->lasti = action->lasti;
5734 		(rp + 2)->first_line = pattern->source_line;
5735 		(rp + 2)->last_line = lastline;
5736 		ip = list_prepend(action, rp);
5737 		if (interblock_comment != NULL) {
5738 			ip = list_prepend(ip, interblock_comment);
5739 			interblock_comment = NULL;
5740 		}
5741 	} else {
5742 		rp = bcalloc(Op_rule, 4, 0);
5743 		rp->in_rule = Rule;
5744 		rp->source_file = source;
5745 		tp = instruction(Op_no_op);
5746 
5747 		if (do_pretty_print) {
5748 			rp[3].nexti = namespace_chain;
5749 			namespace_chain = NULL;
5750 		}
5751 
5752 		if (pattern == NULL) {
5753 			/* assert(action != NULL); */
5754 			if (do_pretty_print)
5755 				(void) list_prepend(action, instruction(Op_exec_count));
5756 			(rp + 1)->firsti = action->nexti;
5757 			(rp + 1)->lasti = tp;
5758 			(rp + 2)->first_line = firstline;
5759 			(rp + 2)->last_line = lastline;
5760 			rp->source_line = firstline;
5761 			ip = list_prepend(list_append(action, tp), rp);
5762 		} else {
5763 			(void) list_append(pattern, instruction(Op_jmp_false));
5764 			pattern->lasti->target_jmp = tp;
5765 			(rp + 2)->first_line = find_line(pattern, FIRST_LINE);
5766 			rp->source_line = (rp + 2)->first_line;
5767 			if (action == NULL) {
5768 				(rp + 2)->last_line = find_line(pattern, LAST_LINE);
5769 				action = list_create(instruction(Op_K_print_rec));
5770 				if (do_pretty_print)
5771 					action = list_prepend(action, instruction(Op_exec_count));
5772 			} else
5773 				(rp + 2)->last_line = lastline;
5774 
5775 			if (interblock_comment != NULL) {	// was after previous action
5776 				pattern = list_prepend(pattern, interblock_comment);
5777 				interblock_comment = NULL;
5778 			}
5779 
5780 			if (do_pretty_print) {
5781 				pattern = list_prepend(pattern, instruction(Op_exec_count));
5782 				action = list_prepend(action, instruction(Op_exec_count));
5783 			}
5784 
5785  			(rp + 1)->firsti = action->nexti;
5786 			(rp + 1)->lasti = tp;
5787 			ip = list_append(
5788 					list_merge(list_prepend(pattern, rp),
5789 						action),
5790 					tp);
5791 		}
5792 	}
5793 
5794 	list_append(rule_list, rp + 1);
5795 
5796 	if (rule_block[rule] == NULL)
5797 		rule_block[rule] = ip;
5798 	else
5799 		(void) list_merge(rule_block[rule], ip);
5800 
5801 	return rule_block[rule];
5802 }
5803 
5804 /* mk_assignment --- assignment bytecodes */
5805 
5806 static INSTRUCTION *
mk_assignment(INSTRUCTION * lhs,INSTRUCTION * rhs,INSTRUCTION * op)5807 mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op)
5808 {
5809 	INSTRUCTION *tp;
5810 	INSTRUCTION *ip;
5811 
5812 	tp = lhs->lasti;
5813 	switch (tp->opcode) {
5814 	case Op_field_spec:
5815 		tp->opcode = Op_field_spec_lhs;
5816 		break;
5817 	case Op_subscript:
5818 		tp->opcode = Op_subscript_lhs;
5819 		break;
5820 	case Op_push:
5821 	case Op_push_array:
5822 		tp->opcode = Op_push_lhs;
5823 		break;
5824 	case Op_field_assign:
5825 		yyerror(_("cannot assign a value to the result of a field post-increment expression"));
5826 		break;
5827 	default:
5828 		yyerror(_("invalid target of assignment (opcode %s)"),
5829 				opcode2str(tp->opcode));
5830 		break;
5831 	}
5832 
5833 	tp->do_reference = (op->opcode != Op_assign);	/* check for uninitialized reference */
5834 
5835 	if (rhs != NULL)
5836 		ip = list_merge(rhs, lhs);
5837 	else
5838 		ip = lhs;
5839 
5840 	(void) list_append(ip, op);
5841 
5842 	if (tp->opcode == Op_push_lhs
5843 			&& tp->memory->type == Node_var
5844 			&& tp->memory->var_assign
5845 	) {
5846 		tp->do_reference = false; /* no uninitialized reference checking
5847 		                           * for a special variable.
5848 		                           */
5849 		(void) list_append(ip, instruction(Op_var_assign));
5850 		ip->lasti->assign_var = tp->memory->var_assign;
5851 	} else if (tp->opcode == Op_field_spec_lhs) {
5852 		(void) list_append(ip, instruction(Op_field_assign));
5853 		ip->lasti->field_assign = (Func_ptr) 0;
5854 		tp->target_assign = ip->lasti;
5855 	} else if (tp->opcode == Op_subscript_lhs) {
5856 		(void) list_append(ip, instruction(Op_subscript_assign));
5857 	}
5858 
5859 	return ip;
5860 }
5861 
5862 /* optimize_assignment --- peephole optimization for assignment */
5863 
5864 static INSTRUCTION *
optimize_assignment(INSTRUCTION * exp)5865 optimize_assignment(INSTRUCTION *exp)
5866 {
5867 	INSTRUCTION *i1, *i2, *i3;
5868 
5869 	/*
5870 	 * Optimize assignment statements array[subs] = x; var = x; $n = x;
5871 	 * string concatenation of the form s = s t.
5872 	 *
5873 	 * 1) Array element assignment array[subs] = x:
5874 	 *   Replaces Op_push_array + Op_subscript_lhs + Op_assign + Op_pop
5875 	 *   with single instruction Op_store_sub.
5876 	 *	 Limitation: 1 dimension and sub is simple var/value.
5877 	 *
5878 	 * 2) Simple variable assignment var = x:
5879 	 *   Replaces Op_push_lhs + Op_assign + Op_pop with Op_store_var.
5880 	 *
5881 	 * 3) Field assignment $n = x:
5882 	 *   Replaces Op_field_spec_lhs + Op_assign + Op_field_assign + Op_pop
5883 	 *   with Op_store_field.
5884 	 *
5885 	 * 4) Optimization for string concatenation:
5886 	 *   For cases like x = x y, uses realloc to include y in x;
5887 	 *   also eliminates instructions Op_push_lhs and Op_pop.
5888 	 */
5889 
5890 	/*
5891 	 * N.B.: do not append Op_pop instruction to the returned
5892 	 * instruction list if optimized. None of these
5893 	 * optimized instructions pushes the r-value of assignment
5894 	 * onto the runtime stack.
5895 	 */
5896 
5897 	i2 = NULL;
5898 	i1 = exp->lasti;
5899 
5900 	if (   i1->opcode != Op_assign
5901 	    && i1->opcode != Op_field_assign)
5902 		return list_append(exp, instruction(Op_pop));
5903 
5904 	for (i2 = exp->nexti; i2 != i1; i2 = i2->nexti) {
5905 		switch (i2->opcode) {
5906 		case Op_concat:
5907 			if (i2->nexti->opcode == Op_push_lhs    /* l.h.s is a simple variable */
5908 				&& (i2->concat_flag & CSVAR) != 0   /* 1st exp in r.h.s is a simple variable;
5909 				                                     * see Op_concat in the grammer above.
5910 				                                     */
5911 				&& i2->nexti->memory == exp->nexti->memory	 /* and the same as in l.h.s */
5912 				&& i2->nexti->nexti == i1
5913 				&& i1->opcode == Op_assign
5914 			) {
5915 				/* s = s ... optimization */
5916 
5917 				/* avoid stuff like x = x (x = y) or x = x gsub(/./, "b", x);
5918 				 * check for l-value reference to this variable in the r.h.s.
5919 				 * Also, avoid function calls in general to guard against
5920 				 * global variable assignment.
5921 				 */
5922 
5923 				for (i3 = exp->nexti->nexti; i3 != i2; i3 = i3->nexti) {
5924 					if ((i3->opcode == Op_push_lhs && i3->memory == i2->nexti->memory)
5925 							|| i3->opcode == Op_func_call)
5926 						return list_append(exp, instruction(Op_pop)); /* no optimization */
5927 				}
5928 
5929 				/* remove the variable from r.h.s */
5930 				i3 = exp->nexti;
5931 				exp->nexti = i3->nexti;
5932 				bcfree(i3);
5933 
5934 				if (--i2->expr_count == 1)	/* one less expression in Op_concat */
5935 					i2->opcode = Op_no_op;
5936 
5937 				i3 = i2->nexti;
5938 				assert(i3->opcode == Op_push_lhs);
5939 				i3->opcode = Op_assign_concat;	/* change Op_push_lhs to Op_assign_concat */
5940 				i3->nexti = NULL;
5941 				bcfree(i1);          /* Op_assign */
5942 				exp->lasti = i3;     /* update Op_list */
5943 				return exp;
5944 			}
5945 			break;
5946 
5947 		case Op_field_spec_lhs:
5948 			if (i2->nexti->opcode == Op_assign
5949 					&& i2->nexti->nexti == i1
5950 					&& i1->opcode == Op_field_assign
5951 			) {
5952 				/* $n = .. */
5953 				i2->opcode = Op_store_field;
5954 				bcfree(i2->nexti);  /* Op_assign */
5955 				i2->nexti = NULL;
5956 				bcfree(i1);          /* Op_field_assign */
5957 				exp->lasti = i2;    /* update Op_list */
5958 				return exp;
5959 			}
5960 			break;
5961 
5962 		case Op_push_array:
5963 			if (i2->nexti->nexti->opcode == Op_subscript_lhs) {
5964 				i3 = i2->nexti->nexti;
5965 				if (i3->sub_count == 1
5966 						&& i3->nexti == i1
5967 						&& i1->opcode == Op_assign
5968 				) {
5969 					/* array[sub] = .. */
5970 					i3->opcode = Op_store_sub;
5971 					i3->memory = i2->memory;
5972 					i3->expr_count = 1;  /* sub_count shadows memory,
5973                                           * so use expr_count instead.
5974 				                          */
5975 					i3->nexti = NULL;
5976 					i2->opcode = Op_no_op;
5977 					bcfree(i1);          /* Op_assign */
5978 					exp->lasti = i3;     /* update Op_list */
5979 					return exp;
5980 				}
5981 			}
5982 			break;
5983 
5984 		case Op_push_lhs:
5985 			if (i2->nexti == i1
5986 					&& i1->opcode == Op_assign
5987 			) {
5988 				/* var = .. */
5989 				i2->opcode = Op_store_var;
5990 				i2->nexti = NULL;
5991 				bcfree(i1);          /* Op_assign */
5992 				exp->lasti = i2;     /* update Op_list */
5993 
5994 				i3 = exp->nexti;
5995 				if (i3->opcode == Op_push_i
5996 					&& (i3->memory->flags & INTLSTR) == 0
5997 					&& i3->nexti == i2
5998 				) {
5999 					/* constant initializer */
6000 					i2->initval = i3->memory;
6001 					bcfree(i3);
6002 					exp->nexti = i2;
6003 				} else
6004 					i2->initval = NULL;
6005 
6006 				return exp;
6007 			}
6008 			break;
6009 
6010 		default:
6011 			break;
6012 		}
6013 	}
6014 
6015 	/* no optimization  */
6016 	return list_append(exp, instruction(Op_pop));
6017 }
6018 
6019 
6020 /* mk_getline --- make instructions for getline */
6021 
6022 static INSTRUCTION *
mk_getline(INSTRUCTION * op,INSTRUCTION * var,INSTRUCTION * redir,int redirtype)6023 mk_getline(INSTRUCTION *op, INSTRUCTION *var, INSTRUCTION *redir, int redirtype)
6024 {
6025 	INSTRUCTION *ip;
6026 	INSTRUCTION *tp;
6027 	INSTRUCTION *asgn = NULL;
6028 
6029 	/*
6030 	 *  getline [var] < [file]
6031 	 *
6032 	 *  [ file (simp_exp)]
6033 	 *  [ [ var ] ]
6034 	 *  [ Op_K_getline_redir|NULL|redir_type|into_var]
6035 	 *  [ [var_assign] ]
6036 	 *
6037 	 */
6038 
6039 	if (redir == NULL) {
6040 		int sline = op->source_line;
6041 		bcfree(op);
6042 		op = bcalloc(Op_K_getline, 2, sline);
6043 		(op + 1)->target_endfile = ip_endfile;
6044 		(op + 1)->target_beginfile = ip_beginfile;
6045 	}
6046 
6047 	if (var != NULL) {
6048 		tp = make_assignable(var->lasti);
6049 		assert(tp != NULL);
6050 
6051 		/* check if we need after_assign bytecode */
6052 		if (tp->opcode == Op_push_lhs
6053 				&& tp->memory->type == Node_var
6054 				&& tp->memory->var_assign
6055 		) {
6056 			asgn = instruction(Op_var_assign);
6057 			asgn->assign_ctxt = op->opcode;
6058 			asgn->assign_var = tp->memory->var_assign;
6059 		} else if (tp->opcode == Op_field_spec_lhs) {
6060 			asgn = instruction(Op_field_assign);
6061 			asgn->assign_ctxt = op->opcode;
6062 			asgn->field_assign = (Func_ptr) 0;   /* determined at run time */
6063 			tp->target_assign = asgn;
6064 		} else if (tp->opcode == Op_subscript_lhs) {
6065 			asgn = instruction(Op_subscript_assign);
6066 			asgn->assign_ctxt = op->opcode;
6067 		}
6068 
6069 		if (redir != NULL) {
6070 			ip = list_merge(redir, var);
6071 			(void) list_append(ip, op);
6072 		} else
6073 			ip = list_append(var, op);
6074 	} else if (redir != NULL)
6075 		ip = list_append(redir, op);
6076 	else
6077 		ip = list_create(op);
6078 	op->into_var = (var != NULL);
6079 	op->redir_type = (redir != NULL) ? redirtype : redirect_none;
6080 
6081 	return (asgn == NULL ? ip : list_append(ip, asgn));
6082 }
6083 
6084 
6085 /* mk_for_loop --- for loop bytecodes */
6086 
6087 static INSTRUCTION *
mk_for_loop(INSTRUCTION * forp,INSTRUCTION * init,INSTRUCTION * cond,INSTRUCTION * incr,INSTRUCTION * body)6088 mk_for_loop(INSTRUCTION *forp, INSTRUCTION *init, INSTRUCTION *cond,
6089 				INSTRUCTION *incr, INSTRUCTION *body)
6090 {
6091 	/*
6092 	 *   ------------------------
6093 	 *        init                 (may be NULL)
6094 	 *   ------------------------
6095 	 * x:
6096 	 *        cond                 (Op_no_op if NULL)
6097 	 *   ------------------------
6098 	 *    [ Op_jmp_false tb      ]
6099 	 *   ------------------------
6100 	 *        body                 (may be NULL)
6101 	 *   ------------------------
6102 	 * tc:
6103 	 *    incr                      (may be NULL)
6104 	 *    [ Op_jmp x             ]
6105 	 *   ------------------------
6106 	 * tb:[ Op_no_op             ]
6107 	 */
6108 
6109 	INSTRUCTION *ip, *tbreak, *tcont;
6110 	INSTRUCTION *jmp;
6111 	INSTRUCTION *pp_cond;
6112 	INSTRUCTION *ret;
6113 
6114 	tbreak = instruction(Op_no_op);
6115 
6116 	if (cond != NULL) {
6117 		add_lint(cond, LINT_assign_in_cond);
6118 		pp_cond = cond->nexti;
6119 		ip = cond;
6120 		(void) list_append(ip, instruction(Op_jmp_false));
6121 		ip->lasti->target_jmp = tbreak;
6122 	} else {
6123 		pp_cond = instruction(Op_no_op);
6124 		ip = list_create(pp_cond);
6125 	}
6126 
6127 	if (init != NULL)
6128 		ip = list_merge(init, ip);
6129 
6130 	if (do_pretty_print) {
6131 		(void) list_append(ip, instruction(Op_exec_count));
6132 		(forp + 1)->forloop_cond = pp_cond;
6133 		(forp + 1)->forloop_body = ip->lasti;
6134 	}
6135 
6136 	if (body != NULL)
6137 		(void) list_merge(ip, body);
6138 
6139 	jmp = instruction(Op_jmp);
6140 	jmp->target_jmp = pp_cond;
6141 	if (incr == NULL)
6142 		tcont = jmp;
6143 	else {
6144 		tcont = incr->nexti;
6145 		(void) list_merge(ip, incr);
6146 	}
6147 
6148 	(void) list_append(ip, jmp);
6149 	ret = list_append(ip, tbreak);
6150 	fix_break_continue(ret, tbreak, tcont);
6151 
6152 	if (do_pretty_print) {
6153 		forp->target_break = tbreak;
6154 		forp->target_continue = tcont;
6155 		ret = list_prepend(ret, forp);
6156 	}
6157 	/* else
6158 		forp is NULL */
6159 
6160 	return ret;
6161 }
6162 
6163 /* add_lint --- add lint warning bytecode if needed */
6164 
6165 static void
add_lint(INSTRUCTION * list,LINTTYPE linttype)6166 add_lint(INSTRUCTION *list, LINTTYPE linttype)
6167 {
6168 #ifndef NO_LINT
6169 	INSTRUCTION *ip;
6170 	bool no_effect = true;
6171 
6172 	switch (linttype) {
6173 	case LINT_assign_in_cond:
6174 		ip = list->lasti;
6175 		if (ip->opcode == Op_var_assign || ip->opcode == Op_field_assign) {
6176 			assert(ip != list->nexti);
6177 			for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti)
6178 				;
6179 		}
6180 
6181 		if (ip->opcode == Op_assign || ip->opcode == Op_assign_concat) {
6182 			list_append(list, instruction(Op_lint));
6183 			list->lasti->lint_type = linttype;
6184 		}
6185 		break;
6186 
6187 	case LINT_no_effect:
6188 		if (list->lasti->opcode == Op_pop && list->nexti != list->lasti) {
6189 			int line = 0;
6190 
6191 			// Get down to the last instruction ...
6192 			for (ip = list->nexti; ip->nexti != list->lasti; ip = ip->nexti) {
6193 				// ... along the way track line numbers, we will use the line
6194 				// closest to the opcode if that opcode doesn't have one
6195 				if (ip->source_line != 0)
6196 					line = ip->source_line;
6197 
6198 				// And check each opcode for no effect
6199 				no_effect = no_effect && isnoeffect(ip->opcode);
6200 			}
6201 
6202 			// check the last one also
6203 			no_effect = no_effect && isnoeffect(ip->opcode);
6204 
6205 			// Only if all the traversed opcodes have no effect do we
6206 			// produce a warning. This avoids warnings for things like
6207 			// a == b && b = c.
6208 			if (do_lint) {		/* parse-time warning */
6209 				if (no_effect) {
6210 					if (ip->source_line != 0)
6211 						line = ip->source_line;
6212 					lintwarn_ln(line, _("statement has no effect"));
6213 				}
6214 			}
6215 
6216 			// We no longer place a run-time warning also. One warning
6217 			// at parse time is enough.
6218 		}
6219 		break;
6220 
6221 	default:
6222 		break;
6223 	}
6224 #endif
6225 }
6226 
6227 /* mk_expression_list --- list of bytecode lists */
6228 
6229 static INSTRUCTION *
mk_expression_list(INSTRUCTION * list,INSTRUCTION * s1)6230 mk_expression_list(INSTRUCTION *list, INSTRUCTION *s1)
6231 {
6232 	INSTRUCTION *r;
6233 
6234 	/* we can't just combine all bytecodes, since we need to
6235 	 * process individual expressions for a few builtins in snode() (-:
6236 	 */
6237 
6238 	/* -- list of lists     */
6239 	/* [Op_list| ... ]------
6240 	 *                       |
6241 	 * [Op_list| ... ]   --  |
6242 	 *  ...               |  |
6243 	 *  ...       <-------   |
6244 	 * [Op_list| ... ]   --  |
6245 	 *  ...               |  |
6246 	 *  ...               |  |
6247 	 *  ...       <------- --
6248 	 */
6249 
6250 	assert(s1 != NULL && s1->opcode == Op_list);
6251 	if (list == NULL) {
6252 		list = instruction(Op_list);
6253 		list->nexti = s1;
6254 		list->lasti = s1->lasti;
6255 		return list;
6256 	}
6257 
6258 	/* append expression to the end of the list */
6259 
6260 	r = list->lasti;
6261 	r->nexti = s1;
6262 	list->lasti = s1->lasti;
6263 	return list;
6264 }
6265 
6266 /* count_expressions --- fixup expression_list from mk_expression_list.
6267  *                       returns no of expressions in list. isarg is true
6268  *                       for function arguments.
6269  */
6270 
6271 static int
count_expressions(INSTRUCTION ** list,bool isarg)6272 count_expressions(INSTRUCTION **list, bool isarg)
6273 {
6274 	INSTRUCTION *expr;
6275 	INSTRUCTION *r = NULL;
6276 	int count = 0;
6277 
6278 	if (*list == NULL)	/* error earlier */
6279 		return 0;
6280 
6281 	for (expr = (*list)->nexti; expr; ) {
6282 		INSTRUCTION *t1, *t2;
6283 		t1 = expr->nexti;
6284 		t2 = expr->lasti;
6285 		if (isarg && t1 == t2 && t1->opcode == Op_push)
6286 			t1->opcode = Op_push_param;
6287 		if (++count == 1)
6288 			r = expr;
6289 		else
6290 			(void) list_merge(r, expr);
6291 		expr = t2->nexti;
6292 	}
6293 
6294 	assert(count > 0);
6295 	if (! isarg && count > max_args)
6296 		max_args = count;
6297 	bcfree(*list);
6298 	*list = r;
6299 	return count;
6300 }
6301 
6302 /* fix_break_continue --- fix up break & continue codes in loop bodies */
6303 
6304 static void
fix_break_continue(INSTRUCTION * list,INSTRUCTION * b_target,INSTRUCTION * c_target)6305 fix_break_continue(INSTRUCTION *list, INSTRUCTION *b_target, INSTRUCTION *c_target)
6306 {
6307 	INSTRUCTION *ip;
6308 
6309 	list->lasti->nexti = NULL;	/* just to make sure */
6310 
6311 	for (ip = list->nexti; ip != NULL; ip = ip->nexti) {
6312 		switch (ip->opcode) {
6313 		case Op_K_break:
6314 			if (ip->target_jmp == NULL)
6315 				ip->target_jmp = b_target;
6316 			break;
6317 
6318 		case Op_K_continue:
6319 			if (ip->target_jmp == NULL)
6320 				ip->target_jmp = c_target;
6321 			break;
6322 
6323 		default:
6324 			/* this is to keep the compiler happy. sheesh. */
6325 			break;
6326 		}
6327 	}
6328 }
6329 
6330 static inline INSTRUCTION *
list_create(INSTRUCTION * x)6331 list_create(INSTRUCTION *x)
6332 {
6333 	INSTRUCTION *l;
6334 
6335 	l = instruction(Op_list);
6336 	l->nexti = x;
6337 	l->lasti = x;
6338 	return l;
6339 }
6340 
6341 static inline INSTRUCTION *
list_append(INSTRUCTION * l,INSTRUCTION * x)6342 list_append(INSTRUCTION *l, INSTRUCTION *x)
6343 {
6344 #ifdef GAWKDEBUG
6345 	if (l->opcode != Op_list)
6346 		cant_happen();
6347 #endif
6348 	l->lasti->nexti = x;
6349 	l->lasti = x;
6350 	return l;
6351 }
6352 
6353 static inline INSTRUCTION *
list_prepend(INSTRUCTION * l,INSTRUCTION * x)6354 list_prepend(INSTRUCTION *l, INSTRUCTION *x)
6355 {
6356 #ifdef GAWKDEBUG
6357 	if (l->opcode != Op_list)
6358 		cant_happen();
6359 #endif
6360 	x->nexti = l->nexti;
6361 	l->nexti = x;
6362 	return l;
6363 }
6364 
6365 static inline INSTRUCTION *
list_merge(INSTRUCTION * l1,INSTRUCTION * l2)6366 list_merge(INSTRUCTION *l1, INSTRUCTION *l2)
6367 {
6368 #ifdef GAWKDEBUG
6369 	if (l1->opcode != Op_list)
6370 		cant_happen();
6371 	if (l2->opcode != Op_list)
6372 		cant_happen();
6373 #endif
6374 	l1->lasti->nexti = l2->nexti;
6375 	l1->lasti = l2->lasti;
6376 	bcfree(l2);
6377 	return l1;
6378 }
6379 
6380 /* See if name is a special token. */
6381 
6382 int
check_special(const char * name)6383 check_special(const char *name)
6384 {
6385 	int low, high, mid;
6386 	int i;
6387 	int non_standard_flags = 0;
6388 #ifdef USE_EBCDIC
6389 	static bool did_sort = false;
6390 
6391 	if (! did_sort) {
6392 		qsort((void *) tokentab,
6393 				sizeof(tokentab) / sizeof(tokentab[0]),
6394 				sizeof(tokentab[0]), tokcompare);
6395 		did_sort = true;
6396 	}
6397 #endif
6398 
6399 	if (do_traditional)
6400 		non_standard_flags |= GAWKX;
6401 	if (do_posix)
6402 		non_standard_flags |= NOT_POSIX;
6403 
6404 	low = 0;
6405 	high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
6406 	while (low <= high) {
6407 		mid = (low + high) / 2;
6408 		i = *name - tokentab[mid].operator[0];
6409 		if (i == 0)
6410 			i = strcmp(name, tokentab[mid].operator);
6411 
6412 		if (i < 0)		/* token < mid */
6413 			high = mid - 1;
6414 		else if (i > 0)		/* token > mid */
6415 			low = mid + 1;
6416 		else {
6417 			if ((tokentab[mid].flags & non_standard_flags) != 0)
6418 				return -1;
6419 			return mid;
6420 		}
6421 	}
6422 	return -1;
6423 }
6424 
6425 /*
6426  * This provides a private version of functions that act like VMS's
6427  * variable-length record filesystem, where there was a bug on
6428  * certain source files.
6429  */
6430 
6431 static FILE *fp = NULL;
6432 
6433 /* read_one_line --- return one input line at a time. mainly for debugging. */
6434 
6435 static ssize_t
read_one_line(int fd,void * buffer,size_t count)6436 read_one_line(int fd, void *buffer, size_t count)
6437 {
6438 	char buf[BUFSIZ];
6439 
6440 	/* Minor potential memory leak here. Too bad. */
6441 	if (fp == NULL) {
6442 		fp = fdopen(fd, "r");
6443 		if (fp == NULL) {
6444 			fprintf(stderr, "ugh. fdopen: %s\n", strerror(errno));
6445 			gawk_exit(EXIT_FAILURE);
6446 		}
6447 	}
6448 
6449 	if (fgets(buf, sizeof buf, fp) == NULL)
6450 		return 0;
6451 
6452 	memcpy(buffer, buf, strlen(buf));
6453 	return strlen(buf);
6454 }
6455 
6456 /* one_line_close --- close the open file being read with read_one_line() */
6457 
6458 static int
one_line_close(int fd)6459 one_line_close(int fd)
6460 {
6461 	int ret;
6462 
6463 	if (fp == NULL || fd != fileno(fp))
6464 		fatal("debugging read/close screwed up!");
6465 
6466 	ret = fclose(fp);
6467 	fp = NULL;
6468 	return ret;
6469 }
6470 
6471 
6472 /* lookup_builtin --- find a builtin function or return NULL */
6473 
6474 builtin_func_t
lookup_builtin(const char * name)6475 lookup_builtin(const char *name)
6476 {
6477 	if (strncmp(name, "awk::", 5) == 0)
6478 		name += 5;
6479 
6480 	int mid = check_special(name);
6481 
6482 	if (mid == -1)
6483 		return NULL;
6484 
6485 	switch (tokentab[mid].class) {
6486 	case LEX_BUILTIN:
6487 	case LEX_LENGTH:
6488 		break;
6489 	default:
6490 		return NULL;
6491 	}
6492 
6493 	/* And another special case... */
6494 	if (tokentab[mid].value == Op_sub_builtin)
6495 		return (builtin_func_t) do_sub;
6496 
6497 #ifdef HAVE_MPFR
6498 	if (do_mpfr && tokentab[mid].ptr2 != NULL)
6499 		return tokentab[mid].ptr2;
6500 #endif
6501 
6502 	return tokentab[mid].ptr;
6503 }
6504 
6505 /* install_builtins --- add built-in functions to FUNCTAB */
6506 
6507 void
install_builtins(void)6508 install_builtins(void)
6509 {
6510 	int i, j;
6511 	int flags_that_must_be_clear = DEBUG_USE;
6512 
6513 	if (do_traditional)
6514 		flags_that_must_be_clear |= GAWKX;
6515 
6516 	if (do_posix)
6517 		flags_that_must_be_clear |= NOT_POSIX;
6518 
6519 
6520 	j = sizeof(tokentab) / sizeof(tokentab[0]);
6521 	for (i = 0; i < j; i++) {
6522 		if (   (tokentab[i].class == LEX_BUILTIN
6523 		        || tokentab[i].class == LEX_LENGTH)
6524 		    && (tokentab[i].flags & flags_that_must_be_clear) == 0) {
6525 			(void) install_symbol(tokentab[i].operator, Node_builtin_func);
6526 		}
6527 	}
6528 }
6529 
6530 /*
6531  * 9/2014: Gawk cannot use <ctype.h> isalpha or isalnum when
6532  * parsing the program since that can let through non-English
6533  * letters.  So, we supply our own. !@#$%^&*()-ing locales!
6534  */
6535 
6536 /* is_alpha --- return true if c is an English letter */
6537 
6538 /*
6539  * The scene of the murder was grisly to look upon.  When the inspector
6540  * arrived, the sergeant turned to him and said, "Another programmer stabbed
6541  * in the back. He never knew what happened."
6542  *
6543  * The inspector replied, "Looks like the MO of isalpha, and his even meaner
6544  * big brother, isalnum. The Locale brothers."  The sergeant merely
6545  * shuddered in horror.
6546  */
6547 
6548 bool
is_alpha(int c)6549 is_alpha(int c)
6550 {
6551 	switch (c) {
6552 	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
6553 	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
6554 	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
6555 	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
6556 	case 'y': case 'z':
6557 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
6558 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
6559 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
6560 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
6561 	case 'Y': case 'Z':
6562 		return true;
6563 	}
6564 	return false;
6565 }
6566 
6567 /* is_alnum --- return true for alphanumeric, English only letters */
6568 
6569 bool
is_alnum(int c)6570 is_alnum(int c)
6571 {
6572 	/* digit test is good for EBCDIC too. so there. */
6573 	return (is_alpha(c) || ('0' <= c && c <= '9'));
6574 }
6575 
6576 
6577 /*
6578  * is_letter --- function to check letters
6579  *	isalpha() isn't good enough since it can look at the locale.
6580  * Underscore counts as a letter in awk identifiers
6581  */
6582 
6583 bool
is_letter(int c)6584 is_letter(int c)
6585 {
6586 	return (is_alpha(c) || c == '_');
6587 }
6588 
6589 /* is_identchar --- return true if c can be in an identifier */
6590 
6591 bool
is_identchar(int c)6592 is_identchar(int c)
6593 {
6594 	return (is_alnum(c) || c == '_');
6595 }
6596 
6597 /* set_profile_text --- make a number that can be printed when profiling */
6598 
6599 static NODE *
set_profile_text(NODE * n,const char * str,size_t len)6600 set_profile_text(NODE *n, const char *str, size_t len)
6601 {
6602 	if (do_pretty_print) {
6603 		// two extra bytes: one for NUL termination, and another in
6604 		// case we need to add a leading minus sign in add_sign_to_num
6605 		emalloc(n->stptr, char *, len + 2, "set_profile_text");
6606 		memcpy(n->stptr, str, len);
6607 		n->stptr[len] = '\0';
6608 		n->stlen = len;
6609 		// Set STRCUR and n->stfmt for use when profiling
6610 		// (i.e., actually running the program) so that
6611 		// force_string() on this item will work ok.
6612 		// Thanks and a tip of the hatlo to valgrind.
6613 		n->flags |= (NUMCONSTSTR|STRCUR);
6614 		n->stfmt = STFMT_UNUSED;
6615 #ifdef HAVE_MPFR
6616 		n->strndmode = MPFR_round_mode;
6617 #endif
6618 	}
6619 
6620 	return n;
6621 }
6622 
6623 /*
6624  * merge_comments --- merge c2 into c1 and free c2 if successful.
6625  *	Allow c2 to be NULL, in which case just merged chained
6626  *	comments in c1.
6627  */
6628 
6629 static void
merge_comments(INSTRUCTION * c1,INSTRUCTION * c2)6630 merge_comments(INSTRUCTION *c1, INSTRUCTION *c2)
6631 {
6632 	assert(c1->opcode == Op_comment);
6633 
6634 	if (c1->comment == NULL && c2 == NULL)	// nothing to do
6635 		return;
6636 
6637 	size_t total = c1->memory->stlen;
6638 	if (c1->comment != NULL)
6639 		total += 1 /* \n */ + c1->comment->memory->stlen;
6640 
6641 	if (c2 != NULL) {
6642 		assert(c2->opcode == Op_comment);
6643 		total += 1 /* \n */ + c2->memory->stlen;
6644 		if (c2->comment != NULL)
6645 			total += c2->comment->memory->stlen + 1;
6646 	}
6647 
6648 	char *buffer;
6649 	emalloc(buffer, char *, total + 1, "merge_comments");
6650 
6651 	strcpy(buffer, c1->memory->stptr);
6652 	if (c1->comment != NULL) {
6653 		strcat(buffer, "\n");
6654 		strcat(buffer, c1->comment->memory->stptr);
6655 	}
6656 
6657 	if (c2 != NULL) {
6658 		strcat(buffer, "\n");
6659 		strcat(buffer, c2->memory->stptr);
6660 		if (c2->comment != NULL) {
6661 			strcat(buffer, "\n");
6662 			strcat(buffer, c2->comment->memory->stptr);
6663 		}
6664 
6665 		unref(c2->memory);
6666 		if (c2->comment != NULL) {
6667 			unref(c2->comment->memory);
6668 			bcfree(c2->comment);
6669 			c2->comment = NULL;
6670 		}
6671 		bcfree(c2);
6672 	}
6673 
6674 	c1->memory->comment_type = BLOCK_COMMENT;
6675 	free(c1->memory->stptr);
6676 	c1->memory->stptr = buffer;
6677 	c1->memory->stlen = strlen(buffer);
6678 
6679 	// now free everything else
6680 	if (c1->comment != NULL) {
6681 		unref(c1->comment->memory);
6682 		bcfree(c1->comment);
6683 		c1->comment = NULL;
6684 	}
6685 }
6686 
6687 /* make_braced_statements --- handle `l_brace statements r_brace' with comments */
6688 
6689 static INSTRUCTION *
make_braced_statements(INSTRUCTION * lbrace,INSTRUCTION * stmts,INSTRUCTION * rbrace)6690 make_braced_statements(INSTRUCTION *lbrace, INSTRUCTION *stmts, INSTRUCTION *rbrace)
6691 {
6692 	INSTRUCTION *ip;
6693 
6694 	if (stmts == NULL)
6695 		ip = list_create(instruction(Op_no_op));
6696 	else
6697 		ip = stmts;
6698 
6699 	if (lbrace != NULL) {
6700 		INSTRUCTION *comment2 = lbrace->comment;
6701 		if (comment2 != NULL) {
6702 			ip = list_prepend(ip, comment2);
6703 			lbrace->comment = NULL;
6704 		}
6705 		ip = list_prepend(ip, lbrace);
6706 	}
6707 
6708 	return ip;
6709 }
6710 
6711 /* validate_qualified_name --- make sure that a qualified name is built correctly */
6712 
6713 /*
6714  * This routine returns upon first error, no need to produce multiple, possibly
6715  * conflicting / confusing error messages.
6716  */
6717 
6718 bool
validate_qualified_name(char * token)6719 validate_qualified_name(char *token)
6720 {
6721 	char *cp, *cp2;
6722 
6723 	// no colon, by definition it's well formed
6724 	if ((cp = strchr(token, ':')) == NULL)
6725 		return true;
6726 
6727 	if (do_traditional || do_posix) {
6728 		error_ln(sourceline, _("identifier %s: qualified names not allowed in traditional / POSIX mode"), token);
6729 		return false;
6730 	}
6731 
6732 	if (cp[1] != ':') {	// could happen from command line
6733 		error_ln(sourceline, _("identifier %s: namespace separator is two colons, not one"), token);
6734 		return false;
6735 	}
6736 
6737 	if (! is_letter(cp[2])) {
6738 		error_ln(sourceline,
6739 				_("qualified identifier `%s' is badly formed"),
6740 				token);
6741 		return false;
6742 	}
6743 
6744 	if ((cp2 = strchr(cp+2, ':')) != NULL) {
6745 		error_ln(sourceline,
6746 			_("identifier `%s': namespace separator can only appear once in a qualified name"),
6747 			token);
6748 		return false;
6749 	}
6750 
6751 	return true;
6752 }
6753 
6754 /* check_qualified_special --- decide if a name is special or not */
6755 
6756 static int
check_qualified_special(char * token)6757 check_qualified_special(char *token)
6758 {
6759 	char *cp;
6760 
6761 	if ((cp = strchr(token, ':')) == NULL && current_namespace == awk_namespace)
6762 		return check_special(token);
6763 
6764 	/*
6765 	 * Now it's more complicated.  Here are the rules.
6766 	 *
6767 	 * 1. Namespace name cannot be a standard awk reserved word or function.
6768 	 * 2. Subordinate part of the name cannot be a standard awk reserved word or function.
6769 	 * 3. If the namespace part is explicitly "awk", return the result of check_special().
6770 	 * 4. Else return -1 (gawk extensions allowed, we check standard awk in step 2).
6771 	 */
6772 
6773 	const struct token *tok;
6774 	int i;
6775 	if (cp == NULL) {	// namespace not awk, but a simple identifier
6776 		i = check_special(token);
6777 		if (i < 0)
6778 			return i;
6779 
6780 		tok = & tokentab[i];
6781 		if ((tok->flags & GAWKX) != 0 && tok->class == LEX_BUILTIN)
6782 			return -1;
6783 		else
6784 			return i;
6785 	}
6786 
6787 	char *ns, *end, *subname;
6788 	ns = token;
6789 	*(end = cp) = '\0';	// temporarily turn it into standalone string
6790 	subname = end + 2;
6791 
6792 	// First check the namespace part
6793 	i = check_special(ns);
6794 	if (i >= 0 && (tokentab[i].flags & GAWKX) == 0) {
6795 		error_ln(sourceline, _("using reserved identifier `%s' as a namespace is not allowed"), ns);
6796 		goto done;
6797 	}
6798 
6799 	// Now check the subordinate part
6800 	i = check_special(subname);
6801 	if (i >= 0 && (tokentab[i].flags & GAWKX) == 0 && strcmp(ns, awk_namespace) != 0) {
6802 		error_ln(sourceline, _("using reserved identifier `%s' as second component of a qualified name is not allowed"), subname);
6803 		goto done;
6804 	}
6805 
6806 	if (strcmp(ns, awk_namespace) == 0) {
6807 		i = check_special(subname);
6808 		if (i >= 0) {
6809 			if ((tokentab[i].flags & GAWKX) != 0 && tokentab[i].class == LEX_BUILTIN)
6810 				;	// gawk additional builtin function, is ok
6811 			else
6812 				error_ln(sourceline, _("using reserved identifier `%s' as second component of a qualified name is not allowed"), subname);
6813 		}
6814 	} else
6815 		i = -1;
6816 done:
6817 	*end = ':';
6818 	return i;
6819 }
6820 
6821 /* set_namespace --- change the current namespace */
6822 
6823 static void
set_namespace(INSTRUCTION * ns,INSTRUCTION * comment)6824 set_namespace(INSTRUCTION *ns, INSTRUCTION *comment)
6825 {
6826 	if (ns == NULL)
6827 		return;
6828 
6829 	if (do_traditional || do_posix) {
6830 		error_ln(ns->source_line, _("@namespace is a gawk extension"));
6831 		efree(ns->lextok);
6832 		bcfree(ns);
6833 		return;
6834 	}
6835 
6836 	if (! is_valid_identifier(ns->lextok)) {
6837 		error_ln(ns->source_line, _("namespace name `%s' must meet identifier naming rules"), ns->lextok);
6838 		efree(ns->lextok);
6839 		bcfree(ns);
6840 		return;
6841 	}
6842 
6843 	int mid = check_special(ns->lextok);
6844 
6845 	if (mid >= 0) {
6846 		error_ln(ns->source_line, _("using reserved identifier `%s' as a namespace is not allowed"), ns->lextok);
6847 		efree(ns->lextok);
6848 		bcfree(ns);
6849 		return;
6850 	}
6851 
6852 	if (strcmp(ns->lextok, current_namespace) == 0)
6853 		;	// nothing to do
6854 	else if (strcmp(ns->lextok, awk_namespace) == 0) {
6855 		set_current_namespace(awk_namespace);
6856 	} else {
6857 		set_current_namespace(estrdup(ns->lextok, strlen(ns->lextok)));
6858 	}
6859 	efree(ns->lextok);
6860 
6861 	// save info and push on front of list of namespaces seen
6862 	INSTRUCTION *new_ns = instruction(Op_K_namespace);
6863 	new_ns->comment = comment;
6864 	new_ns->ns_name = estrdup(current_namespace, strlen(current_namespace));
6865 	new_ns->nexti = namespace_chain;
6866 	namespace_chain = new_ns;
6867 
6868 	ns->lextok = NULL;
6869 	bcfree(ns);
6870 
6871 	namespace_changed = true;
6872 
6873 	return;
6874 }
6875 
6876 /* qualify_name --- put name into namespace */
6877 
6878 static char *
qualify_name(const char * name,size_t len)6879 qualify_name(const char *name, size_t len)
6880 {
6881 	if (strchr(name, ':') != NULL)	// already qualified
6882 		return estrdup(name, len);
6883 
6884 	NODE *p = lookup(name);
6885 	if (p != NULL && p->type == Node_param_list)
6886 		return estrdup(name, len);
6887 
6888 	if (current_namespace != awk_namespace && ! is_all_upper(name)) {
6889 		size_t length = strlen(current_namespace) + 2 + len + 1;
6890 		char *buf;
6891 
6892 		emalloc(buf, char *, length, "qualify_name");
6893 		sprintf(buf, "%s::%s", current_namespace, name);
6894 
6895 		return buf;
6896 	}
6897 
6898 	return estrdup(name, len);
6899 }
6900