1
2 /********************************************
3 parse.y
4 copyright 1991, Michael D. Brennan
5
6 This is a source file for mawk, an implementation of
7 the AWK programming language.
8
9 Mawk is distributed without warranty under the terms of
10 the GNU General Public License, version 2, 1991.
11 ********************************************/
12
13 /* $Log: parse.y,v $
14 * Revision 5.4 1992/08/08 17:17:20 brennan
15 * patch 2: improved timing of error recovery in
16 * bungled function definitions. Fixes a core dump
17 *
18 * Revision 5.3 1992/07/08 15:43:41 brennan
19 * patch2: length returns. I am a wimp
20 *
21 * Revision 5.2 1992/01/08 16:11:42 brennan
22 * code FE_PUSHA carefully for MSDOS large mode
23 *
24 * Revision 5.1 91/12/05 07:50:22 brennan
25 * 1.1 pre-release
26 *
27 */
28
29
30 %{
31 #include <stdio.h>
32 #include "mawk.h"
33 #include "code.h"
34 #include "symtype.h"
35 #include "memory.h"
36 #include "bi_funct.h"
37 #include "bi_vars.h"
38 #include "jmp.h"
39 #include "field.h"
40 #include "files.h"
41
42 #ifdef YYXBYACC
43 #define YYBYACC 1
44 #endif
45
46 #define YYMAXDEPTH 200
47
48 /* Bison's use of MSDOS and ours clashes */
49 #undef MSDOS
50
51 extern void PROTO( eat_nl, (void) ) ;
52 static void PROTO( resize_fblock, (FBLOCK *, INST *) ) ;
53 static void PROTO( code_array, (SYMTAB *) ) ;
54 static void PROTO( code_call_id, (CA_REC *, SYMTAB *) ) ;
55 static void PROTO( field_A2I, (void)) ;
56 static int PROTO( current_offset, (void) ) ;
57 static void PROTO( check_var, (SYMTAB *) ) ;
58 static void PROTO( check_array, (SYMTAB *) ) ;
59 static void PROTO( RE_as_arg, (void)) ;
60
61 static int scope ;
62 static FBLOCK *active_funct ;
63 /* when scope is SCOPE_FUNCT */
64
65 #define code_address(x) if( is_local(x) )\
66 { code1(L_PUSHA) ; code1((x)->offset) ; }\
67 else code2(_PUSHA, (x)->stval.cp)
68
69 /* this nonsense caters to MSDOS large model */
70 #define CODE_FE_PUSHA() code_ptr->ptr = (PTR) 0 ; code1(FE_PUSHA)
71
72 %}
73
74 %union{
75 CELL *cp ;
76 SYMTAB *stp ;
77 INST *start ; /* code starting address */
78 PF_CP fp ; /* ptr to a (print/printf) or (sub/gsub) function */
79 BI_REC *bip ; /* ptr to info about a builtin */
80 FBLOCK *fbp ; /* ptr to a function block */
81 ARG2_REC *arg2p ;
82 CA_REC *ca_p ;
83 int ival ;
84 PTR ptr ;
85 }
86
87 /* two tokens to help with errors */
88 %token UNEXPECTED /* unexpected character */
89 %token BAD_DECIMAL
90
91 %token NL
92 %token SEMI_COLON
93 %token LBRACE RBRACE
94 %token LBOX RBOX
95 %token COMMA
96 %token <ival> IO_OUT /* > or output pipe */
97
98 %right ASSIGN ADD_ASG SUB_ASG MUL_ASG DIV_ASG MOD_ASG POW_ASG
99 %right QMARK COLON
100 %left OR
101 %left AND
102 %left IN
103 %left <ival> MATCH /* ~ or !~ */
104 %left EQ NEQ LT LTE GT GTE
105 %left CAT
106 %left GETLINE
107 %left PLUS MINUS
108 %left MUL DIV MOD
109 %left NOT UMINUS
110 %nonassoc IO_IN PIPE
111 %right POW
112 %left <ival> INC_or_DEC
113 %left DOLLAR FIELD /* last to remove a SR conflict
114 with getline */
115 %right LPAREN RPAREN /* removes some SR conflicts */
116
117 %token <ptr> DOUBLE STRING_ RE
118 %token <stp> ID D_ID
119 %token <fbp> FUNCT_ID
120 %token <bip> BUILTIN LENGTH
121 %token <cp> FIELD
122
123 %token PRINT PRINTF SPLIT MATCH_FUNC SUB GSUB
124 /* keywords */
125 %token DO WHILE FOR BREAK CONTINUE IF ELSE IN
126 %token DELETE BEGIN END EXIT NEXT RETURN FUNCTION
127
128 %type <start> block block_or_separator
129 %type <start> statement_list statement mark
130 %type <ival> pr_args
131 %type <arg2p> arg2
132 %type <start> builtin
133 %type <start> getline_file
134 %type <start> lvalue field fvalue
135 %type <start> expr cat_expr p_expr
136 %type <start> while_front if_front
137 %type <start> for1 for2
138 %type <start> array_loop_front
139 %type <start> return_statement
140 %type <start> split_front re_arg sub_back
141 %type <ival> arglist args
142 %type <fp> print sub_or_gsub
143 %type <fbp> funct_start funct_head
144 %type <ca_p> call_args ca_front ca_back
145 %type <ival> f_arglist f_args
146
147 %%
148 /* productions */
149
150 program : program_block
151 | program program_block
152 ;
153
154 program_block : PA_block /* pattern-action */
155 | function_def
156 | outside_error block
157 ;
158
159 PA_block : block
160 { /* this do nothing action removes a vacuous warning
161 from Bison */
162 }
163
164 | BEGIN
165 {
166 be_expand(&begin_code) ;
167 scope = SCOPE_BEGIN ;
168 }
169
170 block
171 { be_shrink(&begin_code) ;
172 scope = SCOPE_MAIN ;
173 }
174
175 | END
176 {
177 be_expand(&end_code) ;
178 scope = SCOPE_END ;
179 }
180
181 block
182 { be_shrink(&end_code) ;
183 scope = SCOPE_MAIN ;
184 }
185
186 | expr /* this works just like an if statement */
187 { code_jmp(_JZ, (INST*)0) ; }
188
189 block_or_separator
190 { patch_jmp( code_ptr ) ; }
191
192 /* range pattern, see comment in execute.c near _RANGE */
193 | expr COMMA
194 { code_push($1, code_ptr - $1) ;
195 code_ptr = $1 ;
196 code1(_RANGE) ; code1(1) ;
197 code_ptr += 3 ;
198 code_ptr += code_pop(code_ptr) ;
199 code1(_STOP) ;
200 $1[2].op = code_ptr - ($1+1) ;
201 }
202 expr
203 { code1(_STOP) ; }
204
205 block_or_separator
206 { $1[3].op = $6 - ($1+1) ;
207 $1[4].op = code_ptr - ($1+1) ;
208 }
209 ;
210
211
212
213 block : LBRACE statement_list RBRACE
214 { $$ = $2 ; }
215 | LBRACE error RBRACE
216 { $$ = code_ptr ; /* does nothing won't be executed */
217 print_flag = getline_flag = paren_cnt = 0 ;
218 yyerrok ; }
219 ;
220
221 block_or_separator : block
222 | separator /* default print action */
223 { $$ = code_ptr ;
224 code1(_PUSHINT) ; code1(0) ;
225 code2(_PRINT, bi_print) ;
226 }
227
228 statement_list : statement
229 | statement_list statement
230 ;
231
232
233 statement : block
234 | expr separator
235 { code1(_POP) ; }
236 | /* empty */ separator
237 { $$ = code_ptr ; }
238 | error separator
239 { $$ = code_ptr ;
240 print_flag = getline_flag = 0 ;
241 paren_cnt = 0 ;
242 yyerrok ;
243 }
244 | BREAK separator
245 { $$ = code_ptr ; BC_insert('B', code_ptr+1) ;
246 code2(_JMP, 0) /* don't use code_jmp ! */ ; }
247 | CONTINUE separator
248 { $$ = code_ptr ; BC_insert('C', code_ptr+1) ;
249 code2(_JMP, 0) ; }
250 | return_statement
251 { if ( scope != SCOPE_FUNCT )
252 compile_error("return outside function body") ;
253 }
254 | NEXT separator
255 { if ( scope != SCOPE_MAIN )
256 compile_error( "improper use of next" ) ;
257 $$ = code_ptr ;
258 code1(_NEXT) ;
259 }
260 ;
261
262 separator : NL | SEMI_COLON
263 ;
264
265 expr : cat_expr
266 | lvalue ASSIGN expr { code1(_ASSIGN) ; }
267 | lvalue ADD_ASG expr { code1(_ADD_ASG) ; }
268 | lvalue SUB_ASG expr { code1(_SUB_ASG) ; }
269 | lvalue MUL_ASG expr { code1(_MUL_ASG) ; }
270 | lvalue DIV_ASG expr { code1(_DIV_ASG) ; }
271 | lvalue MOD_ASG expr { code1(_MOD_ASG) ; }
272 | lvalue POW_ASG expr { code1(_POW_ASG) ; }
273 | expr EQ expr { code1(_EQ) ; }
274 | expr NEQ expr { code1(_NEQ) ; }
275 | expr LT expr { code1(_LT) ; }
276 | expr LTE expr { code1(_LTE) ; }
277 | expr GT expr { code1(_GT) ; }
278 | expr GTE expr { code1(_GTE) ; }
279
280 | expr MATCH expr
281 {
282 if ( $3 == code_ptr - 2 )
283 {
284 if ( $3->op == _MATCH0 ) $3->op = _MATCH1 ;
285
286 else /* check for string */
287 if ( $3->op == _PUSHS )
288 { CELL *cp = ZMALLOC(CELL) ;
289
290 cp->type = C_STRING ;
291 cp->ptr = $3[1].ptr ;
292 cast_to_RE(cp) ;
293 code_ptr -= 2 ;
294 code2(_MATCH1, cp->ptr) ;
295 ZFREE(cp) ;
296 }
297 else code1(_MATCH2) ;
298 }
299 else code1(_MATCH2) ;
300
301 if ( !$2 ) code1(_NOT) ;
302 }
303
304 /* short circuit boolean evaluation */
305 | expr OR
306 { code1(_DUP) ;
307 code_jmp(_JNZ, (INST*)0) ;
308 code1(_POP) ;
309 }
310 expr
311 { patch_jmp(code_ptr) ; code1(_TEST) ; }
312
313 | expr AND
314 { code1(_DUP) ; code_jmp(_JZ, (INST*)0) ;
315 code1(_POP) ; }
316 expr
317 { patch_jmp(code_ptr) ; code1(_TEST) ; }
318
319 | expr QMARK { code_jmp(_JZ, (INST*)0) ; }
320 expr COLON { code_jmp(_JMP, (INST*)0) ; }
321 expr
322 { patch_jmp(code_ptr) ; patch_jmp($7) ; }
323 ;
324
325 cat_expr : p_expr %prec CAT
326 | cat_expr p_expr %prec CAT
327 { code1(_CAT) ; }
328 ;
329
330 p_expr : DOUBLE
331 { $$ = code_ptr ; code2(_PUSHD, $1) ; }
332 | STRING_
333 { $$ = code_ptr ; code2(_PUSHS, $1) ; }
334 | ID %prec AND /* anything less than IN */
335 { check_var($1) ;
336 $$ = code_ptr ;
337 if ( is_local($1) )
338 { code1(L_PUSHI) ; code1($1->offset) ; }
339 else code2(_PUSHI, $1->stval.cp) ;
340 }
341
342 | LPAREN expr RPAREN
343 { $$ = $2 ; }
344 ;
345
346 p_expr : RE
347 { $$ = code_ptr ; code2(_MATCH0, $1) ; }
348 ;
349
350 p_expr : p_expr PLUS p_expr { code1(_ADD) ; }
351 | p_expr MINUS p_expr { code1(_SUB) ; }
352 | p_expr MUL p_expr { code1(_MUL) ; }
353 | p_expr DIV p_expr { code1(_DIV) ; }
354 | p_expr MOD p_expr { code1(_MOD) ; }
355 | p_expr POW p_expr { code1(_POW) ; }
356 | NOT p_expr
357 { $$ = $2 ; code1(_NOT) ; }
358 | PLUS p_expr %prec UMINUS
359 { $$ = $2 ; code1(_UPLUS) ; }
360 | MINUS p_expr %prec UMINUS
361 { $$ = $2 ; code1(_UMINUS) ; }
362 | builtin
363 ;
364
365 p_expr : ID INC_or_DEC
366 { check_var($1) ;
367 $$ = code_ptr ;
368 code_address($1) ;
369
370 if ( $2 == '+' ) code1(_POST_INC) ;
371 else code1(_POST_DEC) ;
372 }
373 | INC_or_DEC lvalue
374 { $$ = $2 ;
375 if ( $1 == '+' ) code1(_PRE_INC) ;
376 else code1(_PRE_DEC) ;
377 }
378 ;
379
380 p_expr : field INC_or_DEC
381 { if ($2 == '+' ) code1(F_POST_INC ) ;
382 else code1(F_POST_DEC) ;
383 }
384 | INC_or_DEC field
385 { $$ = $2 ;
386 if ( $1 == '+' ) code1(F_PRE_INC) ;
387 else code1( F_PRE_DEC) ;
388 }
389 ;
390
391 lvalue : ID
392 { $$ = code_ptr ;
393 check_var($1) ;
394 code_address($1) ;
395 }
396 ;
397
398
399 arglist : /* empty */
400 { $$ = 0 ; }
401 | args
402 ;
403
404 args : expr %prec LPAREN
405 { $$ = 1 ; }
406 | args COMMA expr
407 { $$ = $1 + 1 ; }
408 ;
409
410 builtin :
411 BUILTIN mark LPAREN arglist RPAREN
412 { BI_REC *p = $1 ;
413 $$ = $2 ;
414 if ( p-> min_args > $4 || p->max_args < $4 )
415 compile_error(
416 "wrong number of arguments in call to %s" ,
417 p->name ) ;
418 if ( p->min_args != p->max_args ) /* variable args */
419 { code1(_PUSHINT) ; code1($4) ; }
420 code2(_BUILTIN , p->fp) ;
421 }
422 | LENGTH /* this is an irritation */
423 {
424 code1(_PUSHINT) ; code1(0) ;
425 code2(_BUILTIN, $1->fp) ;
426 }
427 ;
428
429 /* an empty production to store the code_ptr */
430 mark : /* empty */
431 { $$ = code_ptr ; }
432
433 /* print_statement */
434 statement : print mark pr_args pr_direction separator
435 { code2(_PRINT, $1) ; $$ = $2 ;
436 if ( $1 == bi_printf && $3 == 0 )
437 compile_error("no arguments in call to printf") ;
438 print_flag = 0 ;
439 $$ = $2 ;
440 }
441 ;
442
443 print : PRINT { $$ = bi_print ; print_flag = 1 ;}
444 | PRINTF { $$ = bi_printf ; print_flag = 1 ; }
445 ;
446
447 pr_args : arglist { code1(_PUSHINT) ; code1($1) ; }
448 | LPAREN arg2 RPAREN
449 { $$ = $2->cnt ; zfree($2,sizeof(ARG2_REC)) ;
450 code1(_PUSHINT) ; code1($$) ;
451 }
452 | LPAREN RPAREN
453 { $$=0 ; code1(_PUSHINT) ; code1(0) ; }
454 ;
455
456 arg2 : expr COMMA expr
457 { $$ = (ARG2_REC*) zmalloc(sizeof(ARG2_REC)) ;
458 $$->start = $1 ;
459 $$->cnt = 2 ;
460 }
461 | arg2 COMMA expr
462 { $$ = $1 ; $$->cnt++ ; }
463 ;
464
465 pr_direction : /* empty */
466 | IO_OUT expr
467 { code1(_PUSHINT) ; code1($1) ; }
468 ;
469
470
471 /* IF and IF-ELSE */
472
473 if_front : IF LPAREN expr RPAREN
474 { $$ = $3 ; eat_nl() ; code_jmp(_JZ, (INST*)0) ; }
475 ;
476
477 /* if_statement */
478 statement : if_front statement
479 { patch_jmp( code_ptr ) ; }
480 ;
481
482 else : ELSE { eat_nl() ; code_jmp(_JMP, (INST*)0) ; }
483 ;
484
485 /* if_else_statement */
486 statement : if_front statement else statement
487 { patch_jmp(code_ptr) ; patch_jmp($4) ; }
488
489
490 /* LOOPS */
491
492 do : DO
493 { eat_nl() ; BC_new() ; }
494 ;
495
496 /* do_statement */
497 statement : do statement WHILE LPAREN expr RPAREN separator
498 { $$ = $2 ;
499 code_jmp(_JNZ, $2) ;
500 BC_clear(code_ptr, $5) ; }
501 ;
502
503 while_front : WHILE LPAREN expr RPAREN
504 { eat_nl() ; BC_new() ;
505 $$ = $3 ;
506
507 /* check if const expression */
508 if ( code_ptr - 2 == $3 &&
509 code_ptr[-2].op == _PUSHD &&
510 *(double*)code_ptr[-1].ptr != 0.0
511 )
512 code_ptr -= 2 ;
513 else
514 {
515 code_push($3, code_ptr-$3) ;
516 code_ptr = $3 ;
517 code2(_JMP, (INST*)0) ; /* code2() not code_jmp() */
518 }
519 }
520 ;
521
522 /* while_statement */
523 statement : while_front statement
524 {
525 INST *c_addr ; int len ;
526
527 if ( $1 != $2 ) /* real test in loop */
528 {
529 $1[1].op = code_ptr-($1+1) ;
530 c_addr = code_ptr ;
531 len = code_pop(code_ptr) ;
532 code_ptr += len ;
533 code_jmp(_JNZ, $2) ;
534 BC_clear(code_ptr, c_addr) ;
535 }
536 else /* while(1) */
537 {
538 code_jmp(_JMP, $1) ;
539 BC_clear(code_ptr, $2) ;
540 }
541 }
542 ;
543
544
545 /* for_statement */
546 statement : for1 for2 for3 statement
547 {
548 INST *cont_address = code_ptr ;
549 unsigned len = code_pop(code_ptr) ;
550
551 code_ptr += len ;
552
553 if ( $2 != $4 ) /* real test in for2 */
554 {
555 $4[-1].op = code_ptr - $4 + 1 ;
556 len = code_pop(code_ptr) ;
557 code_ptr += len ;
558 code_jmp(_JNZ, $4) ;
559 }
560 else /* for(;;) */
561 code_jmp(_JMP, $4) ;
562
563 BC_clear(code_ptr, cont_address) ;
564
565 }
566 ;
567
568 for1 : FOR LPAREN SEMI_COLON { $$ = code_ptr ; }
569 | FOR LPAREN expr SEMI_COLON
570 { $$ = $3 ; code1(_POP) ; }
571 ;
572
573 for2 : SEMI_COLON { $$ = code_ptr ; }
574 | expr SEMI_COLON
575 {
576 if ( code_ptr - 2 == $1 &&
577 code_ptr[-2].op == _PUSHD &&
578 * (double*) code_ptr[-1].ptr != 0.0
579 )
580 code_ptr -= 2 ;
581 else
582 {
583 code_push($1, code_ptr-$1) ;
584 code_ptr = $1 ;
585 code2(_JMP, (INST*)0) ;
586 }
587 }
588 ;
589
590 for3 : RPAREN
591 { eat_nl() ; BC_new() ; code_push((INST*)0,0) ; }
592 | expr RPAREN
593 { eat_nl() ; BC_new() ;
594 code1(_POP) ;
595 code_push($1, code_ptr - $1) ;
596 code_ptr -= code_ptr - $1 ;
597 }
598 ;
599
600
601 /* arrays */
602
603 expr : expr IN ID
604 { check_array($3) ;
605 code_array($3) ;
606 code1(A_TEST) ;
607 }
608 | LPAREN arg2 RPAREN IN ID
609 { $$ = $2->start ;
610 code1(A_CAT) ; code1($2->cnt) ;
611 zfree($2, sizeof(ARG2_REC)) ;
612
613 check_array($5) ;
614 code_array($5) ;
615 code1(A_TEST) ;
616 }
617 ;
618
619 lvalue : ID mark LBOX args RBOX
620 {
621 if ( $4 > 1 )
622 { code1(A_CAT) ; code1($4) ; }
623
624 check_array($1) ;
625 if( is_local($1) )
626 { code1(LAE_PUSHA) ; code1($1->offset) ; }
627 else code2(AE_PUSHA, $1->stval.array) ;
628 $$ = $2 ;
629 }
630 ;
631
632 p_expr : ID mark LBOX args RBOX %prec AND
633 {
634 if ( $4 > 1 )
635 { code1(A_CAT) ; code1($4) ; }
636
637 check_array($1) ;
638 if( is_local($1) )
639 { code1(LAE_PUSHI) ; code1($1->offset) ; }
640 else code2(AE_PUSHI, $1->stval.array) ;
641 $$ = $2 ;
642 }
643
644 | ID mark LBOX args RBOX INC_or_DEC
645 {
646 if ( $4 > 1 )
647 { code1(A_CAT) ; code1($4) ; }
648
649 check_array($1) ;
650 if( is_local($1) )
651 { code1(LAE_PUSHA) ; code1($1->offset) ; }
652 else code2(AE_PUSHA, $1->stval.array) ;
653 if ( $6 == '+' ) code1(_POST_INC) ;
654 else code1(_POST_DEC) ;
655
656 $$ = $2 ;
657 }
658 ;
659
660 /* delete A[i] */
661 statement : DELETE ID mark LBOX args RBOX separator
662 {
663 $$ = $3 ;
664 if ( $5 > 1 ) { code1(A_CAT) ; code1($5) ; }
665 check_array($2) ;
666 code_array($2) ;
667 code1(A_DEL) ;
668 }
669
670 ;
671
672 /* for ( i in A ) statement */
673
674 array_loop_front : FOR LPAREN ID IN ID RPAREN
675 { eat_nl() ; BC_new() ;
676 $$ = code_ptr ;
677
678 check_var($3) ;
679 code_address($3) ;
680 check_array($5) ;
681 code_array($5) ;
682
683 code2(SET_ALOOP, (INST*)0) ;
684 }
685 ;
686
687 /* array_loop */
688 statement : array_loop_front statement
689 {
690 $2[-1].op = code_ptr - $2 + 1 ;
691 BC_clear( code_ptr+3 , code_ptr) ;
692 code_jmp(ALOOP, $2) ;
693 code_ptr++->ptr = (PTR) ZMALLOC(ALOOP_STATE) ;
694 }
695 ;
696
697 /* fields
698 D_ID is a special token , same as an ID, but yylex()
699 only returns it after a '$'. In essense,
700 DOLLAR D_ID is really one token.
701 */
702
703 field : FIELD
704 { $$ = code_ptr ; code2(F_PUSHA, $1) ; }
705 | DOLLAR D_ID
706 { check_var($2) ;
707 $$ = code_ptr ;
708 if ( is_local($2) )
709 { code1(L_PUSHI) ; code1($2->offset) ; }
710 else code2(_PUSHI, $2->stval.cp) ;
711
712 CODE_FE_PUSHA() ;
713 }
714 | DOLLAR D_ID mark LBOX args RBOX
715 {
716 if ( $5 > 1 )
717 { code1(A_CAT) ; code1($5) ; }
718
719 check_array($2) ;
720 if( is_local($2) )
721 { code1(LAE_PUSHI) ; code1($2->offset) ; }
722 else code2(AE_PUSHI, $2->stval.array) ;
723
724 CODE_FE_PUSHA() ;
725
726 $$ = $3 ;
727 }
728 | DOLLAR p_expr
729 { $$ = $2 ; CODE_FE_PUSHA() ; }
730 | LPAREN field RPAREN
731 { $$ = $2 ; }
732 ;
733
734 p_expr : field %prec CAT /* removes field (++|--) sr conflict */
735 { field_A2I() ; }
736 ;
737
738 expr : field ASSIGN expr { code1(F_ASSIGN) ; }
739 | field ADD_ASG expr { code1(F_ADD_ASG) ; }
740 | field SUB_ASG expr { code1(F_SUB_ASG) ; }
741 | field MUL_ASG expr { code1(F_MUL_ASG) ; }
742 | field DIV_ASG expr { code1(F_DIV_ASG) ; }
743 | field MOD_ASG expr { code1(F_MOD_ASG) ; }
744 | field POW_ASG expr { code1(F_POW_ASG) ; }
745 ;
746
747 /* split is handled different than a builtin because
748 it takes an array and optionally a regular expression as args */
749
750 p_expr : split_front split_back
751 { code2(_BUILTIN, bi_split) ; }
752 ;
753
754 split_front : SPLIT LPAREN expr COMMA ID
755 { $$ = $3 ;
756 check_array($5) ;
757 code_array($5) ;
758 }
759 ;
760
761 split_back : RPAREN
762 { code2(_PUSHI, &fs_shadow) ; }
763 | COMMA expr RPAREN
764 {
765 if ( $2 == code_ptr - 2 )
766 {
767 if ( code_ptr[-2].op == _MATCH0 )
768 RE_as_arg() ;
769 else
770 if ( code_ptr[-2].op == _PUSHS )
771 { CELL *cp = ZMALLOC(CELL) ;
772
773 cp->type = C_STRING ;
774 cp->ptr = code_ptr[-1].ptr ;
775 cast_for_split(cp) ;
776 code_ptr[-2].op = _PUSHC ;
777 code_ptr[-1].ptr = (PTR) cp ;
778 }
779 }
780 }
781 ;
782
783
784
785 /* match(expr, RE) */
786
787 p_expr : MATCH_FUNC LPAREN expr COMMA re_arg RPAREN
788 { $$ = $3 ;
789 code2(_BUILTIN, bi_match) ;
790 }
791 ;
792
793
794 re_arg : expr
795 {
796 if ( $1 == code_ptr - 2 )
797 {
798 if ( $1->op == _MATCH0 ) RE_as_arg() ;
799 else
800 if ( $1->op == _PUSHS )
801 { CELL *cp = ZMALLOC(CELL) ;
802
803 cp->type = C_STRING ;
804 cp->ptr = $1[1].ptr ;
805 cast_to_RE(cp) ;
806 $1->op = _PUSHC ;
807 $1[1].ptr = (PTR) cp ;
808 }
809 }
810 }
811
812
813
814 /* exit_statement */
815 statement : EXIT separator
816 { $$ = code_ptr ;
817 code1(_EXIT0) ; }
818 | EXIT expr separator
819 { $$ = $2 ; code1(_EXIT) ; }
820
821 return_statement : RETURN separator
822 { $$ = code_ptr ;
823 code1(_RET0) ; }
824 | RETURN expr separator
825 { $$ = $2 ; code1(_RET) ; }
826
827 /* getline */
828
829 p_expr : getline %prec GETLINE
830 { $$ = code_ptr ;
831 code2(F_PUSHA, &field[0]) ;
832 code1(_PUSHINT) ; code1(0) ;
833 code2(_BUILTIN, bi_getline) ;
834 getline_flag = 0 ;
835 }
836 | getline fvalue %prec GETLINE
837 { $$ = $2 ;
838 code1(_PUSHINT) ; code1(0) ;
839 code2(_BUILTIN, bi_getline) ;
840 getline_flag = 0 ;
841 }
842 | getline_file p_expr %prec IO_IN
843 { code1(_PUSHINT) ; code1(F_IN) ;
844 code2(_BUILTIN, bi_getline) ;
845 /* getline_flag already off in yylex() */
846 }
847 | p_expr PIPE GETLINE
848 { code2(F_PUSHA, &field[0]) ;
849 code1(_PUSHINT) ; code1(PIPE_IN) ;
850 code2(_BUILTIN, bi_getline) ;
851 }
852 | p_expr PIPE GETLINE fvalue
853 {
854 code1(_PUSHINT) ; code1(PIPE_IN) ;
855 code2(_BUILTIN, bi_getline) ;
856 }
857 ;
858
859 getline : GETLINE { getline_flag = 1 ; }
860
861 fvalue : lvalue | field ;
862
863 getline_file : getline IO_IN
864 { $$ = code_ptr ;
865 code2(F_PUSHA, field+0) ;
866 }
867 | getline fvalue IO_IN
868 { $$ = $2 ; }
869 ;
870
871 /*==========================================
872 sub and gsub
873 ==========================================*/
874
875 p_expr : sub_or_gsub LPAREN re_arg COMMA expr sub_back
876 {
877 if ( $6 - $5 == 2 && $5->op == _PUSHS )
878 { /* cast from STRING to REPL at compile time */
879 CELL *cp = ZMALLOC(CELL) ;
880 cp->type = C_STRING ;
881 cp->ptr = $5[1].ptr ;
882 cast_to_REPL(cp) ;
883 $5->op = _PUSHC ;
884 $5[1].ptr = (PTR) cp ;
885 }
886 code2(_BUILTIN, $1) ;
887 $$ = $3 ;
888 }
889 ;
890
891 sub_or_gsub : SUB { $$ = bi_sub ; }
892 | GSUB { $$ = bi_gsub ; }
893 ;
894
895
896 sub_back : RPAREN /* substitute into $0 */
897 { $$ = code_ptr ;
898 code2(F_PUSHA, &field[0]) ;
899 }
900
901 | COMMA fvalue RPAREN
902 { $$ = $2 ; }
903 ;
904
905 /*================================================
906 user defined functions
907 *=================================*/
908
909 function_def : funct_start block
910 { resize_fblock($1, code_ptr) ;
911 code_ptr = main_code_ptr ;
912 scope = SCOPE_MAIN ;
913 active_funct = (FBLOCK *) 0 ;
914 restore_ids() ;
915 }
916 ;
917
918
919 funct_start : funct_head LPAREN f_arglist RPAREN
920 { eat_nl() ;
921 scope = SCOPE_FUNCT ;
922 active_funct = $1 ;
923 main_code_ptr = code_ptr ;
924
925 if ( $1->nargs = $3 )
926 $1->typev = (char *)
927 memset( zmalloc($3), ST_LOCAL_NONE, SIZE_T($3)) ;
928 else $1->typev = (char *) 0 ;
929 code_ptr = $1->code =
930 (INST *) zmalloc(PAGE_SZ*sizeof(INST)) ;
931 }
932 ;
933
934 funct_head : FUNCTION ID
935 { FBLOCK *fbp ;
936
937 if ( $2->type == ST_NONE )
938 {
939 $2->type = ST_FUNCT ;
940 fbp = $2->stval.fbp =
941 (FBLOCK *) zmalloc(sizeof(FBLOCK)) ;
942 fbp->name = $2->name ;
943 }
944 else
945 {
946 type_error( $2 ) ;
947
948 /* this FBLOCK will not be put in
949 the symbol table */
950 fbp = (FBLOCK*) zmalloc(sizeof(FBLOCK)) ;
951 fbp->name = "" ;
952 }
953 $$ = fbp ;
954 }
955
956 | FUNCTION FUNCT_ID
957 { $$ = $2 ;
958 if ( $2->code )
959 compile_error("redefinition of %s" , $2->name) ;
960 }
961 ;
962
963 f_arglist : /* empty */ { $$ = 0 ; }
964 | f_args
965 ;
966
967 f_args : ID
968 { $1 = save_id($1->name) ;
969 $1->type = ST_LOCAL_NONE ;
970 $1->offset = 0 ;
971 $$ = 1 ;
972 }
973 | f_args COMMA ID
974 { if ( is_local($3) )
975 compile_error("%s is duplicated in argument list",
976 $3->name) ;
977 else
978 { $3 = save_id($3->name) ;
979 $3->type = ST_LOCAL_NONE ;
980 $3->offset = $1 ;
981 $$ = $1 + 1 ;
982 }
983 }
984 ;
985
986 outside_error : error
987 { /* we may have to recover from a bungled function
988 definition */
989
990 /* can have local ids, before code scope
991 changes */
992 restore_ids() ;
993
994 if (scope == SCOPE_FUNCT)
995 { scope = SCOPE_MAIN ;
996 active_funct = (FBLOCK*) 0 ;
997 }
998
999 code_ptr = main_code_ptr ;
1000 }
1001 ;
1002
1003 /* a call to a user defined function */
1004
1005 p_expr : FUNCT_ID mark call_args
1006 { $$ = $2 ;
1007 code2(_CALL, $1) ;
1008
1009 if ( $3 ) code1($3->arg_num+1) ;
1010 else code1(0) ;
1011
1012 check_fcall($1, scope, active_funct,
1013 $3, token_lineno) ;
1014 }
1015 ;
1016
1017 call_args : LPAREN RPAREN
1018 { $$ = (CA_REC *) 0 ; }
1019 | ca_front ca_back
1020 { $$ = $2 ;
1021 $$->link = $1 ;
1022 $$->arg_num = $1 ? $1->arg_num+1 : 0 ;
1023 }
1024 ;
1025
1026 /* The funny definition of ca_front with the COMMA bound to the ID is to
1027 force a shift to avoid a reduce/reduce conflict
1028 ID->id or ID->array
1029
1030 Or to avoid a decision, if the type of the ID has not yet been
1031 determined
1032 */
1033
1034 ca_front : LPAREN
1035 { $$ = (CA_REC *) 0 ; }
1036 | ca_front expr COMMA
1037 { $$ = (CA_REC *) zmalloc(sizeof(CA_REC)) ;
1038 $$->link = $1 ;
1039 $$->type = CA_EXPR ;
1040 $$->arg_num = $1 ? $1->arg_num+1 : 0 ;
1041 }
1042 | ca_front ID COMMA
1043 { $$ = (CA_REC *) zmalloc(sizeof(CA_REC)) ;
1044 $$->link = $1 ;
1045 $$->arg_num = $1 ? $1->arg_num+1 : 0 ;
1046
1047 code_call_id($$, $2) ;
1048 }
1049 ;
1050
1051 ca_back : expr RPAREN
1052 { $$ = (CA_REC *) zmalloc(sizeof(CA_REC)) ;
1053 $$->type = CA_EXPR ;
1054 }
1055
1056 | ID RPAREN
1057 { $$ = (CA_REC *) zmalloc(sizeof(CA_REC)) ;
1058 code_call_id($$, $1) ;
1059 }
1060 ;
1061
1062
1063
1064
1065 %%
1066
1067 /* resize the code for a user function */
1068
resize_fblock(fbp,code_ptr)1069 static void resize_fblock( fbp, code_ptr )
1070 FBLOCK *fbp ;
1071 INST *code_ptr ;
1072 { int size ;
1073
1074 code1(_RET0) ; /* make sure there is always a return statement */
1075
1076 #if !SM_DOS
1077 if ( dump_code )
1078 { code1(_HALT) ; /*stops da() */
1079 add_to_fdump_list(fbp) ;
1080 }
1081 #endif
1082
1083 if ( (size = code_ptr - fbp->code) > PAGE_SZ-1 )
1084 overflow("function code size", PAGE_SZ ) ;
1085
1086 /* resize the code */
1087 fbp->code = (INST*) zrealloc(fbp->code, PAGE_SZ*sizeof(INST),
1088 size * sizeof(INST) ) ;
1089
1090 }
1091
1092
1093 /* convert FE_PUSHA to FE_PUSHI
1094 or F_PUSH to F_PUSHI
1095 */
1096
field_A2I()1097 static void field_A2I()
1098 { CELL *cp ;
1099
1100 if ( code_ptr[-1].op == FE_PUSHA &&
1101 code_ptr[-1].ptr == (PTR) 0)
1102 /* On most architectures, the two tests are the same; a good
1103 compiler might eliminate one. On LM_DOS, and possibly other
1104 segmented architectures, they are not */
1105 { code_ptr[-1].op = FE_PUSHI ; }
1106 else
1107 {
1108 cp = (CELL *) code_ptr[-1].ptr ;
1109
1110 if ( cp == field ||
1111
1112 #if LM_DOS
1113 SAMESEG(cp,field) &&
1114 #endif
1115 cp > NF && cp <= LAST_PFIELD )
1116 {
1117 code_ptr[-2].op = _PUSHI ;
1118 }
1119 else if ( cp == NF )
1120 { code_ptr[-2].op = NF_PUSHI ; code_ptr-- ; }
1121
1122 else
1123 {
1124 code_ptr[-2].op = F_PUSHI ;
1125 code_ptr -> op = field_addr_to_index( code_ptr[-1].ptr ) ;
1126 code_ptr++ ;
1127 }
1128 }
1129 }
1130
1131 /* we've seen an ID in a context where it should be a VAR,
1132 check that's consistent with previous usage */
1133
check_var(p)1134 static void check_var( p )
1135 register SYMTAB *p ;
1136 {
1137 switch(p->type)
1138 {
1139 case ST_NONE : /* new id */
1140 p->type = ST_VAR ;
1141 p->stval.cp = new_CELL() ;
1142 p->stval.cp->type = C_NOINIT ;
1143 break ;
1144
1145 case ST_LOCAL_NONE :
1146 p->type = ST_LOCAL_VAR ;
1147 active_funct->typev[p->offset] = ST_LOCAL_VAR ;
1148 break ;
1149
1150 case ST_VAR :
1151 case ST_LOCAL_VAR : break ;
1152
1153 default :
1154 type_error(p) ;
1155 break ;
1156 }
1157 }
1158
1159 /* we've seen an ID in a context where it should be an ARRAY,
1160 check that's consistent with previous usage */
check_array(p)1161 static void check_array(p)
1162 register SYMTAB *p ;
1163 {
1164 switch(p->type)
1165 {
1166 case ST_NONE : /* a new array */
1167 p->type = ST_ARRAY ;
1168 p->stval.array = new_ARRAY() ;
1169 break ;
1170
1171 case ST_ARRAY :
1172 case ST_LOCAL_ARRAY :
1173 break ;
1174
1175 case ST_LOCAL_NONE :
1176 p->type = ST_LOCAL_ARRAY ;
1177 active_funct->typev[p->offset] = ST_LOCAL_ARRAY ;
1178 break ;
1179
1180 default : type_error(p) ; break ;
1181 }
1182 }
1183
code_array(p)1184 static void code_array(p)
1185 register SYMTAB *p ;
1186 { if ( is_local(p) )
1187 { code1(LA_PUSHA) ; code1(p->offset) ; }
1188 else code2(A_PUSHA, p->stval.array) ;
1189 }
1190
1191
current_offset()1192 static int current_offset()
1193 {
1194 switch( scope )
1195 {
1196 case SCOPE_MAIN : return code_ptr - main_start ;
1197 case SCOPE_BEGIN : return code_ptr - begin_code.start ;
1198 case SCOPE_END : return code_ptr - end_code.start ;
1199 case SCOPE_FUNCT : return code_ptr - active_funct->code ;
1200 }
1201 /* can't get here */
1202 return 0 ;
1203 }
1204
1205 /* we've seen an ID as an argument to a user defined function */
1206
code_call_id(p,ip)1207 static void code_call_id( p, ip )
1208 register CA_REC *p ;
1209 register SYMTAB *ip ;
1210 { static CELL dummy ;
1211
1212 switch( ip->type )
1213 {
1214 case ST_VAR :
1215 p->type = CA_EXPR ;
1216 code2(_PUSHI, ip->stval.cp) ;
1217 break ;
1218
1219 case ST_LOCAL_VAR :
1220 p->type = CA_EXPR ;
1221 code1(L_PUSHI) ;
1222 code1(ip->offset) ;
1223 break ;
1224
1225 case ST_ARRAY :
1226 p->type = CA_ARRAY ;
1227 code2(A_PUSHA, ip->stval.array) ;
1228 break ;
1229
1230 case ST_LOCAL_ARRAY :
1231 p->type = CA_ARRAY ;
1232 code1(LA_PUSHA) ;
1233 code1(ip->offset) ;
1234 break ;
1235
1236 /* not enough info to code it now; it will have to
1237 be patched later */
1238
1239 case ST_NONE :
1240 p->type = ST_NONE ;
1241 p->call_offset = current_offset() ;
1242 p->sym_p = ip ;
1243 code2(_PUSHI, &dummy) ;
1244 break ;
1245
1246 case ST_LOCAL_NONE :
1247 p->type = ST_LOCAL_NONE ;
1248 p->call_offset = current_offset() ;
1249 p->type_p = & active_funct->typev[ip->offset] ;
1250 code1(L_PUSHI) ;
1251 code1(ip->offset) ;
1252 break ;
1253
1254
1255 #ifdef DEBUG
1256 default :
1257 bozo("code_call_id") ;
1258 #endif
1259
1260 }
1261 }
1262
1263 /* an RE by itself was coded as _MATCH0 , change to
1264 push as an expression */
1265
RE_as_arg()1266 static void RE_as_arg()
1267 { CELL *cp = ZMALLOC(CELL) ;
1268
1269 code_ptr -= 2 ;
1270 cp->type = C_RE ;
1271 cp->ptr = code_ptr[1].ptr ;
1272 code2(_PUSHC, cp) ;
1273 }
1274
1275
parse()1276 int parse()
1277 { int yy = yyparse() ;
1278
1279 #if YYBYACC
1280 extern struct yacc_mem *yacc_memp ;
1281
1282 yacc_memp++ ; /* puts parser tables in mem pool */
1283 #endif
1284
1285 if ( resolve_list ) resolve_fcalls() ;
1286 return yy ;
1287 }
1288
1289