1 /*------------------------------------------------------------*
2 | parse.y |
3 | copyright 1999, Andrew Sumner |
4 | copyright 1991, Michael D. Brennan |
5 | |
6 | This is a source file for the awka package, a translator |
7 | of the AWK programming language to ANSI C. |
8 | |
9 | The file is a modified version of parse.y from |
10 | Mawk, an implementation of the AWK processing language, |
11 | distributed by Michael Brennan under the GPL. |
12 | |
13 | This program is free software; you can redistribute it |
14 | and/or modify it under the terms of the GNU General Public |
15 | License as published by the Free Software Foundation; |
16 | either version 2 of the License, or any later version. |
17 | |
18 | This program is distributed in the hope that it will be |
19 | useful, but WITHOUT ANY WARRANTY; without even the implied |
20 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
21 | PURPOSE. See the GNU General Public License for more |
22 | details. |
23 | |
24 | You should have received a copy of the GNU General Public |
25 | License along with this program; if not, write to the |
26 | Free Software Foundation, Inc., 675 Mass Ave, Cambridge, |
27 | MA 02139, USA. |
28 *-----------------------------------------------------------*/
29
30 %{
31 #include <stdio.h>
32 #include "awka.h"
33 #include "symtype.h"
34 #include "code.h"
35 #include "memory.h"
36 #include "bi_funct.h"
37 #include "bi_vars.h"
38 #include "jmp.h"
39 #include "field.h"
40 #include "files.h"
41
42
43 #define YYMAXDEPTH 200
44
45
46 extern void PROTO( eat_nl, (void) ) ;
47 static void PROTO( resize_fblock, (FBLOCK *) ) ;
48 static void PROTO( switch_code_to_main, (void)) ;
49 static void PROTO( code_array, (SYMTAB *) ) ;
50 static void PROTO( code_call_id, (CA_REC *, SYMTAB *) ) ;
51 static void PROTO( field_A2I, (void)) ;
52 static void PROTO( check_var, (SYMTAB *) ) ;
53 static void PROTO( check_array, (SYMTAB *) ) ;
54 static void PROTO( RE_as_arg, (void)) ;
55
56 static int scope ;
57 static FBLOCK *active_funct ;
58 /* when scope is SCOPE_FUNCT */
59
60 #define code_address(x) if( is_local(x) ) \
61 code2op(L_PUSHA, (x)->offset) ;\
62 else code2(_PUSHA, (x)->stval.cp)
63
64 #define CDP(x) (code_base+(x))
65 /* WARNING: These CDP() calculations become invalid after calls
66 that might change code_base. Which are: code2(), code2op(),
67 code_jmp() and code_pop().
68 */
69
70 /* this nonsense caters to MSDOS large model */
71 #define CODE_FE_PUSHA() code_ptr->ptr = (PTR) 0 ; code1(FE_PUSHA)
72
73 %}
74
75 %union{
76 CELL *cp ;
77 SYMTAB *stp ;
78 int start ; /* code starting address as offset from code_base */
79 PF_CP fp ; /* ptr to a (print/printf) or (sub/gsub) function */
80 BI_REC *bip ; /* ptr to info about a builtin */
81 FBLOCK *fbp ; /* ptr to a function block */
82 ARG2_REC *arg2p ;
83 CA_REC *ca_p ;
84 int ival ;
85 PTR ptr ;
86 }
87
88 /* two tokens to help with errors */
89 %token UNEXPECTED /* unexpected character */
90 %token BAD_DECIMAL
91
92 %token NL
93 %token SEMI_COLON
94 %token LBRACE RBRACE
95 %token LBOX RBOX
96 %token COMMA
97 %token <ival> IO_OUT COPROCESS_OUT /* |& > or output pipe */
98
99 %right ASSIGN ADD_ASG SUB_ASG MUL_ASG DIV_ASG MOD_ASG POW_ASG
100 %right QMARK COLON
101 %left OR
102 %left AND
103 %left IN
104 %left <ival> MATCH /* ~ or !~ */
105 %left EQ NEQ LT LTE GT GTE
106 %left CAT
107 %left GETLINE
108 %left PLUS MINUS
109 %left MUL DIV MOD
110 %left NOT UMINUS
111 %nonassoc IO_IN PIPE COPROCESS
112 %right POW
113 %left <ival> INC_or_DEC
114 %left DOLLAR FIELD /* last to remove a SR conflict
115 with getline */
116 %right LPAREN RPAREN /* removes some SR conflicts */
117
118 %token <ptr> DOUBLE STRING_ RE
119 %token <stp> ID D_ID
120 %token <fbp> FUNCT_ID
121 %token <bip> BUILTIN LENGTH
122 %token <cp> FIELD
123
124 %token PRINT PRINTF SPLIT MATCH_FUNC SUB GSUB GENSUB ALENGTH_FUNC ASORT_FUNC
125 /* keywords */
126 %token DO WHILE FOR BREAK CONTINUE IF ELSE IN
127 %token DELETE a_BEGIN a_END EXIT ABORT NEXT NEXTFILE RETURN FUNCTION
128
129 %type <start> block block_or_separator
130 %type <start> statement_list statement mark
131 %type <ival> pr_args
132 %type <arg2p> arg2
133 %type <start> builtin
134 %type <start> getline_file
135 %type <start> lvalue field fvalue
136 %type <start> expr cat_expr p_expr
137 %type <start> while_front if_front
138 %type <start> for1 for2
139 %type <start> array_loop_front
140 %type <start> return_statement
141 %type <start> split_front re_arg sub_back gensub_back
142 %type <ival> arglist args
143 %type <fp> print sub_or_gsub gensub
144 %type <fbp> funct_start funct_head
145 %type <ca_p> call_args ca_front ca_back
146 %type <ival> f_arglist f_args
147
148 %%
149 /* productions */
150
151 program : program_block
152 | program program_block
153 ;
154
155 program_block : PA_block /* pattern-action */
156 | function_def
157 | outside_error block
158 ;
159
160 PA_block : block
161 { /* this do nothing action removes a vacuous warning
162 from Bison */
163 }
164
165 | a_BEGIN
166 { be_setup(scope = SCOPE_BEGIN) ; }
167
168 block
169 { switch_code_to_main() ; }
170
171 | a_END
172 { be_setup(scope = SCOPE_END) ; }
173
174 block
175 { switch_code_to_main() ; }
176
177 | expr /* this works just like an if statement */
178 { code_jmp(_JZ, (INST*)0) ; }
179
180 block_or_separator
181 { patch_jmp( code_ptr ) ; }
182
183 /* range pattern, see comment in execute.c near _RANGE */
184 | expr COMMA
185 {
186 INST *p1 = CDP($1) ;
187 int len ;
188
189 code_push(p1, code_ptr - p1, scope, active_funct) ;
190 code_ptr = p1 ;
191
192 code2op(_RANGE, 1) ;
193 code_ptr += 3 ;
194 len = code_pop(code_ptr) ;
195 code_ptr += len ;
196 code1(_STOP) ;
197 p1 = CDP($1) ;
198 p1[2].op = code_ptr - (p1+1) ;
199 }
200 expr
201 { code1(_STOP) ; }
202
203 block_or_separator
204 {
205 INST *p1 = CDP($1) ;
206
207 p1[3].op = CDP($6) - (p1+1) ;
208 p1[4].op = code_ptr - (p1+1) ;
209 }
210 ;
211
212
213
214 block : LBRACE statement_list RBRACE
215 { $$ = $2 ; }
216 | LBRACE error RBRACE
217 { $$ = code_offset ; /* does nothing won't be executed */
218 print_flag = getline_flag = paren_cnt = 0 ;
219 yyerrok ; }
220 ;
221
222 block_or_separator : block
223 | separator /* default print action */
224 { $$ = code_offset ;
225 code1(_PUSHINT) ; code1(0) ;
226 code2(_PRINT, bi_print) ;
227 }
228
229 statement_list : statement
230 | statement_list statement
231 ;
232
233
234 statement : block
235 | expr separator
236 { code1(_POP) ; }
237 | /* empty */ separator
238 { $$ = code_offset ; }
239 | error separator
240 { $$ = code_offset ;
241 print_flag = getline_flag = 0 ;
242 paren_cnt = 0 ;
243 yyerrok ;
244 }
245 | BREAK separator
246 { $$ = code_offset ; BC_insert('B', code_ptr+1) ;
247 code2(_BREAK, 0) /* don't use code_jmp ! */ ; }
248 | CONTINUE separator
249 { $$ = code_offset ; BC_insert('C', code_ptr+1) ;
250 code2(_GOTO, 0) ; }
251 | return_statement
252 { if ( scope != SCOPE_FUNCT )
253 compile_error("return outside function body") ;
254 }
255 | NEXT separator
256 { if ( scope != SCOPE_MAIN && scope != SCOPE_FUNCT )
257 compile_error( "improper use of next" ) ;
258 $$ = code_offset ;
259 code1(_NEXT) ;
260 }
261 | NEXTFILE separator
262 { if ( scope != SCOPE_MAIN && scope != SCOPE_FUNCT )
263 compile_error( "improper use of nextfile" ) ;
264 $$ = code_offset ;
265 code1(_NEXTFILE) ;
266 }
267 ;
268
269 separator : NL | SEMI_COLON
270 ;
271
272 expr : cat_expr
273 | lvalue ASSIGN expr { code1(_ASSIGN) ; }
274 | lvalue ADD_ASG expr { code1(_ADD_ASG) ; }
275 | lvalue SUB_ASG expr { code1(_SUB_ASG) ; }
276 | lvalue MUL_ASG expr { code1(_MUL_ASG) ; }
277 | lvalue DIV_ASG expr { code1(_DIV_ASG) ; }
278 | lvalue MOD_ASG expr { code1(_MOD_ASG) ; }
279 | lvalue POW_ASG expr { code1(_POW_ASG) ; }
280 | expr EQ expr { code1(_EQ) ; }
281 | expr NEQ expr { code1(_NEQ) ; }
282 | expr LT expr { code1(_LT) ; }
283 | expr LTE expr { code1(_LTE) ; }
284 | expr GT expr { code1(_GT) ; }
285 | expr GTE expr { code1(_GTE) ; }
286
287 | expr MATCH expr
288 {
289 INST *p3 = CDP($3) ;
290
291 if ( p3 == code_ptr - 2 )
292 {
293 if ( p3->op == _MATCH0 ) p3->op = _MATCH1 ;
294
295 else /* check for string */
296 if ( p3->op == _PUSHS )
297 { CELL *cp = ZMALLOC(CELL) ;
298
299 cp->type = C_STRING ;
300 cp->ptr = p3[1].ptr ;
301 cast_to_RE(cp) ;
302 code_ptr -= 2 ;
303 code2(_MATCH1, cp->ptr) ;
304 ZFREE(cp) ;
305 }
306 else code1(_MATCH2) ;
307 }
308 else code1(_MATCH2) ;
309
310 if ( !$2 ) code1(_NOT) ;
311 }
312
313 /* short circuit boolean evaluation */
314 | expr OR
315 { code1(_TEST) ;
316 code_jmp(_LJNZ, (INST*)0) ;
317 }
318 expr
319 { code1(_TEST) ; patch_jmp(code_ptr) ; }
320
321 | expr AND
322 { code1(_TEST) ;
323 code_jmp(_LJZ, (INST*)0) ;
324 }
325 expr
326 { code1(_TEST) ; patch_jmp(code_ptr) ; }
327
328 | expr QMARK { code_jmp(_QMARK, (INST*)0) ; }
329 expr COLON { code_jmp(_COLON, (INST*)0) ; }
330 expr
331 { patch_jmp(code_ptr) ; patch_jmp(CDP($7)) ; }
332 ;
333
334 cat_expr : p_expr %prec CAT
335 | cat_expr p_expr %prec CAT
336 { code1(_CAT) ; }
337 ;
338
339 p_expr : DOUBLE
340 { $$ = code_offset ; code2(_PUSHD, $1) ; }
341 | STRING_
342 { $$ = code_offset ; code2(_PUSHS, $1) ; }
343 | ID %prec AND /* anything less than IN */
344 { check_var($1) ;
345 $$ = code_offset ;
346 if ( is_local($1) )
347 { code2op(L_PUSHI, $1->offset) ; }
348 else code2(_PUSHI, $1->stval.cp) ;
349 }
350
351 | LPAREN expr RPAREN
352 { $$ = $2 ; }
353 ;
354
355 p_expr : RE
356 { $$ = code_offset ; code2(_MATCH0, $1) ; }
357 ;
358
359 p_expr : p_expr PLUS p_expr { code1(_ADD) ; }
360 | p_expr MINUS p_expr { code1(_SUB) ; }
361 | p_expr MUL p_expr { code1(_MUL) ; }
362 | p_expr DIV p_expr { code1(_DIV) ; }
363 | p_expr MOD p_expr { code1(_MOD) ; }
364 | p_expr POW p_expr { code1(_POW) ; }
365 | NOT p_expr
366 { $$ = $2 ; code1(_NOT) ; }
367 | PLUS p_expr %prec UMINUS
368 { $$ = $2 ; code1(_UPLUS) ; }
369 | MINUS p_expr %prec UMINUS
370 { $$ = $2 ; code1(_UMINUS) ; }
371 | builtin
372 ;
373
374 p_expr : ID INC_or_DEC
375 { check_var($1) ;
376 $$ = code_offset ;
377 code_address($1) ;
378
379 if ( $2 == '+' ) code1(_POST_INC) ;
380 else code1(_POST_DEC) ;
381 }
382 | INC_or_DEC lvalue
383 { $$ = $2 ;
384 if ( $1 == '+' ) code1(_PRE_INC) ;
385 else code1(_PRE_DEC) ;
386 }
387 ;
388
389 p_expr : field INC_or_DEC
390 { if ($2 == '+' ) code1(F_POST_INC ) ;
391 else code1(F_POST_DEC) ;
392 }
393 | INC_or_DEC field
394 { $$ = $2 ;
395 if ( $1 == '+' ) code1(F_PRE_INC) ;
396 else code1( F_PRE_DEC) ;
397 }
398 ;
399
400 lvalue : ID
401 { $$ = code_offset ;
402 check_var($1) ;
403 code_address($1) ;
404 }
405 | BUILTIN mark LPAREN arglist RPAREN
406 {
407 BI_REC *p = $1 ;
408 $$ = $2;
409 if (strcmp(p->name, "argval"))
410 compile_error("builtin function '%s' used as an lvalue", p->name);
411 if ( (int)p->min_args > $4 || (int)p->max_args < $4 )
412 compile_error(
413 "wrong number of arguments in call to %s" ,
414 p->name ) ;
415 if ( p->min_args != p->max_args ) /* variable args */
416 { code1(_PUSHINT) ; code1($4) ; }
417 code2(_BUILTIN , p->fp) ;
418 }
419
420 ;
421
422
423 arglist : /* empty */
424 { $$ = 0 ; }
425 | args
426 ;
427
428 args : expr %prec LPAREN
429 { $$ = 1 ; }
430 | args COMMA expr
431 { $$ = $1 + 1 ; }
432 ;
433
434 builtin :
435 BUILTIN mark LPAREN arglist RPAREN
436 {
437 BI_REC *p = $1 ;
438 $$ = $2 ;
439 if ( (int)p->min_args > $4 || (int)p->max_args < $4 )
440 compile_error(
441 "wrong number of arguments in call to %s" ,
442 p->name ) ;
443 if ( p->min_args != p->max_args ) /* variable args */
444 { code1(_PUSHINT) ; code1($4) ; }
445 code2(_BUILTIN , p->fp) ;
446 }
447 | LENGTH /* this is an irritation */
448 {
449 $$ = code_offset ;
450 code1(_PUSHINT) ; code1(0) ;
451 code2(_BUILTIN, $1->fp) ;
452 }
453 ;
454
455 /* an empty production to store the code_ptr */
456 mark : /* empty */
457 { $$ = code_offset ; }
458
459 /* print_statement */
460 statement : print mark pr_args pr_direction separator
461 { code2(_PRINT, $1) ;
462 if ( $1 == bi_printf && $3 == 0 )
463 compile_error("no arguments in call to printf") ;
464 print_flag = 0 ;
465 $$ = $2 ;
466 }
467 ;
468
469 print : PRINT { $$ = bi_print ; print_flag = 1 ;}
470 | PRINTF { $$ = bi_printf ; print_flag = 1 ; }
471 ;
472
473 pr_args : arglist { code2op(_PUSHINT, $1) ; }
474 | LPAREN arg2 RPAREN
475 { $$ = $2->cnt ; zfree($2,sizeof(ARG2_REC)) ;
476 code2op(_PUSHINT, $$) ;
477 }
478 | LPAREN RPAREN
479 { $$=0 ; code2op(_PUSHINT, 0) ; }
480 ;
481
482 arg2 : expr COMMA expr
483 { $$ = (ARG2_REC*) zmalloc(sizeof(ARG2_REC)) ;
484 $$->start = $1 ;
485 $$->cnt = 2 ;
486 }
487 | arg2 COMMA expr
488 { $$ = $1 ; $$->cnt++ ; }
489 ;
490
491 pr_direction : /* empty */
492 | IO_OUT expr
493 { code2op(_PUSHINT, $1) ; }
494 | COPROCESS_OUT expr
495 { code2op(_PUSHINT, $1) ; }
496 ;
497
498
499 /* IF and IF-ELSE */
500
501 if_front : IF LPAREN expr RPAREN
502 { $$ = $3 ; eat_nl() ; code_jmp(_JZ, (INST*)0) ; }
503 ;
504
505 /* if_statement */
506 statement : if_front statement
507 { patch_jmp( code_ptr ) ; }
508 ;
509
510 else : ELSE { eat_nl() ; code_jmp(_ELSE, (INST*)0) ; }
511 ;
512
513 /* if_else_statement */
514 statement : if_front statement else statement
515 { patch_jmp(code_ptr) ;
516 patch_jmp(CDP($4)) ;
517 }
518
519
520 /* LOOPS */
521
522 do : DO
523 { eat_nl() ; BC_new() ; }
524 ;
525
526 /* do_statement */
527 statement : do statement WHILE LPAREN expr RPAREN separator
528 { $$ = $2 ;
529 code_jmp(_JNZ, CDP($2)) ;
530 BC_clear(code_ptr, CDP($5)) ; }
531 ;
532
533 while_front : WHILE LPAREN expr RPAREN
534 { eat_nl() ; BC_new() ;
535 $$ = $3 ;
536
537 /* check if const expression */
538 if ( code_ptr - 2 == CDP($3) &&
539 code_ptr[-2].op == _PUSHD &&
540 *(double*)code_ptr[-1].ptr != 0.0
541 )
542 code_ptr -= 2 ;
543 else
544 { INST *p3 = CDP($3) ;
545 code_push(p3, code_ptr-p3, scope, active_funct) ;
546 code_ptr = p3 ;
547 code2(_JMP, (INST*)0) ; /* code2() not code_jmp() */
548 }
549 }
550 ;
551
552 /* while_statement */
553 statement : while_front statement
554 {
555 int saved_offset ;
556 int len ;
557 INST *p1 = CDP($1) ;
558 INST *p2 = CDP($2) ;
559
560 if ( p1 != p2 ) /* real test in loop */
561 {
562 p1[1].op = code_ptr-(p1+1) ;
563 saved_offset = code_offset ;
564 len = code_pop(code_ptr) ;
565 code_ptr += len ;
566 code_jmp(_JNZ, CDP($2)) ;
567 BC_clear(code_ptr, CDP(saved_offset)) ;
568 }
569 else /* while(1) */
570 {
571 code_jmp(_JMP, p1) ;
572 BC_clear(code_ptr, CDP($2)) ;
573 }
574 }
575 ;
576
577
578 /* for_statement */
579 statement : for1 for2 for3 statement
580 {
581 int cont_offset = code_offset ;
582 unsigned len = code_pop(code_ptr) ;
583 INST *p2 = CDP($2) ;
584 INST *p4 = CDP($4) ;
585
586 code_ptr += len ;
587
588 if ( p2 != p4 ) /* real test in for2 */
589 {
590 p4[-1].op = code_ptr - p4 + 1 ;
591 len = code_pop(code_ptr) ;
592 code_ptr += len ;
593 code_jmp(_JNZ, CDP($4)) ;
594 }
595 else /* for(;;) */
596 code_jmp(_JMP, p4) ;
597
598 BC_clear(code_ptr, CDP(cont_offset)) ;
599
600 }
601 ;
602
603 for1 : FOR LPAREN SEMI_COLON { $$ = code_offset ; }
604 | FOR LPAREN expr SEMI_COLON
605 { $$ = $3 ; code1(_POP) ; }
606 ;
607
608 for2 : SEMI_COLON { $$ = code_offset ; }
609 | expr SEMI_COLON
610 {
611 if ( code_ptr - 2 == CDP($1) &&
612 code_ptr[-2].op == _PUSHD &&
613 * (double*) code_ptr[-1].ptr != 0.0
614 )
615 code_ptr -= 2 ;
616 else
617 {
618 INST *p1 = CDP($1) ;
619 code_push(p1, code_ptr-p1, scope, active_funct) ;
620 code_ptr = p1 ;
621 code2(_JMP, (INST*)0) ;
622 }
623 }
624 ;
625
626 for3 : RPAREN
627 { eat_nl() ; BC_new() ;
628 code_push((INST*)0,0, scope, active_funct) ;
629 }
630 | expr RPAREN
631 { INST *p1 = CDP($1) ;
632
633 eat_nl() ; BC_new() ;
634 code1(_POP) ;
635 code_push(p1, code_ptr - p1, scope, active_funct) ;
636 code_ptr -= code_ptr - p1 ;
637 }
638 ;
639
640
641 /* arrays */
642
643 expr : expr IN ID
644 { check_array($3) ;
645 code_array($3) ;
646 code1(A_TEST) ;
647 }
648 | LPAREN arg2 RPAREN IN ID
649 { $$ = $2->start ;
650 code2op(A_CAT, $2->cnt) ;
651 zfree($2, sizeof(ARG2_REC)) ;
652
653 check_array($5) ;
654 code_array($5) ;
655 code1(A_TEST) ;
656 }
657 ;
658
659 lvalue : ID mark LBOX args RBOX
660 {
661 if ( $4 > 1 )
662 { code2op(A_CAT, $4) ; }
663
664 check_array($1) ;
665 if( is_local($1) )
666 { code2op(LAE_PUSHA, $1->offset) ; }
667 else code2(AE_PUSHA, $1->stval.array) ;
668 $$ = $2 ;
669 }
670 ;
671
672 p_expr : ID mark LBOX args RBOX %prec AND
673 {
674 if ( $4 > 1 )
675 { code2op(A_CAT, $4) ; }
676
677 check_array($1) ;
678 if( is_local($1) )
679 { code2op(LAE_PUSHI, $1->offset) ; }
680 else code2(AE_PUSHI, $1->stval.array) ;
681 $$ = $2 ;
682 }
683
684 | ID mark LBOX args RBOX INC_or_DEC
685 {
686 if ( $4 > 1 )
687 { code2op(A_CAT,$4) ; }
688
689 check_array($1) ;
690 if( is_local($1) )
691 { code2op(LAE_PUSHA, $1->offset) ; }
692 else code2(AE_PUSHA, $1->stval.array) ;
693 if ( $6 == '+' ) code1(_POST_INC) ;
694 else code1(_POST_DEC) ;
695
696 $$ = $2 ;
697 }
698 ;
699
700 /* delete A[i] or delete A */
701 statement : DELETE ID mark LBOX args RBOX separator
702 {
703 $$ = $3 ;
704 if ( $5 > 1 ) { code2op(A_CAT, $5) ; }
705 check_array($2) ;
706 code_array($2) ;
707 code1(A_DEL) ;
708 }
709 | DELETE ID separator
710 {
711 $$ = code_offset ;
712 check_array($2) ;
713 code_array($2) ;
714 code1(DEL_A) ;
715 }
716 ;
717
718 /* for ( i in A ) statement */
719
720 array_loop_front : FOR LPAREN ID IN ID RPAREN
721 { eat_nl() ; BC_new() ;
722 $$ = code_offset ;
723
724 check_var($3) ;
725 code_address($3) ;
726 check_array($5) ;
727 code_array($5) ;
728
729 code2(SET_ALOOP, (INST*)0) ;
730 }
731 ;
732
733 /* array_loop */
734 statement : array_loop_front statement
735 {
736 INST *p2 = CDP($2) ;
737
738 p2[-1].op = code_ptr - p2 + 1 ;
739 BC_clear( code_ptr+2 , code_ptr) ;
740 code_jmp(ALOOP, p2) ;
741 code1(POP_AL) ;
742 }
743 ;
744
745 /* fields
746 D_ID is a special token , same as an ID, but yylex()
747 only returns it after a '$'. In essense,
748 DOLLAR D_ID is really one token.
749 */
750
751 field : FIELD
752 { $$ = code_offset ; code2(F_PUSHA, $1) ; }
753 | DOLLAR D_ID
754 {
755 check_var($2) ;
756 $$ = code_offset ;
757 if ( is_local($2) )
758 { code2op(L_PUSHI, $2->offset) ; }
759 else code2(_PUSHI, $2->stval.cp) ;
760
761 CODE_FE_PUSHA() ;
762 }
763 | DOLLAR D_ID mark LBOX args RBOX
764 {
765 if ( $5 > 1 )
766 { code2op(A_CAT, $5) ; }
767
768 check_array($2) ;
769 if( is_local($2) )
770 { code2op(LAE_PUSHI, $2->offset) ; }
771 else code2(AE_PUSHI, $2->stval.array) ;
772
773 CODE_FE_PUSHA() ;
774
775 $$ = $3 ;
776 }
777 | DOLLAR p_expr
778 {
779 $$ = $2 ; CODE_FE_PUSHA() ;
780 }
781 | LPAREN field RPAREN
782 { $$ = $2 ; }
783 ;
784
785 p_expr : field %prec CAT /* removes field (++|--) sr conflict */
786 { field_A2I() ; }
787 ;
788
789 expr : field ASSIGN expr { code1(F_ASSIGN) ; }
790 | field ADD_ASG expr { code1(F_ADD_ASG) ; }
791 | field SUB_ASG expr { code1(F_SUB_ASG) ; }
792 | field MUL_ASG expr { code1(F_MUL_ASG) ; }
793 | field DIV_ASG expr { code1(F_DIV_ASG) ; }
794 | field MOD_ASG expr { code1(F_MOD_ASG) ; }
795 | field POW_ASG expr { code1(F_POW_ASG) ; }
796 ;
797
798 /* split is handled different than a builtin because
799 it takes an array and optionally a regular expression as args */
800
801 p_expr : split_front split_back
802 { code2(_BUILTIN, bi_split) ; }
803 ;
804
805 split_front : SPLIT LPAREN expr COMMA ID
806 { $$ = $3 ;
807 check_array($5) ;
808 code_array($5) ;
809 }
810 ;
811
812 split_back : RPAREN
813 { code2(_PUSHI, &fs_shadow) ; }
814 | COMMA expr RPAREN
815 {
816 if ( CDP($2) == code_ptr - 2 )
817 {
818 if ( code_ptr[-2].op == _MATCH0 )
819 RE_as_arg() ;
820 else
821 if ( code_ptr[-2].op == _PUSHS )
822 { CELL *cp = ZMALLOC(CELL) ;
823
824 cp->type = C_STRING ;
825 cp->ptr = code_ptr[-1].ptr ;
826 cast_for_split(cp) ;
827 code_ptr[-2].op = _PUSHC ;
828 code_ptr[-1].ptr = (PTR) cp ;
829 }
830 }
831 }
832 ;
833
834
835
836 /* alength(array) */
837
838 p_expr : ALENGTH_FUNC LPAREN ID RPAREN
839 {
840 check_array($3) ;
841 code_array($3) ;
842 code2(_BUILTIN, bi_alength) ;
843 }
844 ;
845
846 /* asort(array [, array]) */
847
848 p_expr : ASORT_FUNC mark LPAREN ID RPAREN
849 {
850 $$ = $2 ;
851 check_array($4) ;
852 code_array($4) ;
853 code1(_PUSHINT) ; code1(1) ;
854 code2(_BUILTIN, bi_asort) ;
855 }
856 | ASORT_FUNC mark LPAREN ID COMMA ID RPAREN
857 {
858 $$ = $2 ;
859 check_array($6) ;
860 code_array($6) ;
861 check_array($4) ;
862 code_array($4) ;
863 code1(_PUSHINT) ; code1(2) ;
864 code2(_BUILTIN, bi_asort) ;
865 }
866 ;
867
868 /* match(expr, RE) */
869
870 p_expr : MATCH_FUNC LPAREN expr COMMA re_arg COMMA ID RPAREN
871 { $$ = $3 ;
872 check_array($7);
873 code_array($7);
874 code1(_PUSHINT); code1(3);
875 code2(_BUILTIN, bi_match) ;
876 }
877 | MATCH_FUNC LPAREN expr COMMA re_arg RPAREN
878 { $$ = $3 ;
879 code1(_PUSHINT); code1(2);
880 code2(_BUILTIN, bi_match) ;
881 }
882 ;
883 /*
884 p_expr : MATCH_FUNC LPAREN expr COMMA re_arg RPAREN
885 { $$ = $3 ;
886 code2(_BUILTIN, bi_match) ;
887 }
888 ;
889 */
890
891
892 re_arg : expr
893 {
894 INST *p1 = CDP($1) ;
895
896 if ( p1 == code_ptr - 2 )
897 {
898 if ( p1->op == _MATCH0 ) RE_as_arg() ;
899 else
900 if ( p1->op == _PUSHS )
901 { CELL *cp = ZMALLOC(CELL) ;
902
903 cp->type = C_STRING ;
904 cp->ptr = p1[1].ptr ;
905 cast_to_RE(cp) ;
906 p1->op = _PUSHC ;
907 p1[1].ptr = (PTR) cp ;
908 }
909 }
910 }
911
912
913
914 /* exit_statement */
915 statement : EXIT separator
916 { $$ = code_offset ;
917 code1(_EXIT0) ; }
918 | EXIT expr separator
919 { $$ = $2 ; code1(_EXIT) ; }
920 | ABORT separator
921 { $$ = code_offset ;
922 code1(_ABORT0) ; }
923 | ABORT expr separator
924 { $$ = $2 ; code1(_ABORT) ; }
925
926 return_statement : RETURN separator
927 { $$ = code_offset ;
928 code1(_RET0) ; }
929 | RETURN expr separator
930 { $$ = $2 ; code1(_RET) ; }
931
932
933 /* getline */
934
935 p_expr : getline %prec GETLINE
936 { $$ = code_offset ;
937 code2(F_PUSHA, &field[0]) ;
938 code1(_PUSHINT) ; code1(0) ;
939 code2(_BUILTIN, bi_getline) ;
940 getline_flag = 0 ;
941 }
942 | getline fvalue %prec GETLINE
943 { $$ = $2 ;
944 code1(_PUSHINT) ; code1(0) ;
945 code2(_BUILTIN, bi_getline) ;
946 getline_flag = 0 ;
947 }
948 | getline_file p_expr %prec IO_IN
949 { code1(_PUSHINT) ; code1(F_IN) ;
950 code2(_BUILTIN, bi_getline) ;
951 /* getline_flag already off in yylex() */
952 }
953 | p_expr COPROCESS GETLINE
954 { code2(F_PUSHA, &field[0]) ;
955 code1(_PUSHINT) ; code1(COP_IN) ;
956 code2(_BUILTIN, bi_getline) ;
957 }
958 | p_expr COPROCESS GETLINE fvalue
959 {
960 code1(_PUSHINT) ; code1(COP_IN) ;
961 code2(_BUILTIN, bi_getline) ;
962 }
963 | p_expr PIPE GETLINE
964 { code2(F_PUSHA, &field[0]) ;
965 code1(_PUSHINT) ; code1(PIPE_IN) ;
966 code2(_BUILTIN, bi_getline) ;
967 }
968 | p_expr PIPE GETLINE fvalue
969 {
970 code1(_PUSHINT) ; code1(PIPE_IN) ;
971 code2(_BUILTIN, bi_getline) ;
972 }
973 ;
974
975 getline : GETLINE { getline_flag = 1 ; }
976
977 fvalue : lvalue | field ;
978
979 getline_file : getline IO_IN
980 { $$ = code_offset ;
981 code2(F_PUSHA, field+0) ;
982 }
983 | getline fvalue IO_IN
984 { $$ = $2 ; }
985 ;
986
987 /*==========================================
988 sub and gsub
989 ==========================================*/
990
991 p_expr : gensub LPAREN re_arg COMMA expr COMMA expr gensub_back
992 {
993 INST *p5 = CDP($5) ;
994 INST *p8 = CDP($8) ;
995
996 if ( p8 - p5 == 2 && p5->op == _PUSHS )
997 { /* cast from STRING to REPL at compile time */
998 CELL *cp = ZMALLOC(CELL) ;
999 cp->type = C_STRING ;
1000 cp->ptr = p5[1].ptr ;
1001 cast_to_REPL(cp) ;
1002 p5->op = _PUSHC ;
1003 p5[1].ptr = (PTR) cp ;
1004 }
1005 code2(_BUILTIN, $1) ;
1006 $$ = $3 ;
1007 }
1008 ;
1009
1010 gensub : GENSUB { $$ = bi_gensub ; }
1011 ;
1012
1013 p_expr : sub_or_gsub LPAREN re_arg COMMA expr sub_back
1014 {
1015 INST *p5 = CDP($5) ;
1016 INST *p6 = CDP($6) ;
1017
1018 if ( p6 - p5 == 2 && p5->op == _PUSHS )
1019 { /* cast from STRING to REPL at compile time */
1020 CELL *cp = ZMALLOC(CELL) ;
1021 cp->type = C_STRING ;
1022 cp->ptr = p5[1].ptr ;
1023 cast_to_REPL(cp) ;
1024 p5->op = _PUSHC ;
1025 p5[1].ptr = (PTR) cp ;
1026 }
1027 code2(_BUILTIN, $1) ;
1028 $$ = $3 ;
1029 }
1030 ;
1031
1032 sub_or_gsub : SUB { $$ = bi_sub ; }
1033 | GSUB { $$ = bi_gsub ; }
1034 ;
1035
1036
1037 sub_back : RPAREN /* substitute into $0 */
1038 { $$ = code_offset ;
1039 code2(F_PUSHA, &field[0]) ;
1040 }
1041
1042 | COMMA fvalue RPAREN
1043 { $$ = $2 ; }
1044 ;
1045
1046 gensub_back : RPAREN /* substitute into $0 */
1047 { $$ = code_offset ;
1048 code2(F_PUSHA, &field[0]) ;
1049 }
1050 | COMMA p_expr RPAREN
1051 { $$ = $2 ; }
1052 ;
1053
1054 /*================================================
1055 user defined functions
1056 *=================================*/
1057
1058 function_def : funct_start block
1059 {
1060 resize_fblock($1) ;
1061 restore_ids() ;
1062 switch_code_to_main() ;
1063 }
1064 ;
1065
1066
1067 funct_start : funct_head LPAREN f_arglist RPAREN
1068 {
1069 eat_nl() ;
1070 scope = SCOPE_FUNCT ;
1071 active_funct = $1 ;
1072 *main_code_p = active_code ;
1073
1074 $1->nargs = $3 ;
1075 if ( $3 )
1076 $1->typev = (char *)
1077 memset( zmalloc($3), ST_LOCAL_NONE, $3) ;
1078 else $1->typev = (char *) 0 ;
1079
1080 code_ptr = code_base =
1081 (INST *) zmalloc(INST_BYTES(PAGESZ));
1082 code_limit = code_base + PAGESZ ;
1083 code_warn = code_limit - CODEWARN ;
1084 }
1085 ;
1086
1087 funct_head : FUNCTION ID
1088 { FBLOCK *fbp ;
1089
1090 if ( $2->type == ST_NONE )
1091 {
1092 $2->type = ST_FUNCT ;
1093 fbp = $2->stval.fbp =
1094 (FBLOCK *) zmalloc(sizeof(FBLOCK)) ;
1095 fbp->name = $2->name ;
1096 fbp->code = (INST*) 0 ;
1097 }
1098 else
1099 {
1100 type_error( $2 ) ;
1101
1102 /* this FBLOCK will not be put in
1103 the symbol table */
1104 fbp = (FBLOCK*) zmalloc(sizeof(FBLOCK)) ;
1105 fbp->name = "" ;
1106 }
1107 $$ = fbp ;
1108 }
1109
1110 | FUNCTION FUNCT_ID
1111 {
1112 $$ = $2 ;
1113 if ( $2->code )
1114 compile_error("redefinition of %s" , $2->name) ;
1115 }
1116 ;
1117
1118 f_arglist : /* empty */ { $$ = 0 ; }
1119 | f_args
1120 ;
1121
1122 f_args : ID
1123 {
1124 $1 = save_id($1->name) ;
1125 $1->type = ST_LOCAL_NONE ;
1126 $1->offset = 0 ;
1127 $$ = 1 ;
1128 }
1129 | f_args COMMA ID
1130 {
1131 if ( is_local($3) )
1132 compile_error("%s is duplicated in argument list",
1133 $3->name) ;
1134 else
1135 { $3 = save_id($3->name) ;
1136 $3->type = ST_LOCAL_NONE ;
1137 $3->offset = $1 ;
1138 $$ = $1 + 1 ;
1139 }
1140 }
1141 ;
1142
1143 outside_error : error
1144 { /* we may have to recover from a bungled function
1145 definition */
1146 /* can have local ids, before code scope
1147 changes */
1148 restore_ids() ;
1149
1150 switch_code_to_main() ;
1151 }
1152 ;
1153
1154 /* a call to a user defined function */
1155
1156 p_expr : FUNCT_ID mark call_args
1157 { $$ = $2 ;
1158 code2(_CALL, $1) ;
1159
1160 if ( $3 ) code1($3->arg_num+1) ;
1161 else code1(0) ;
1162
1163 check_fcall($1, scope, code_move_level, active_funct,
1164 $3, token_lineno) ;
1165 }
1166 ;
1167
1168 call_args : LPAREN RPAREN
1169 { $$ = (CA_REC *) 0 ; }
1170 | ca_front ca_back
1171 { $$ = $2 ;
1172 $$->link = $1 ;
1173 $$->arg_num = $1 ? $1->arg_num+1 : 0 ;
1174 }
1175 ;
1176
1177 /* The funny definition of ca_front with the COMMA bound to the ID is to
1178 force a shift to avoid a reduce/reduce conflict
1179 ID->id or ID->array
1180
1181 Or to avoid a decision, if the type of the ID has not yet been
1182 determined
1183 */
1184
1185 ca_front : LPAREN
1186 { $$ = (CA_REC *) 0 ; }
1187 | ca_front expr COMMA
1188 { $$ = ZMALLOC(CA_REC) ;
1189 $$->link = $1 ;
1190 $$->type = CA_EXPR ;
1191 $$->arg_num = $1 ? $1->arg_num+1 : 0 ;
1192 $$->call_offset = code_offset ;
1193 }
1194 | ca_front ID COMMA
1195 { $$ = ZMALLOC(CA_REC) ;
1196 $$->link = $1 ;
1197 $$->arg_num = $1 ? $1->arg_num+1 : 0 ;
1198
1199 code_call_id($$, $2) ;
1200 }
1201 ;
1202
1203 ca_back : expr RPAREN
1204 { $$ = ZMALLOC(CA_REC) ;
1205 $$->type = CA_EXPR ;
1206 $$->call_offset = code_offset ;
1207 }
1208
1209 | ID RPAREN
1210 { $$ = ZMALLOC(CA_REC) ;
1211 code_call_id($$, $1) ;
1212 }
1213 ;
1214
1215
1216
1217
1218 %%
1219
1220 /* resize the code for a user function */
1221
resize_fblock(fbp)1222 static void resize_fblock( fbp )
1223 FBLOCK *fbp ;
1224 {
1225 CODEBLOCK *p = ZMALLOC(CODEBLOCK) ;
1226 unsigned dummy ;
1227
1228 code2op(_RET0, _HALT) ;
1229 /* make sure there is always a return */
1230
1231 *p = active_code ;
1232 fbp->code = code_shrink(p, &dummy) ;
1233 /* code_shrink() zfrees p */
1234
1235 add_to_fdump_list(fbp) ;
1236 }
1237
1238
1239 /* convert FE_PUSHA to FE_PUSHI
1240 or F_PUSH to F_PUSHI
1241 */
1242
field_A2I()1243 static void field_A2I()
1244 { CELL *cp ;
1245
1246 if ( code_ptr[-1].op == FE_PUSHA &&
1247 code_ptr[-1].ptr == (PTR) 0)
1248 /* On most architectures, the two tests are the same; a good
1249 compiler might eliminate one. On LM_DOS, and possibly other
1250 segmented architectures, they are not */
1251 { code_ptr[-1].op = FE_PUSHI ; }
1252 else
1253 {
1254 cp = (CELL *) code_ptr[-1].ptr ;
1255
1256 if ( cp == field ||
1257 ( cp > NF && cp <= LAST_PFIELD ))
1258 {
1259 code_ptr[-2].op = _PUSHI ;
1260 }
1261 else if ( cp == NF )
1262 { code_ptr[-2].op = NF_PUSHI ;
1263 code_ptr-- ; }
1264
1265 else
1266 {
1267 code_ptr[-2].op = F_PUSHI ;
1268 code_ptr -> op = field_addr_to_index( code_ptr[-1].ptr ) ;
1269 code_ptr++ ;
1270 }
1271 }
1272 }
1273
1274 /* we've seen an ID in a context where it should be a VAR,
1275 check that's consistent with previous usage */
1276
check_var(p)1277 static void check_var( p )
1278 register SYMTAB *p ;
1279 {
1280 switch(p->type)
1281 {
1282 case ST_NONE : /* new id */
1283 st_none:
1284 p->type = ST_VAR ;
1285 p->stval.cp = ZMALLOC(CELL) ;
1286 p->stval.cp->type = C_NOINIT ;
1287 break ;
1288
1289 case ST_LOCAL_NONE :
1290 p->type = ST_LOCAL_VAR ;
1291 active_funct->typev[p->offset] = ST_LOCAL_VAR ;
1292 break ;
1293
1294 case ST_VAR :
1295 case ST_LOCAL_VAR : break ;
1296
1297 case ST_BUILTIN :
1298 if (is_ext_builtin(p->name))
1299 goto st_none;
1300
1301 default :
1302 type_error(p) ;
1303 break ;
1304 }
1305 }
1306
1307 /* we've seen an ID in a context where it should be an ARRAY,
1308 check that's consistent with previous usage */
check_array(p)1309 static void check_array(p)
1310 register SYMTAB *p ;
1311 {
1312 switch(p->type)
1313 {
1314 case ST_NONE : /* a new array */
1315 st_none:
1316 p->type = ST_ARRAY ;
1317 p->stval.array = new_ARRAY() ;
1318 break ;
1319
1320 case ST_ARRAY :
1321 case ST_LOCAL_ARRAY :
1322 break ;
1323
1324 case ST_LOCAL_NONE :
1325 p->type = ST_LOCAL_ARRAY ;
1326 active_funct->typev[p->offset] = ST_LOCAL_ARRAY ;
1327 break ;
1328
1329 case ST_BUILTIN :
1330 if (is_ext_builtin(p->name))
1331 goto st_none;
1332
1333 default : type_error(p) ; break ;
1334 }
1335 }
1336
code_array(p)1337 static void code_array(p)
1338 register SYMTAB *p ;
1339 {
1340 if ( is_local(p) ) code2op(LA_PUSHA, p->offset) ;
1341 else code2(A_PUSHA, p->stval.array) ;
1342 }
1343
1344
1345 /* we've seen an ID as an argument to a user defined function */
1346
code_call_id(p,ip)1347 static void code_call_id( p, ip )
1348 register CA_REC *p ;
1349 register SYMTAB *ip ;
1350 { static CELL dummy ;
1351
1352 p->call_offset = code_offset ;
1353 /* This always get set now. So that fcall:relocate_arglist
1354 works. */
1355
1356 switch( ip->type )
1357 {
1358 case ST_VAR :
1359 p->type = CA_EXPR ;
1360 code2(_PUSHI, ip->stval.cp) ;
1361 break ;
1362
1363 case ST_LOCAL_VAR :
1364 p->type = CA_EXPR ;
1365 code2op(L_PUSHI, ip->offset) ;
1366 break ;
1367
1368 case ST_ARRAY :
1369 p->type = CA_ARRAY ;
1370 code2(A_PUSHA, ip->stval.array) ;
1371 break ;
1372
1373 case ST_LOCAL_ARRAY :
1374 p->type = CA_ARRAY ;
1375 code2op(LA_PUSHA, ip->offset) ;
1376 break ;
1377
1378 /* not enough info to code it now; it will have to
1379 be patched later */
1380
1381 case ST_NONE :
1382 p->type = ST_NONE ;
1383 p->sym_p = ip ;
1384 code2(_PUSHI, &dummy) ;
1385 break ;
1386
1387 case ST_LOCAL_NONE :
1388 p->type = ST_LOCAL_NONE ;
1389 p->type_p = & active_funct->typev[ip->offset] ;
1390 code2op(L_PUSHI, ip->offset) ;
1391 break ;
1392
1393
1394 #ifdef DEBUG
1395 default :
1396 bozo("code_call_id") ;
1397 #endif
1398
1399 }
1400 }
1401
1402 /* an RE by itself was coded as _MATCH0 , change to
1403 push as an expression */
1404
RE_as_arg()1405 static void RE_as_arg()
1406 { CELL *cp = ZMALLOC(CELL) ;
1407
1408 code_ptr -= 2 ;
1409 cp->type = C_RE ;
1410 cp->ptr = code_ptr[1].ptr ;
1411 code2(_PUSHC, cp) ;
1412 }
1413
1414 /* reset the active_code back to the MAIN block */
1415 static void
switch_code_to_main()1416 switch_code_to_main()
1417 {
1418 switch(scope)
1419 {
1420 case SCOPE_BEGIN :
1421 *begin_code_p = active_code ;
1422 active_code = *main_code_p ;
1423 break ;
1424
1425 case SCOPE_END :
1426 *end_code_p = active_code ;
1427 active_code = *main_code_p ;
1428 break ;
1429
1430 case SCOPE_FUNCT :
1431 active_code = *main_code_p ;
1432 break ;
1433
1434 case SCOPE_MAIN :
1435 break ;
1436 }
1437 active_funct = (FBLOCK*) 0 ;
1438 scope = SCOPE_MAIN ;
1439 }
1440
1441
1442 void
parse()1443 parse()
1444 {
1445 if ( yyparse() || compile_error_count != 0 ) exit(2) ;
1446 scan_cleanup() ;
1447 set_code() ;
1448
1449 /* code must be set before call to resolve_fcalls() */
1450 if ( resolve_list ) resolve_fcalls() ;
1451
1452 if ( compile_error_count != 0 ) exit(2) ;
1453 dump_code() ;
1454 }
1455
1456