1 /*
2 scan.l -- Algae lexical scanner.
3
4 Copyright (C) 1994-2002 K. Scott Hunziker.
5 Copyright (C) 1990-1994 The Boeing Company.
6
7 See the file COPYING for license, warranty, and permission details.
8 */
9
10 /* $Id: scan.l,v 1.12 2003/08/07 02:34:09 ksh Exp $ */
11
12 %{
13
14 #define PSR_DATA_HERE
15
16 #include "algae.h"
17 #include <string.h>
18 #include <ctype.h>
19 #include "entity.h"
20 #include "psr.h"
21 #include "code.h"
22 #include "parse.h"
23 #include "mem.h"
24 #include "pmem.h"
25 #include "exception.h"
26 #include "message.h"
27 #include "print.h"
28 #include "printf.h"
29 #include "error.h"
30
31 int PROTO(yylex, (YYSTYPE *lexval)) ;
32 #define YY_DECL int yylex( lexval ) YYSTYPE *lexval;
33
34 static int PROTO( line_input, ( char *buff, int buffer_size ) );
35
36 #if HAVE_LIBREADLINE
37 extern int use_readline;
38
39 char *PROTO( readline, ( char *prompt ) );
40 char *PROTO( readline_fgets, ( char *s, int n, char *prompt ) );
41 #endif /* HAVE_LIBREADLINE */
42
43 int issue_prompt; /* Give prompt if set. */
44
45 extern double atof() ;
46 static char *PROTO(rm_escape, (char *)) ;
47 static void PROTO(add_to_string, (char *,int)) ;
48 static char *PROTO(get_string, (void)) ;
49
50
51 static int next_offset ;
52 static int max_size ; /* max size string curr_line will hold */
53 static int continuing ; /* helps with prompt */
54 static int eof_flag ; /* only used when non-interactive,
55 to help when last line lacks a newline */
56
57
58
59
60 #define ADJ_OFFSET() curr_offset = next_offset ;\
61 next_offset += yyleng
62
yywrap()63 int yywrap() { return 1; }
64
65 /* this needs to be expanded for non-interactive input */
66 #undef YY_INPUT
67 #define YY_INPUT(buff, result, max_size) \
68 do {\
69 result = line_input((char *)buff, max_size) ; \
70 if ( flush_flag )\
71 { flush_flag = 0 ; BEGIN(0) ; }}while(0)
72
73 static char *input_string; /* input, if commands not coming from file */
74
75 #define FGETS(s, n, stream) \
76 ((stream) ? fgets (s, n, stream) : sgets (s, n, &input_string))
77
78 /*
79 * Like fgets(3), except it reads from the string pointed to by `c' and
80 * then modifies that string to point to the remainder.
81 */
82 static char *
sgets(s,n,c)83 sgets (s, n, c)
84 char *s;
85 int n;
86 char **c;
87 {
88 char *e, *p;
89 int i;
90
91 p = *c;
92 if (!*p) return NULL;
93
94 e = strchr (p, '\n');
95 i = e ? e-p+1 : strlen (p);
96 n--;
97 if (i > n) i = n;
98 memcpy (s, p, i);
99 s[i] = '\0';
100 *c = p+i;
101 return s;
102 }
103
104 static int
line_input(buff,buffer_size)105 line_input (buff, buffer_size)
106 char *buff;
107 int buffer_size;
108 {
109 int len;
110 char *prompt, *more_input;
111
112 SIGINT_RAISE_ON ();
113
114 again:
115 if (issue_prompt && yyin)
116 {
117 if (continuing)
118 {
119 prompt = get_prompt (1);
120 continuing = 0;
121 }
122 else
123 prompt = get_prompt (brace_cnt || flow_cnt);
124
125 #if HAVE_LIBREADLINE
126 if (use_readline)
127 more_input = readline_fgets (buff, buffer_size, prompt);
128 else
129 #endif
130 {
131 #ifdef VAX_VMS
132 xputc ('\n', stderr);
133 #endif
134 xfputs (prompt, stderr);
135 more_input = fgets (buff, buffer_size, yyin);
136 }
137 FREE (prompt);
138 }
139 else
140 more_input = FGETS (buff, buffer_size, yyin);
141
142 if (more_input)
143 {
144 curr_line_no++;
145 len = strlen (buff);
146
147 if (len > max_size)
148 {
149 if (max_size)
150 FREE (curr_line);
151 curr_line = MALLOC (len + 1);
152 max_size = len;
153 }
154
155 (void) strcpy (curr_line, buff);
156 next_offset = 0;
157 }
158 else
159 {
160 if (errno == EINTR)
161 {
162 errno = 0;
163 goto again;
164 }
165 else if (yyin && ferror (yyin))
166 {
167 p_error ("%s", strerror (errno));
168 exit (1);
169 }
170 else /* EOF */
171 len = 0;
172 }
173
174 SIGINT_RAISE_OFF ();
175 return len;
176 }
177
178
179 #if HAVE_LIBREADLINE
180 extern void PROTO (add_history, (char * string));
181
182 char *
readline_fgets(s,n,prompt)183 readline_fgets( s, n, prompt )
184 char *s;
185 int n;
186 char *prompt;
187 {
188 char *line_read = readline( prompt );
189 if ( line_read ) {
190 if ( *line_read ) add_history( line_read );
191 strncpy( s, line_read, n-2 );
192 s[n-2] = '\0';
193 n = strlen( s );
194 s[n] = '\n';
195 s[n+1] = '\0';
196 free( line_read );
197 line_read = s;
198 }
199
200 return line_read;
201 }
202 #endif /* HAVE_LIBREADLINE */
203
204
205 %}
206
207 DIGIT [0-9]
208 INTEGER {DIGIT}+
209 PURE_DECIMAL ({DIGIT}+"."|"."{DIGIT}){DIGIT}*
210 EXPONENT [eE][-+]?{DIGIT}+
211 DECIMAL ({INTEGER}{EXPONENT}|{PURE_DECIMAL}{EXPONENT}?)
212
213
214 ID [_A-Za-z\$][_A-Za-z0-9\$]*
215
216 WS [ \t\v\f\r]
217 TERMINATOR [;?]
218
219 LITERAL [()=\[\]:!{}.^]
220
221 ADDOP [-+]
222 MULOP [*/%@]
223 RELOP [<>]=?|==|!=
224 CAT ,
225 COMMENT #[^\n]*
226 CONTINUE \\{WS}*{COMMENT}?\n
227 STRBODY ([^\\\"\n]|\\.)*
228
229 %x FLUSH
230 %x STRINGING
231
232 %%
233
234 /* this always gets done */
235 if ( flush_flag )
236 { continuing = 0 ; BEGIN(FLUSH) ; }
237
238 <FLUSH>.*\n { /* eat the line */
239 BEGIN(0) ;
240 }
241
242
243
244 /*----------------------------------------*/
245
246 /* keywords are hardwired into the scanner */
247
248 NULL { ADJ_OFFSET() ; return _NULL ; }
249 if { ADJ_OFFSET() ; return IF ; }
250 else { ADJ_OFFSET() ; return ELSE ; }
251 elseif { ADJ_OFFSET() ; return ELSEIF ; }
252 while { ADJ_OFFSET() ; return WHILE ; }
253 break { ADJ_OFFSET() ; return BREAK ; }
254 continue { ADJ_OFFSET() ; return CONTINUE ; }
255 function { ADJ_OFFSET() ; return FUNCTION_ ; }
256 for { ADJ_OFFSET() ; return FOR ; }
257 in { ADJ_OFFSET() ; return IN ; }
258 return { ADJ_OFFSET() ; return RETURN ; }
259 local { ADJ_OFFSET() ; return LOCAL ; }
260 self { ADJ_OFFSET() ; return SELF ; }
261 \$\$ { ADJ_OFFSET() ; return SYMBOL_TABLE ; }
262 try { ADJ_OFFSET() ; return TRY ; }
263 catch { ADJ_OFFSET() ; return CATCH ; }
264 veil { ADJ_OFFSET() ; return VEIL ; }
265
266 {CONTINUE} { continuing = 1 ; }
267
268 {COMMENT}?\n? { ADJ_OFFSET() ;
269 if ( brace_cnt == 0 ) return '\n' ;
270 }
271
272 {TERMINATOR}{WS}*{CONTINUE} { ADJ_OFFSET() ;
273 continuing = 1 ;
274 lexval->ival = 0 ;
275 return *yytext ;
276 }
277
278 {TERMINATOR}{WS}*{COMMENT}?\n { ADJ_OFFSET() ;
279 lexval->ival = 1 ;
280 return *yytext ;
281 }
282
283 {TERMINATOR} { ADJ_OFFSET() ;
284 lexval->ival = 0 ;
285 return *yytext ;
286 }
287
288 {LITERAL} { ADJ_OFFSET() ; return *yytext ; }
289
290 {WS}+ { ADJ_OFFSET() ; }
291
292 {ID} { ADJ_OFFSET() ;
293 lexval->id_name = strcpy(MALLOC(yyleng+1),yytext) ;
294 return ID ;
295 }
296
297 {INTEGER} { ADJ_OFFSET() ;
298 errno = 0;
299 lexval->datum = new_INT(atoi(yytext)) ;
300 if ( errno ) {
301 p_error( "Value out of range." );
302 raise_exception();
303 }
304 return CONSTANT ;
305 }
306
307 {DECIMAL} { ADJ_OFFSET() ;
308 errno = 0;
309 lexval->datum = new_REAL(atof(yytext)) ;
310 if ( errno ) {
311 p_error( "Value out of range." );
312 raise_exception();
313 }
314 return CONSTANT ;
315 }
316
317 \" { ADJ_OFFSET() ;
318 BEGIN(STRINGING) ;
319 }
320
321 <STRINGING>{STRBODY}{CONTINUE} {
322 char *s = yytext + yyleng - 2 ;
323
324 continuing = 1 ;
325 assert(s>=yytext) ;
326 while ( *s != '\\')
327 {
328 assert(s>yytext) ;
329 s-- ;
330 }
331 *s = 0 ;
332 add_to_string(yytext,s-yytext) ;
333 }
334
335 <STRINGING>{STRBODY}[\"\n] { char * s ;
336 int last ;
337
338 ADJ_OFFSET() ;
339 last = yytext[yyleng-1] ;
340 yytext[yyleng-1] = 0 ;
341 add_to_string(yytext,yyleng-1) ;
342 s = rm_escape(get_string()) ;
343
344 if ( last == '\n' )
345 {
346 p_error("Unterminated character scalar:\n\"%s", s) ;
347 FREE(s) ;
348 raise_exception() ;
349 }
350 /* found end of string */
351 lexval->cs = strcpy(pmem( strlen(s) + 1 ),s) ;
352 FREE(s) ;
353 BEGIN(0) ;
354 return STRING ;
355 }
356
357 <STRINGING>{STRBODY} { /* EOF */
358 char *s ;
359
360 ADJ_OFFSET() ;
361 add_to_string(yytext,yyleng) ;
362 p_error("Unterminated character scalar at EOF:\n\"%s",
363 s = rm_escape(get_string())) ;
364 FREE(s) ;
365 raise_exception() ;
366 }
367
368
369 {ADDOP} { ADJ_OFFSET() ;
370 lexval->ival = *yytext == '+' ? OP_ADD : OP_SUB ;
371 return ADDOP ;
372 }
373
374 {MULOP} { ADJ_OFFSET() ;
375 lexval->ival = *yytext == '*' ? OP_MUL :
376 *yytext == '/' ? OP_DIV :
377 *yytext == '%' ? OP_MOD :
378 OP_PROD;
379 return MULOP ;
380 }
381
382 {RELOP} { ADJ_OFFSET() ;
383
384 switch( yytext[0] )
385 {
386 case '=' :
387 lexval->ival = OP_EQ ;
388 break ;
389
390 case '<' :
391 lexval->ival = yytext[1] ? OP_LTE : OP_LT ;
392 break ;
393
394 case '>' :
395 lexval->ival = yytext[1] ? OP_GTE : OP_GT ;
396 break ;
397
398 case '!' :
399 lexval->ival = OP_NE ;
400 break ;
401
402 default :
403 wipeout("scanner relop") ;
404 }
405 return RELOP ;
406 }
407
408
409 [-+*/@%]= { ADJ_OFFSET() ;
410
411 switch (yytext[0])
412 {
413 case '-':
414 lexval->ival = OP_SUB;
415 break;
416 case '+':
417 lexval->ival = OP_ADD;
418 break;
419 case '*':
420 lexval->ival = OP_MUL;
421 break;
422 case '/':
423 lexval->ival = OP_DIV;
424 break;
425 case '@':
426 lexval->ival = OP_PROD;
427 break;
428 case '%':
429 lexval->ival = OP_MOD;
430 break;
431 default:
432 wipeout ("scanner binop_assign");
433 }
434
435 return BINOP_ASSIGN ;
436 }
437
438
439 "||" { ADJ_OFFSET() ; return SHORT_OR ; }
440 "&&" { ADJ_OFFSET() ; return SHORT_AND ; }
441
442 "|" { ADJ_OFFSET() ; return OR ; }
443 "&" { ADJ_OFFSET() ; return AND ; }
444 {CAT} { ADJ_OFFSET() ; return CAT ; }
445 \' { ADJ_OFFSET() ; return TRANS ; }
446
447
448 . { /* anything not covered */
449 ADJ_OFFSET() ;
450 if ( yytext[0] )
451 yyerror("unexpected input character -- ignored") ;
452 }
453
454
455 <<EOF>> { if ( !interactive && eof_flag == 0 )
456 { eof_flag = 1 ; return '\n' /* phoney */ ; }
457 else return EOF ;
458 }
459
460 %%
461
462
463 typedef struct ps_state
464 {
465 struct ps_state *link;
466 YY_BUFFER_STATE buffer_state;
467 FILE *yyin;
468 char *input_string;
469 char *curr_line;
470 int curr_line_no;
471 char *curr_file;
472 int max_size;
473 int eof_flag;
474
475 int flush_flag;
476 int interaction;
477 int prompting;
478 int brace_cnt;
479 int flow_cnt;
480 CODE_BLOCK code_block;
481 struct pm saved_pmem;
482 void *jmp_top;
483 void *bc_top;
484 }
485 PS_STATE;
486
487 extern int interactive;
488
489 /*
490 * The `stdin_is_interactive' flag may be set in `initialize'. If it's set,
491 * then we use interactive mode for stdin whether it looks like a terminal
492 * or not.
493 */
494 extern int stdin_is_interactive;
495
496 static PS_STATE *ps_state_list;
497
498 /*
499 * Use `push_parser_scanner_state' to save the parser's state (the file
500 * from which it's reading, along with globals like `interactive', etc.)
501 * and change to a new one.
502 */
503
504 void
505 push_parser_scanner_state (fp, file_name)
506 FILE *fp;
507 char *file_name;
508 {
509 /*
510 * If `fp' is NULL, then `file_name' is a pointer to a command string.
511 * Otherwise, `fp' points to the input file, and `file_name' is its name.
512 * The `file_name' string is not FREE'd.
513 */
514
515 PS_STATE *p = MALLOC (sizeof (PS_STATE));
516
517 p->link = ps_state_list;
518 ps_state_list = p;
519
520 p->buffer_state = YY_CURRENT_BUFFER;
521 p->yyin = yyin;
522 yyin = fp;
523 p->input_string = input_string;
524 input_string = fp ? NULL : file_name;
525 yy_switch_to_buffer (yy_create_buffer (yyin, YY_BUF_SIZE));
526
527 p->interaction = interactive;
528 p->prompting = issue_prompt;
529
530 issue_prompt = fp && isatty (fileno (fp));
531 interactive = issue_prompt || stdin_is_interactive && fp == stdin;
532
533 continuing = 0;
534
535 p->curr_line = curr_line;
536 curr_line = (char *) 0;
537 p->max_size = max_size;
538 max_size = 0;
539 p->curr_line_no = curr_line_no;
540 curr_line_no = 0;
541 p->curr_file = curr_file;
542 curr_file = file_name;
543
544 p->eof_flag = eof_flag;
545 eof_flag = 0;
546 p->flush_flag = flush_flag;
547 flush_flag = 0;
548 p->brace_cnt = brace_cnt;
549 brace_cnt = 0;
550 p->flow_cnt = flow_cnt;
551 flow_cnt = 0;
552
553 p->code_block = the_code_block;
554 new_code_block ();
555
556 p->saved_pmem = active_pmem;
557 (void) memset (&active_pmem, 0, sizeof (active_pmem));
558
559 p->jmp_top = jmp_top_push ();
560 p->bc_top = bc_top_push ();
561 }
562
563 void
564 pop_parser_scanner_state ()
565 {
566 PS_STATE *p;
567
568 assert (ps_state_list);
569
570 p = ps_state_list;
571 ps_state_list = p->link;
572
573 yyin = p->yyin;
574 input_string = p->input_string;
575 yy_delete_buffer (YY_CURRENT_BUFFER);
576 if (p->buffer_state)
577 yy_switch_to_buffer (p->buffer_state);
578
579 interactive = p->interaction;
580 issue_prompt = p->prompting;
581
582 continuing = 0;
583
584 if (curr_line)
585 FREE (curr_line);
586 curr_line = p->curr_line;
587 max_size = p->max_size;
588 curr_line_no = p->curr_line_no;
589 curr_file = p->curr_file;
590
591 flush_flag = p->flush_flag;
592 eof_flag = p->eof_flag;
593 brace_cnt = p->brace_cnt;
594 flow_cnt = p->flow_cnt;
595
596 FREE (code_base);
597 the_code_block = p->code_block;
598
599 free_all_pmem ();
600 active_pmem = p->saved_pmem;
601
602 jmp_top_pop (p->jmp_top);
603 bc_top_pop (p->bc_top);
604
605 FREE (p);
606 }
607
608
609 /*---------- process escape characters in strings ----*/
610
611
612 #define isoctal(x) ((x)>='0'&&(x)<='7')
613
614 #define hex_value(x) ((x)<='F'?(x)-'A'+10:(x)-'a'+10)
615
616
617 /* process one , two or three octal digits
618 moving a pointer forward by reference */
619 static int octal( start_p )
620 char **start_p ;
621 { register char *p = *start_p ;
622 register unsigned x ;
623
624 x = *p++ - '0' ;
625 if ( isoctal(*p) )
626 {
627 x = (x<<3) + *p++ - '0' ;
628 if ( isoctal(*p) ) x = (x<<3) + *p++ - '0' ;
629 }
630 *start_p = p ;
631 return x & 0xff ;
632 }
633
634 /* process one or two hex digits
635 moving a pointer forward by reference */
636
637 static int hex( start_p )
638 char **start_p ;
639 { register unsigned char *p = (unsigned char*) *start_p ;
640 register unsigned x ;
641
642 if ( isdigit(*p) ) x = *p - '0' ;
643 else x = hex_value(*p) ;
644 p++ ;
645
646 if ( isdigit(*p) ) x = (x<<4) + *p++ - '0' ;
647 else
648 if ( isxdigit(*p) )
649 { x = (x<<4) + hex_value(*p) ; p++ ; }
650
651 *start_p = (char *) p ;
652 return x ;
653 }
654
655 #define NUM_ESCAPE_CHARS 8
656
657 static struct { char in , out ; }
658 escape_test[NUM_ESCAPE_CHARS+1] = {
659 'n' , '\n', /* newline */
660 't' , '\t', /* tab */
661 'f' , '\f', /* formfeed */
662 'b' , '\b', /* backspace */
663 'r' , '\r', /* carriage return */
664 'a' , '\07', /* bell */
665 'v' , '\013', /* vertical tab */
666 'e' , '\033', /* escape */
667 0 , 0
668 };
669
670 /* all other escaped chars stand for themselves */
671
672
673 /* process the escape characters in a string, in place . */
674
675 static char *rm_escape(s)
676 char *s ;
677 { char *p, *q ;
678 char *t ;
679 int i ;
680
681 q = p = s ;
682
683 while ( *p )
684 if ( *p == '\\' )
685 {
686 escape_test[NUM_ESCAPE_CHARS].in = * ++p ; /* sentinel */
687 i = 0 ;
688 while ( escape_test[i].in != *p ) i++ ;
689
690 if ( i != NUM_ESCAPE_CHARS ) /* in table */
691 {
692 p++ ; *q++ = escape_test[i].out ;
693 }
694 else
695 if ( isoctal(*p) )
696 { /* keep p in register */
697 t = p ; *q++ = octal(&t) ; p = t ;
698 }
699 else
700 if ( *p == 'x' && isxdigit(*(unsigned char*)(p+1)) )
701 {
702 t = p+1 ; *q++ = hex(&t) ; p = t ;
703 }
704 else
705 if ( *p == 0 ) *q++ = '\\' ;
706 else /* not an escape sequence */
707 *q++ = *p++ ;
708 }
709 else *q++ = *p++ ;
710
711 *q = 0 ;
712 return s ;
713 }
714
715
716 /* stuff to put multiple line strings together */
717
718 static char *the_string ; /* built it here */
719 static int len ; /* strlen(the_string) */
720
721 static char *get_string()
722 {
723 char *s = the_string ;
724 the_string = (char *) 0 ;
725 len = 0 ;
726 return s ;
727 }
728
729 static void
730 add_to_string( s, slen)
731 char *s ;
732 int slen ;
733 {
734 char *new = MALLOC(len+slen+1) ;
735 if ( the_string )
736 { (void) memcpy(new, the_string, len) ;
737 FREE(the_string) ;
738 }
739 (void) strcpy(new+len,s) ;
740 len += slen ;
741 the_string = new ;
742 }
743