1 /*
2    scan.l -- Algae lexical scanner.
3 
4    Copyright (C) 1994-2002  K. Scott Hunziker.
5    Copyright (C) 1990-1994  The Boeing Company.
6 
7    See the file COPYING for license, warranty, and permission details.
8 */
9 
10 /* $Id: scan.l,v 1.12 2003/08/07 02:34:09 ksh Exp $ */
11 
12 %{
13 
14 #define  PSR_DATA_HERE
15 
16 #include "algae.h"
17 #include <string.h>
18 #include <ctype.h>
19 #include "entity.h"
20 #include "psr.h"
21 #include "code.h"
22 #include "parse.h"
23 #include "mem.h"
24 #include "pmem.h"
25 #include "exception.h"
26 #include "message.h"
27 #include "print.h"
28 #include "printf.h"
29 #include "error.h"
30 
31 int PROTO(yylex, (YYSTYPE *lexval)) ;
32 #define YY_DECL int yylex( lexval ) YYSTYPE *lexval;
33 
34 static int PROTO( line_input, ( char *buff, int buffer_size ) );
35 
36 #if HAVE_LIBREADLINE
37 extern int use_readline;
38 
39 char *PROTO( readline, ( char *prompt ) );
40 char *PROTO( readline_fgets, ( char *s, int n, char *prompt ) );
41 #endif /* HAVE_LIBREADLINE */
42 
43 int issue_prompt;	/* Give prompt if set. */
44 
45 extern double atof() ;
46 static char *PROTO(rm_escape, (char *)) ;
47 static void PROTO(add_to_string, (char *,int)) ;
48 static char *PROTO(get_string, (void)) ;
49 
50 
51 static int next_offset ;
52 static int max_size ; /* max size string curr_line will hold */
53 static int continuing ; /* helps with prompt */
54 static int eof_flag ;   /* only used when non-interactive,
55 	to help when last line lacks a newline */
56 
57 
58 
59 
60 #define  ADJ_OFFSET()    curr_offset = next_offset ;\
61 			 next_offset += yyleng
62 
yywrap()63 int yywrap() { return 1; }
64 
65 /* this needs to be expanded for non-interactive input */
66 #undef  YY_INPUT
67 #define YY_INPUT(buff, result, max_size) \
68 	do {\
69 	    result = line_input((char *)buff, max_size) ; \
70 	    if ( flush_flag )\
71 	    { flush_flag = 0 ; BEGIN(0) ; }}while(0)
72 
73 static char *input_string;	/* input, if commands not coming from file */
74 
75 #define FGETS(s, n, stream) \
76   ((stream) ? fgets (s, n, stream) : sgets (s, n, &input_string))
77 
78 /*
79  * Like fgets(3), except it reads from the string pointed to by `c' and
80  * then modifies that string to point to the remainder.
81  */
82 static char *
sgets(s,n,c)83 sgets (s, n, c)
84      char *s;
85      int n;
86      char **c;
87 {
88   char *e, *p;
89   int i;
90 
91   p = *c;
92   if (!*p) return NULL;
93 
94   e = strchr (p, '\n');
95   i = e ? e-p+1 : strlen (p);
96   n--;
97   if (i > n) i = n;
98   memcpy (s, p, i);
99   s[i] = '\0';
100   *c = p+i;
101   return s;
102 }
103 
104 static int
line_input(buff,buffer_size)105 line_input (buff, buffer_size)
106      char *buff;
107      int buffer_size;
108 {
109   int len;
110   char *prompt, *more_input;
111 
112   SIGINT_RAISE_ON ();
113 
114 again:
115   if (issue_prompt && yyin)
116     {
117       if (continuing)
118 	{
119 	  prompt = get_prompt (1);
120 	  continuing = 0;
121 	}
122       else
123 	prompt = get_prompt (brace_cnt || flow_cnt);
124 
125 #if HAVE_LIBREADLINE
126       if (use_readline)
127 	more_input = readline_fgets (buff, buffer_size, prompt);
128       else
129 #endif
130 	{
131 #ifdef VAX_VMS
132 	  xputc ('\n', stderr);
133 #endif
134 	  xfputs (prompt, stderr);
135 	  more_input = fgets (buff, buffer_size, yyin);
136 	}
137       FREE (prompt);
138     }
139   else
140     more_input = FGETS (buff, buffer_size, yyin);
141 
142   if (more_input)
143     {
144       curr_line_no++;
145       len = strlen (buff);
146 
147       if (len > max_size)
148 	{
149 	  if (max_size)
150 	    FREE (curr_line);
151 	  curr_line = MALLOC (len + 1);
152 	  max_size = len;
153 	}
154 
155       (void) strcpy (curr_line, buff);
156       next_offset = 0;
157     }
158   else
159     {
160       if (errno == EINTR)
161 	{
162 	  errno = 0;
163 	  goto again;
164 	}
165       else if (yyin && ferror (yyin))
166 	{
167 	  p_error ("%s", strerror (errno));
168 	  exit (1);
169 	}
170       else			/* EOF */
171 	len = 0;
172     }
173 
174   SIGINT_RAISE_OFF ();
175   return len;
176 }
177 
178 
179 #if HAVE_LIBREADLINE
180 extern void PROTO (add_history, (char * string));
181 
182 char *
readline_fgets(s,n,prompt)183 readline_fgets( s, n, prompt )
184   char *s;
185   int n;
186   char *prompt;
187 {
188     char *line_read = readline( prompt );
189     if ( line_read ) {
190 	if ( *line_read ) add_history( line_read );
191 	strncpy( s, line_read, n-2 );
192 	s[n-2] = '\0';
193 	n = strlen( s );
194 	s[n] = '\n';
195 	s[n+1] = '\0';
196 	free( line_read );
197 	line_read = s;
198     }
199 
200     return line_read;
201 }
202 #endif /* HAVE_LIBREADLINE */
203 
204 
205 %}
206 
207 DIGIT		[0-9]
208 INTEGER 	{DIGIT}+
209 PURE_DECIMAL	({DIGIT}+"."|"."{DIGIT}){DIGIT}*
210 EXPONENT	[eE][-+]?{DIGIT}+
211 DECIMAL		({INTEGER}{EXPONENT}|{PURE_DECIMAL}{EXPONENT}?)
212 
213 
214 ID	[_A-Za-z\$][_A-Za-z0-9\$]*
215 
216 WS	[ \t\v\f\r]
217 TERMINATOR	[;?]
218 
219 LITERAL		[()=\[\]:!{}.^]
220 
221 ADDOP	[-+]
222 MULOP	[*/%@]
223 RELOP   [<>]=?|==|!=
224 CAT     ,
225 COMMENT	#[^\n]*
226 CONTINUE	\\{WS}*{COMMENT}?\n
227 STRBODY		([^\\\"\n]|\\.)*
228 
229 %x  FLUSH
230 %x  STRINGING
231 
232 %%
233 
234 	/* this always gets done */
235 	if ( flush_flag )
236 	{ continuing = 0 ; BEGIN(FLUSH) ; }
237 
238 <FLUSH>.*\n		{ /* eat the line */
239 			  BEGIN(0) ;
240 			}
241 
242 
243 
244         /*----------------------------------------*/
245 
246 	/* keywords are hardwired into the scanner */
247 
248 NULL		{ ADJ_OFFSET() ; return _NULL ; }
249 if		{ ADJ_OFFSET() ; return IF ; }
250 else		{ ADJ_OFFSET() ; return ELSE ; }
251 elseif		{ ADJ_OFFSET() ; return ELSEIF ; }
252 while		{ ADJ_OFFSET() ; return WHILE ; }
253 break		{ ADJ_OFFSET() ; return BREAK ; }
254 continue	{ ADJ_OFFSET() ; return CONTINUE ; }
255 function	{ ADJ_OFFSET() ; return FUNCTION_ ; }
256 for		{ ADJ_OFFSET() ; return FOR ; }
257 in		{ ADJ_OFFSET() ; return IN ; }
258 return		{ ADJ_OFFSET() ; return RETURN ; }
259 local		{ ADJ_OFFSET() ; return LOCAL ; }
260 self		{ ADJ_OFFSET() ; return SELF  ; }
261 \$\$		{ ADJ_OFFSET() ; return SYMBOL_TABLE ; }
262 try		{ ADJ_OFFSET() ; return TRY  ; }
263 catch		{ ADJ_OFFSET() ; return CATCH  ; }
264 veil		{ ADJ_OFFSET() ; return VEIL  ; }
265 
266 {CONTINUE}	{ continuing = 1 ; }
267 
268 {COMMENT}?\n?	{ ADJ_OFFSET() ;
269 		  if ( brace_cnt == 0 ) return '\n' ;
270 		}
271 
272 {TERMINATOR}{WS}*{CONTINUE}	{ ADJ_OFFSET() ;
273 			continuing = 1 ;
274 			lexval->ival = 0 ;
275 			return *yytext ;
276 		    }
277 
278 {TERMINATOR}{WS}*{COMMENT}?\n	{ ADJ_OFFSET() ;
279 			lexval->ival = 1 ;
280 			return *yytext ;
281 		      }
282 
283 {TERMINATOR}	{ ADJ_OFFSET() ;
284 		  lexval->ival = 0 ;
285 		  return *yytext ;
286 		}
287 
288 {LITERAL}	{ ADJ_OFFSET() ; return *yytext ; }
289 
290 {WS}+			{ ADJ_OFFSET() ; }
291 
292 {ID}		{ ADJ_OFFSET() ;
293 		  lexval->id_name = strcpy(MALLOC(yyleng+1),yytext) ;
294 		  return ID ;
295 		}
296 
297 {INTEGER}	{ ADJ_OFFSET() ;
298 		  errno = 0;
299 		  lexval->datum = new_INT(atoi(yytext)) ;
300 		  if ( errno ) {
301 		      p_error( "Value out of range." );
302 		      raise_exception();
303 		  }
304 		  return CONSTANT ;
305 		}
306 
307 {DECIMAL}       { ADJ_OFFSET() ;
308 		  errno = 0;
309 		  lexval->datum = new_REAL(atof(yytext)) ;
310 		  if ( errno ) {
311 		      p_error( "Value out of range." );
312 		      raise_exception();
313 		  }
314 		  return CONSTANT ;
315 		}
316 
317 \"		{ ADJ_OFFSET() ;
318 		  BEGIN(STRINGING) ;
319 		}
320 
321 <STRINGING>{STRBODY}{CONTINUE}	{
322 		  char *s = yytext + yyleng - 2 ;
323 
324 		  continuing = 1 ;
325 		  assert(s>=yytext) ;
326 		  while ( *s != '\\')
327 		  {
328 		    assert(s>yytext) ;
329 		    s-- ;
330 		  }
331 		  *s = 0 ;
332 		  add_to_string(yytext,s-yytext) ;
333 		}
334 
335 <STRINGING>{STRBODY}[\"\n]   { char * s ;
336 			      int last ;
337 
338 		  ADJ_OFFSET() ;
339 		  last = yytext[yyleng-1] ;
340 		  yytext[yyleng-1] = 0 ;
341 		  add_to_string(yytext,yyleng-1) ;
342 		  s = rm_escape(get_string()) ;
343 
344 		  if ( last == '\n' )
345 		  {
346 		    p_error("Unterminated character scalar:\n\"%s", s) ;
347 		    FREE(s) ;
348 		    raise_exception() ;
349 		  }
350 		  /* found end of string */
351 		  lexval->cs = strcpy(pmem( strlen(s) + 1 ),s) ;
352 		  FREE(s) ;
353 		  BEGIN(0) ;
354 		  return STRING ;
355 		}
356 
357 <STRINGING>{STRBODY}  { /* EOF */
358 		  char *s ;
359 
360 		  ADJ_OFFSET() ;
361 		  add_to_string(yytext,yyleng) ;
362 		  p_error("Unterminated character scalar at EOF:\n\"%s",
363 			s = rm_escape(get_string())) ;
364 		  FREE(s) ;
365 		  raise_exception() ;
366 		}
367 
368 
369 {ADDOP}		{ ADJ_OFFSET() ;
370 		  lexval->ival = *yytext == '+' ? OP_ADD : OP_SUB ;
371 		  return ADDOP ;
372 		}
373 
374 {MULOP}		{ ADJ_OFFSET() ;
375 		  lexval->ival = *yytext == '*' ? OP_MUL :
376 		                 *yytext == '/' ? OP_DIV :
377 				 *yytext == '%' ? OP_MOD :
378 				                  OP_PROD;
379 		  return MULOP ;
380 		}
381 
382 {RELOP}		{ ADJ_OFFSET() ;
383 
384 		  switch( yytext[0] )
385 		  {
386 		    case '=' :
387 			lexval->ival = OP_EQ ;
388 			break ;
389 
390 		    case '<' :
391 			lexval->ival = yytext[1] ? OP_LTE : OP_LT ;
392 			break ;
393 
394 		    case '>' :
395 			lexval->ival = yytext[1] ? OP_GTE : OP_GT ;
396 			break ;
397 
398 		    case '!' :
399 			lexval->ival = OP_NE ;
400 			break ;
401 
402 		    default :
403 			wipeout("scanner relop") ;
404 		  }
405 		  return RELOP ;
406 		}
407 
408 
409 [-+*/@%]=	{ ADJ_OFFSET() ;
410 
411 		  switch (yytext[0])
412                   {
413                     case '-':
414                       lexval->ival = OP_SUB;
415                       break;
416                     case '+':
417                       lexval->ival = OP_ADD;
418                       break;
419                     case '*':
420                       lexval->ival = OP_MUL;
421                       break;
422                     case '/':
423                       lexval->ival = OP_DIV;
424                       break;
425                     case '@':
426                       lexval->ival = OP_PROD;
427                       break;
428                     case '%':
429                       lexval->ival = OP_MOD;
430                       break;
431                     default:
432                       wipeout ("scanner binop_assign");
433                   }
434 
435 		  return BINOP_ASSIGN ;
436 		}
437 
438 
439 "||"		{ ADJ_OFFSET() ;  return SHORT_OR ; }
440 "&&"		{ ADJ_OFFSET() ;  return SHORT_AND ; }
441 
442 "|"		{ ADJ_OFFSET() ;  return OR ; }
443 "&"		{ ADJ_OFFSET() ;  return AND ; }
444 {CAT}		{ ADJ_OFFSET() ;  return CAT ; }
445 \'		{ ADJ_OFFSET() ;  return TRANS ; }
446 
447 
448 .		{ /* anything not covered */
449 		  ADJ_OFFSET() ;
450 		  if ( yytext[0] )
451 		    yyerror("unexpected input character -- ignored") ;
452 		}
453 
454 
455 <<EOF>> { if ( !interactive && eof_flag == 0 )
456 	  { eof_flag = 1 ; return '\n' /* phoney */ ; }
457 	  else  return EOF ;
458 	}
459 
460 %%
461 
462 
463 typedef struct ps_state
464   {
465     struct ps_state *link;
466     YY_BUFFER_STATE buffer_state;
467     FILE *yyin;
468     char *input_string;
469     char *curr_line;
470     int curr_line_no;
471     char *curr_file;
472     int max_size;
473     int eof_flag;
474 
475     int flush_flag;
476     int interaction;
477     int prompting;
478     int brace_cnt;
479     int flow_cnt;
480     CODE_BLOCK code_block;
481     struct pm saved_pmem;
482     void *jmp_top;
483     void *bc_top;
484   }
485 PS_STATE;
486 
487 extern int interactive;
488 
489 /*
490  * The `stdin_is_interactive' flag may be set in `initialize'.  If it's set,
491  * then we use interactive mode for stdin whether it looks like a terminal
492  * or not.
493  */
494 extern int stdin_is_interactive;
495 
496 static PS_STATE *ps_state_list;
497 
498 /*
499  * Use `push_parser_scanner_state' to save the parser's state (the file
500  * from which it's reading, along with globals like `interactive', etc.)
501  * and change to a new one.
502  */
503 
504 void
505 push_parser_scanner_state (fp, file_name)
506      FILE *fp;
507      char *file_name;
508 {
509   /*
510    * If `fp' is NULL, then `file_name' is a pointer to a command string.
511    * Otherwise, `fp' points to the input file, and `file_name' is its name.
512    * The `file_name' string is not FREE'd.
513    */
514 
515   PS_STATE *p = MALLOC (sizeof (PS_STATE));
516 
517   p->link = ps_state_list;
518   ps_state_list = p;
519 
520   p->buffer_state = YY_CURRENT_BUFFER;
521   p->yyin = yyin;
522   yyin = fp;
523   p->input_string = input_string;
524   input_string = fp ? NULL : file_name;
525   yy_switch_to_buffer (yy_create_buffer (yyin, YY_BUF_SIZE));
526 
527   p->interaction = interactive;
528   p->prompting = issue_prompt;
529 
530   issue_prompt = fp && isatty (fileno (fp));
531   interactive = issue_prompt || stdin_is_interactive && fp == stdin;
532 
533   continuing = 0;
534 
535   p->curr_line = curr_line;
536   curr_line = (char *) 0;
537   p->max_size = max_size;
538   max_size = 0;
539   p->curr_line_no = curr_line_no;
540   curr_line_no = 0;
541   p->curr_file = curr_file;
542   curr_file = file_name;
543 
544   p->eof_flag = eof_flag;
545   eof_flag = 0;
546   p->flush_flag = flush_flag;
547   flush_flag = 0;
548   p->brace_cnt = brace_cnt;
549   brace_cnt = 0;
550   p->flow_cnt = flow_cnt;
551   flow_cnt = 0;
552 
553   p->code_block = the_code_block;
554   new_code_block ();
555 
556   p->saved_pmem = active_pmem;
557   (void) memset (&active_pmem, 0, sizeof (active_pmem));
558 
559   p->jmp_top = jmp_top_push ();
560   p->bc_top = bc_top_push ();
561 }
562 
563 void
564 pop_parser_scanner_state ()
565 {
566   PS_STATE *p;
567 
568   assert (ps_state_list);
569 
570   p = ps_state_list;
571   ps_state_list = p->link;
572 
573   yyin = p->yyin;
574   input_string = p->input_string;
575   yy_delete_buffer (YY_CURRENT_BUFFER);
576   if (p->buffer_state)
577     yy_switch_to_buffer (p->buffer_state);
578 
579   interactive = p->interaction;
580   issue_prompt = p->prompting;
581 
582   continuing = 0;
583 
584   if (curr_line)
585     FREE (curr_line);
586   curr_line = p->curr_line;
587   max_size = p->max_size;
588   curr_line_no = p->curr_line_no;
589   curr_file = p->curr_file;
590 
591   flush_flag = p->flush_flag;
592   eof_flag = p->eof_flag;
593   brace_cnt = p->brace_cnt;
594   flow_cnt = p->flow_cnt;
595 
596   FREE (code_base);
597   the_code_block = p->code_block;
598 
599   free_all_pmem ();
600   active_pmem = p->saved_pmem;
601 
602   jmp_top_pop (p->jmp_top);
603   bc_top_pop (p->bc_top);
604 
605   FREE (p);
606 }
607 
608 
609 /*----------  process escape characters in strings ----*/
610 
611 
612 #define isoctal(x)  ((x)>='0'&&(x)<='7')
613 
614 #define  hex_value(x)   ((x)<='F'?(x)-'A'+10:(x)-'a'+10)
615 
616 
617 /* process one , two or three octal digits
618    moving a pointer forward by reference */
619 static int octal( start_p )
620   char **start_p ;
621 { register char *p = *start_p ;
622   register unsigned x ;
623 
624   x = *p++ - '0' ;
625   if ( isoctal(*p) )
626   {
627     x = (x<<3) + *p++ - '0' ;
628     if ( isoctal(*p) )   x = (x<<3) + *p++ - '0' ;
629   }
630   *start_p = p ;
631   return  x & 0xff ;
632 }
633 
634 /* process one or two hex digits
635    moving a pointer forward by reference */
636 
637 static int  hex( start_p )
638   char **start_p ;
639 { register unsigned char *p = (unsigned char*) *start_p ;
640   register unsigned x ;
641 
642   if ( isdigit(*p) ) x = *p - '0' ;
643   else  x = hex_value(*p) ;
644   p++ ;
645 
646   if ( isdigit(*p) ) x = (x<<4) + *p++ - '0' ;
647   else
648   if ( isxdigit(*p) )
649   { x = (x<<4) + hex_value(*p) ; p++ ; }
650 
651   *start_p = (char *) p ;
652   return x ;
653 }
654 
655 #define  NUM_ESCAPE_CHARS    8
656 
657 static struct { char in , out ; }
658 escape_test[NUM_ESCAPE_CHARS+1] = {
659     'n' , '\n',		/* newline	   */
660     't' , '\t',		/* tab		   */
661     'f' , '\f',		/* formfeed	   */
662     'b' , '\b',		/* backspace	   */
663     'r' , '\r',		/* carriage return */
664     'a' , '\07',	/* bell		   */
665     'v' , '\013',	/* vertical tab	   */
666     'e' , '\033',	/* escape	   */
667     0 , 0
668 };
669 
670 /* all other escaped chars stand for themselves */
671 
672 
673 /* process the escape characters in a string, in place . */
674 
675 static char *rm_escape(s)
676   char *s ;
677 { char *p, *q ;
678   char *t ;
679   int i ;
680 
681   q = p = s ;
682 
683   while ( *p )
684       if ( *p == '\\' )
685       {
686         escape_test[NUM_ESCAPE_CHARS].in = * ++p ; /* sentinel */
687         i = 0 ;
688         while ( escape_test[i].in != *p )  i++ ;
689 
690         if ( i != NUM_ESCAPE_CHARS )  /* in table */
691         {
692           p++ ; *q++ = escape_test[i].out ;
693         }
694         else
695         if ( isoctal(*p) )
696         { /* keep p in register */
697           t = p ;  *q++ = octal(&t) ; p = t ;
698         }
699         else
700         if ( *p == 'x' && isxdigit(*(unsigned char*)(p+1)) )
701         {
702           t = p+1 ; *q++ = hex(&t) ; p = t ;
703         }
704 	else
705 	if ( *p == 0 )  *q++ = '\\' ;
706         else  /* not an escape sequence */
707            *q++ = *p++ ;
708       }
709       else  *q++ = *p++ ;
710 
711   *q = 0 ;
712   return s ;
713 }
714 
715 
716 /* stuff to put multiple line strings together */
717 
718 static char *the_string ; /* built it here */
719 static int len ;  /* strlen(the_string) */
720 
721 static char *get_string()
722 {
723   char *s = the_string ;
724   the_string = (char *) 0 ;
725   len = 0 ;
726   return s ;
727 }
728 
729 static void
730 add_to_string( s,  slen)
731   char *s ;
732   int slen ;
733 {
734     char *new = MALLOC(len+slen+1) ;
735     if ( the_string )
736     { (void) memcpy(new, the_string, len) ;
737       FREE(the_string) ;
738     }
739     (void) strcpy(new+len,s) ;
740     len += slen ;
741     the_string = new ;
742 }
743