1/*  This is the parser for the dlg
2 *  This is a part of the Purdue Compiler Construction Tool Set
3 *
4 * SOFTWARE RIGHTS
5 *
6 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
7 * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
8 * company may do whatever they wish with source code distributed with
9 * PCCTS or the code generated by PCCTS, including the incorporation of
10 * PCCTS, or its output, into commerical software.
11 *
12 * We encourage users to develop software with PCCTS.  However, we do ask
13 * that credit is given to us for developing PCCTS.  By "credit",
14 * we mean that if you incorporate our source code into one of your
15 * programs (commercial product, research project, or otherwise) that you
16 * acknowledge this fact somewhere in the documentation, research report,
17 * etc...  If you like PCCTS and have developed a nice tool with the
18 * output, please mention that you developed it using PCCTS.  In
19 * addition, we ask that this header remain intact in our source code.
20 * As long as these guidelines are kept, we expect to continue enhancing
21 * this system and expect to make other tools available as they are
22 * completed.
23 *
24 * DLG 1.33
25 * Will Cohen
26 * With mods by Terence Parr; AHPCRC, University of Minnesota
27 * 1989-1995
28 */
29
30#header	<<
#include <ctype.h>
#include <stdlib.h>
#include "dlg.h"
33>>
34
35<<
36
37/* MR20 G. Hobbelt
38   Fix for Borland C++ 4.x & 5.x compiling with ALL warnings enabled
39*/
40
41#ifdef __TURBOC__
42#pragma warn -aus  /* unused assignment of 'xxx' */
43#endif
44
45#pragma clang diagnostic ignored "-Wparentheses-equality"
46
47int	action_no = 0;	   /* keep track of actions outputted */
48int	nfa_allocated = 0; /* keeps track of number of nfa nodes */
49nfa_node **nfa_array = NULL;/* root of binary tree that stores nfa array */
50nfa_node nfa_model_node;   /* model to initialize new nodes */
51set	used_chars;	   /* used to label trans. arcs */
52set	used_classes;	   /* classes or chars used to label trans. arcs */
53set	normal_chars;	   /* mask to get rid elements that aren't used
54			      in set */
55int	flag_paren = FALSE;
56int	flag_brace = FALSE;
57int	mode_counter = 0;  /* keep track of number of %%names */
58
59>>
60
61#lexaction <<
/* Should actions be turned into functions? */
int	func_action;

/* Number of %%names seen by the scanner. */
int	lex_mode_counter = 0;

/*
 * MR1 11-Apr-97: mechanism for inserting code into the DLG class through
 * specially tagged actions.  Each flag is set to 1 when the scanner enters
 * the action named beside it and cleared when that action ends.
 */
int	lexMember = 0;		/* %%lexmember action */
int	lexAction = 0;		/* %%lexaction action */
int	parserClass = 0;	/* %%parserclass action */
int	lexPrefix = 0;		/* %%lexprefix action */

/* MR11: text collected from a %%parserclass action. */
char	theClassName[100];

/* MR11: position in theClassName where the next character is stored. */
char	*pClassName = theClassName;

/* MR1: nonzero until the first %%lexmember action has been seen. */
int	firstLexMember = 1;
75
76#ifdef __USE_PROTOS
77void  xxputc(int c) {						/* MR1 */
78#else
79void xxputc(c)							/* MR1 */
80  int	c;							/* MR1 */
81{								/* MR1 */
82#endif
83  if (parserClass) {						/* MR1 */
84    *pClassName++=c;						/* MR1 */
85    *pClassName=0;						/* MR1 */
86  } else if (lexMember || lexPrefix) {				/* MR1 */
87    if (class_stream != NULL) fputc(c,class_stream);		/* MR1 */
88  } else {							/* MR1 */
89    fputc(c,OUT);						/* MR1 */
90  };								/* MR1 */
91}  								/* MR1 */
92
93#ifdef __USE_PROTOS
94void xxprintf(char *format,char *string) {			/* MR1 */
95#else
96void xxprintf(format,string) 					/* MR1 */
97  char *format;							/* MR1 */
98  char *string;							/* MR1 */
99{								/* MR1 */
100#endif
101  if (lexMember || lexPrefix || parserClass) {			/* MR1 */
102    if (class_stream != NULL)					/* MR1 */
103	 fprintf(class_stream,format,string);			/* MR1 */
104  } else {							/* MR1 */
105    fprintf(OUT,format,string);					/* MR1 */
106  };								/* MR1 */
107}  								/* MR1 */
108>>
109
/* Blanks, tabs and carriage returns are ignored. */
#token "[\r\t\ ]+"
		<< zzskip(); >>

/* A newline is ignored too, but advances the line counter. */
#token "\n"
		<< zzline++; zzskip(); DAWDLE; >>

#token L_EOF		"\@"
#token PER_PER		"\%\%"

/* A named mode: the name starts after the two percent signs. */
#token NAME_PER_PER	"\%\%[a-zA-Z_][a-zA-Z0-9_]*"
		<< p_mode_def(&zzlextext[2], lex_mode_counter++); >>

/*
 * MR1: tagged actions.  Each one raises its flag and switches the scanner
 * to the ACT lexclass; the first lexmember action also calls
 * p_class_def1().
 */
#token LEXMEMBER	"\<\<\%\%lexmember"
		<<
			lexMember = 1;
			if (firstLexMember) {
				firstLexMember = 0;
				p_class_def1();
			}
			zzmode(ACT);
		>>
#token LEXACTION	"\<\<\%\%lexaction"
		<< lexAction = 1; zzmode(ACT); >>
#token PARSERCLASS	"\<\<\%\%parserclass"
		<< parserClass = 1; zzmode(ACT); >>
#token LEXPREFIX	"\<\<\%\%lexprefix"
		<< lexPrefix = 1; zzmode(ACT); >>

/*
 * A plain action.  When actions are being turned into functions, the
 * function header is written first, numbered with the next action number.
 */
#token ACTION		"\<\<"
		<<
			if (func_action) {
				++action_no;
				if (gen_cpp)
					fprintf(OUT,"\n%s %sact%d()\n{ ",
							"ANTLRTokenType", ClassName("::"), action_no);
				else
					fprintf(OUT,"\n%s %sact%d()\n{ ",
							"static void", "", action_no);
			}
			zzmode(ACT); zzskip();
		>>

#token GREAT_GREAT	"\>\>"
#token L_BRACE		"\{"
#token R_BRACE		"\}"
#token L_PAR		"\("
#token R_PAR		"\)"
#token L_BRACK		"\["
#token R_BRACK		"\]"
#token ZERO_MORE	"\*"
#token ONE_MORE		"\+"
#token OR		"\|"
#token RANGE		"\-"
#token NOT		"\~"

/*
 * Numeric escapes: the digits that follow the backslash (and the 0x prefix
 * for hex) are converted and the result is stored in zzlextext[0].
 */
#token OCTAL_VALUE "\\0[0-7]*"
		<< { int value; sscanf(&zzlextext[1], "%o", &value); zzlextext[0] = value; } >>
#token HEX_VALUE   "\\0[Xx][0-9a-fA-F]+"
		<< { int value; sscanf(&zzlextext[3], "%x", &value); zzlextext[0] = value; } >>
#token DEC_VALUE   "\\[1-9][0-9]*"
		<< { int value; sscanf(&zzlextext[1], "%d", &value); zzlextext[0] = value; } >>

/* Named escapes: zzlextext[0] receives the character they stand for. */
#token TAB		"\\t"
		<< zzlextext[0] = '\t'; >>
#token NL		"\\n"
		<< zzlextext[0] = '\n'; >>
#token CR		"\\r"
		<< zzlextext[0] = '\r'; >>
#token BS		"\\b"
		<< zzlextext[0] = '\b'; >>

/* MR1 10-Apr-97: allow #token regular expressions to cross lines. */
#token CONTINUATION	"\\ \n"
		<< zzline++; zzskip(); >>

/* NOTE: this takes ANYTHING after the \ (other than t, n, r or b). */
#token LIT		"\\~[tnrb]"
		<< zzlextext[0] = zzlextext[1]; >>

/* NOTE: this takes ANYTHING that doesn't match the other tokens. */
#token REGCHAR		"~[\\]"
174
175
/*
 * A whole DLG input: leading actions (each optionally tagged as lexaction,
 * lexmember, lexprefix or parserclass), the automaton definitions, any
 * trailing actions, then end of file.  If no lexmember action was seen,
 * p_class_def1() is called at the very end.
 */
grammar
	:	<< p_head(); p_class_hdr(); func_action = FALSE; >>
		( { LEXACTION | LEXMEMBER | LEXPREFIX | PARSERCLASS } ACTION )*
		<< if ( gen_cpp ) p_includes(); >>
		start_states
		<< func_action = FALSE; p_tables(); p_tail(); >>
		( ACTION )* "@"
		<< if ( firstLexMember ) p_class_def1(); >>
	;
184
/*
 * Either one automaton introduced by a bare %%, or one or more automata
 * each introduced by %%name; a closing %% ends the section.
 */
start_states
	:	(	PER_PER do_conversion
		|	NAME_PER_PER do_conversion ( NAME_PER_PER do_conversion )*
		)
		PER_PER
	;
189
/*
 * One automaton: parse its rules into an NFA, relabel it, emit the shift
 * table when compression is on, then convert the NFA to a DFA and move on
 * to the next mode.  Actions are turned into functions only while the
 * rules of the automaton are being read.
 */
do_conversion
	:	<< new_automaton_mode(); func_action = TRUE; >>
		rule_list
		<<
			dfa_class_nop[mode_counter] = relabel($1.l, comp_level);
			if (comp_level) {
				p_shift_table(mode_counter);
			}
			dfa_basep[mode_counter] = dfa_allocated + 1;
			make_dfa_model_node(dfa_class_nop[mode_counter]);
			nfa_to_dfa($1.l);
			++mode_counter;
			func_action = FALSE;
#ifdef HASH_STAT
			fprint_hash_stats(stderr);
#endif
		>>
	;
207
/*
 * The rules of one automaton.  Each further rule is joined to what has
 * been built so far through a new node whose two transitions lead to the
 * old start and to the start of the new rule.  An empty list yields a
 * single fresh node and a warning.
 */
rule_list
	:	rule
		<< $$.l = $1.l; $$.r = $1.r; >>
		(	rule
			<<
				{
					nfa_node *split = new_nfa_node();
					split->trans[0] = $$.l;
					split->trans[1] = $1.l;
					/* all accept nodes "dead ends" */
					$$.l = split;
					$$.r = NULL;
				}
			>>
		)*
	|	/* empty */
		<<
			$$.l = new_nfa_node();
			$$.r = NULL;
			warning("no regular expressions", zzline);
		>>
	;
224
/*
 * One rule: a regular expression followed by its action.  The last node of
 * the expression accepts with the current action number.  An action with
 * no expression in front of it is reported as an error.
 */
rule
	:	reg_expr ACTION
		<<
			/* MR23: nothing is done when the expression has no end node */
			if ($1.r != NULL) {
				$$.l = $1.l;
				$$.r = $1.r;
				($1.r)->accept = action_no;
			}
		>>
	|	ACTION
		<<
			$$.l = NULL;
			$$.r = NULL;
			error("no expression for action  ", zzline);
		>>
	;
235
/*
 * Alternation.  For every additional operand two nodes are made: a new
 * start whose transitions lead to both operands, and a new end that both
 * operands lead to.
 */
reg_expr
	:	and_expr
		<< $$.l = $1.l; $$.r = $1.r; >>
		(	OR and_expr
			<<
				{
					nfa_node *head = new_nfa_node();
					nfa_node *tail = new_nfa_node();
					head->trans[0] = $$.l;
					head->trans[1] = $2.l;
					/* MR23 */
					if ($$.r != NULL) ($$.r)->trans[1] = tail;
					/* MR20 */
					if ($2.r != NULL) ($2.r)->trans[1] = tail;
					$$.l = head;
					$$.r = tail;
				}
			>>
		)*
	;
251
/*
 * Concatenation: the end of what has been built so far is linked to the
 * start of the next operand, whose end becomes the new end.
 */
and_expr
	:	repeat_expr
		<< $$.l = $1.l; $$.r = $1.r; >>
		(	repeat_expr
			<<
				/* MR23 */
				if ($$.r != NULL) {
					($$.r)->trans[1] = $1.l;
					$$.r = $1.r;
				}
			>>
		)*
	;
264
/*
 * An expression with an optional closure operator.
 *   star: the end loops back to the start, and a new start and end are
 *         wrapped around the expression so it can also be bypassed;
 *   plus: the end loops back to the start.
 * A closure operator with nothing in front of it is an error.
 */
repeat_expr
	:	expr
		<< $$.l = $1.l; $$.r = $1.r; >>
		{	ZERO_MORE
			<<
				{
					nfa_node *entry, *leave;
					/* MR23 */
					if ($$.r != NULL) ($$.r)->trans[0] = $$.l;
					entry = new_nfa_node();
					leave = new_nfa_node();
					entry->trans[0] = $$.l;
					entry->trans[1] = leave;
					/* MR23 */
					if ($$.r != NULL) ($$.r)->trans[1] = leave;
					$$.l = entry;
					$$.r = leave;
				}
			>>
		|	ONE_MORE
			<<
				/* MR23 */
				if ($$.r != NULL) ($$.r)->trans[0] = $$.l;
			>>
		}
	|	ZERO_MORE
		<< error("no expression for *", zzline); >>
	|	ONE_MORE
		<< error("no expression for +", zzline); >>
	;
284
/*
 * A primary expression.  A start node and an end node are made up front;
 * each alternative then wires them together:
 *   [list]    start leads to end on the characters of the list
 *   ~[list]   start leads to end on normal_chars minus the list
 *   (re)      start leads into the sub-expression, whose end leads to end
 *   {re}      as (re), plus a direct start-to-end transition
 *   atom      start leads to end on the characters of the atom
 * Every label put on an arc is also merged into used_chars.
 */
expr
	:	<<
			$$.l = new_nfa_node();
			$$.r = new_nfa_node();
		>>
		L_BRACK atom_list R_BRACK
		<<
			/* MR23 */
			if ($$.l != NULL) {
				($$.l)->trans[0] = $$.r;
				($$.l)->label = set_dup($2.label);
				set_orin(&used_chars, ($$.l)->label);
			}
		>>
	|	NOT L_BRACK atom_list R_BRACK
		<<
			/* MR23 */
			if ($$.l != NULL) {
				($$.l)->trans[0] = $$.r;
				($$.l)->label = set_dif(normal_chars, $3.label);
				set_orin(&used_chars, ($$.l)->label);
			}
		>>
	|	L_PAR reg_expr R_PAR
		<<
			/* MR23 */
			if ($$.l != NULL) {
				($$.l)->trans[0] = $2.l;
				/* MR20 */
				if ($2.r) {
					($2.r)->trans[1] = $$.r;
				}
			}
		>>
	|	L_BRACE reg_expr R_BRACE
		<<
			/* MR23 */
			if ($$.l != NULL) {
				($$.l)->trans[0] = $2.l;
				($$.l)->trans[1] = $$.r;
				/* MR20 */
				if ($2.r) {
					($2.r)->trans[1] = $$.r;
				}
			}
		>>
	|	atom
		<<
			/* MR23 */
			if ($$.l != NULL) {
				($$.l)->trans[0] = $$.r;
				($$.l)->label = set_dup($1.label);
				set_orin(&used_chars, ($$.l)->label);
			}
		>>
	;
332
/* The contents of a bracketed list: the union of the labels of its members. */
atom_list
	:	<< set_free($$.label); >>
		(	near_atom
			<< set_orin(&($$.label), $1.label); >>
		)*
	;
336
/*
 * One member of a bracketed list: a single character, or a range written
 * as two characters separated by a dash.
 *
 * The label starts as the first character (plus its other case when
 * case_insensitive is set).  For a range, every character from the first
 * to the second is added, again with its other case when case_insensitive
 * is set; in that mode lower-case end points are first folded to upper
 * case.  A range whose first character is greater than its second is
 * reported as an error unless the second is 0xff (MR16).
 *
 * The unused debugLetter1/debugLetter2 locals of earlier versions have
 * been removed; they had no effect.
 */
near_atom
	:	<< register int i;
		   register int i_prime;
		>>
		anychar
		<<
			$$.letter = $1.letter;
			$$.label = set_of($1.letter);
			i_prime = $1.letter + MIN_CHAR;
			if (case_insensitive && islower(i_prime))
				set_orel(toupper(i_prime)-MIN_CHAR,
					&($$.label));
			if (case_insensitive && isupper(i_prime))
				set_orel(tolower(i_prime)-MIN_CHAR,
					&($$.label));
		>>
		{	RANGE anychar
			<<
				if (case_insensitive) {
					i_prime = $$.letter+MIN_CHAR;
					$$.letter = (islower(i_prime) ?
						toupper(i_prime) : i_prime)-MIN_CHAR;
					i_prime = $2.letter+MIN_CHAR;
					$2.letter = (islower(i_prime) ?
						toupper(i_prime) : i_prime)-MIN_CHAR;
				}
				/* check to see if range okay */
				if ($$.letter > $2.letter
					&& $2.letter != 0xff) {		/* MR16 */
					error("invalid range  ", zzline);
				}
				for (i=$$.letter; i<= (int)$2.letter; ++i) {
					set_orel(i,&($$.label));
					i_prime = i+MIN_CHAR;
					if (case_insensitive && islower(i_prime))
						set_orel(toupper(i_prime)-MIN_CHAR,
							&($$.label));
					if (case_insensitive && isupper(i_prime))
						set_orel(tolower(i_prime)-MIN_CHAR,
							&($$.label));
				}
			>>
		}
	;
381
/*
 * A single character used outside brackets.  Its label holds the
 * character and, when case_insensitive is set, its other case as well.
 */
atom
	:	<< register int i_prime; >>
		anychar
		<<
			$$.label = set_of($1.letter);
			i_prime = $1.letter + MIN_CHAR;
			if (case_insensitive && islower(i_prime)) {
				set_orel(toupper(i_prime)-MIN_CHAR, &($$.label));
			}
			if (case_insensitive && isupper(i_prime)) {
				set_orel(tolower(i_prime)-MIN_CHAR, &($$.label));
			}
		>>
	;
394
/*
 * Any token that stands for one character.  The letter returned is the
 * token's character shifted down by MIN_CHAR.
 */
anychar
	:	REGCHAR		<< $$.letter = $1.letter - MIN_CHAR; >>
	|	OCTAL_VALUE	<< $$.letter = $1.letter - MIN_CHAR; >>
	|	HEX_VALUE	<< $$.letter = $1.letter - MIN_CHAR; >>
	|	DEC_VALUE	<< $$.letter = $1.letter - MIN_CHAR; >>
	|	TAB		<< $$.letter = $1.letter - MIN_CHAR; >>
	|	NL		<< $$.letter = $1.letter - MIN_CHAR; >>
	|	CR		<< $$.letter = $1.letter - MIN_CHAR; >>
	|	BS		<< $$.letter = $1.letter - MIN_CHAR; >>
	|	LIT		<< $$.letter = $1.letter - MIN_CHAR; >>
		/* NOTE: LEX_EOF is ALWAYS shifted to 0 = MIN_CHAR - MIN_CHAR */
	|	L_EOF		<< $$.letter = 0; >>
	;
407
408<</* empty action */>>
409
/*
 * Lexclass used inside an action.  Actions are not saved (MR1): their
 * text is passed on character by character through xxputc/xxprintf as it
 * is scanned.
 */
#lexclass ACT

/* End of file inside an action. */
#token "@"
		<< error("unterminated action", zzline); zzmode(START); >>

/*
 * End of the action: close the generated function if there is one, go
 * back to START and leave every tagged-action state (MR1 11-Apr-97).
 */
#token ACTION "\>\>"
		<<
			if (func_action) fprintf(OUT,"}\n\n");
			zzmode(START);
			parserClass = 0;
			lexPrefix = 0;
			lexAction = 0;
			lexMember = 0;
		>>

/* A lone greater-than sign is ordinary text. */
#token "\>"
		<< xxputc(zzlextext[0]); zzskip(); >>
/* An escaped greater-than sign is passed on without its backslash. */
#token "\\\>"
		<< xxputc('>'); zzskip(); >>
#token "\\"
		<< xxputc('\\'); zzskip(); >>
#token "\n"
		<< xxputc(zzlextext[0]); ++zzline; zzskip(); >>
/* Start of a C comment: copy the opener and switch lexclass. */
#token "/\*"
		<<
			zzmode(ACTION_COMMENTS);
			xxprintf("%s", &zzlextext[0]);
			zzskip();
		>>
/* Start of a C++ comment: copy the opener and switch lexclass. */
#token "//"
		<<
			zzmode(ACTION_CPP_COMMENTS);
			xxprintf("%s", &zzlextext[0]);
			zzskip();
		>>
/* Anything else is passed on unchanged. */
#token "~[]"
		<< xxputc(zzlextext[0]); zzskip(); >>
436									/* MR1 */
/*
 * MR1: inside a C comment within an action.  Everything is passed on
 * unchanged; the comment terminator switches back to ACT.
 */
#lexclass ACTION_COMMENTS
#token "\*/"
		<<
			zzmode(ACT);
			xxprintf("%s", &zzlextext[0]);
			zzskip();
		>>
#token "[\n\r]"
		<< zzline++; xxputc(zzlextext[0]); zzskip(); >>
#token "~[]"
		<< xxputc(zzlextext[0]); zzskip(); >>
443									/* MR1 */
/*
 * MR1: inside a C++ comment within an action.  Everything is passed on
 * unchanged; the end of the line switches back to ACT.
 */
#lexclass ACTION_CPP_COMMENTS
#token "[\n\r]"
		<<
			zzmode(ACT);
			zzline++;
			xxprintf("%s", &zzlextext[0]);
			zzskip();
		>>
#token "~[]"
		<< xxputc(zzlextext[0]); zzskip(); >>
449
450<<
451/* adds a new nfa to the binary tree and returns a pointer to it */
452nfa_node *
453#ifdef __USE_PROTOS
454new_nfa_node(void)
455#else
456new_nfa_node()
457#endif
458{
459	register nfa_node *t;
460	static int nfa_size=0;	/* elements nfa_array[] can hold */
461
462	++nfa_allocated;
463	if (nfa_size<=nfa_allocated){
464		/* need to redo array */
465		if (!nfa_array){
466			/* need some to do initial allocation */
467			nfa_size=nfa_allocated+NFA_MIN;
468			nfa_array=(nfa_node **) malloc(sizeof(nfa_node*)*
469				nfa_size);
470		}else{
471			/* need more space */
472			nfa_size=2*(nfa_allocated+1);
473			nfa_array=(nfa_node **) realloc(nfa_array,
474				sizeof(nfa_node*)*nfa_size);
475		}
476	}
477	/* fill out entry in array */
478	t = (nfa_node*) malloc(sizeof(nfa_node));
479	nfa_array[nfa_allocated] = t;
480	*t = nfa_model_node;
481	t->node_no = nfa_allocated;
482	return t;
483}
484
485
486/* initialize the model node used to fill in newly made nfa_nodes */
487void
488#ifdef __USE_PROTOS
489make_nfa_model_node(void)
490#else
491make_nfa_model_node()
492#endif
493{
494	nfa_model_node.node_no = -1; /* impossible value for real nfa node */
495	nfa_model_node.nfa_set = 0;
496	nfa_model_node.accept = 0;   /* error state default*/
497	nfa_model_node.trans[0] = NULL;
498	nfa_model_node.trans[1] = NULL;
499	nfa_model_node.label = empty;
500}
501>>
502
503<<
504#if defined(DEBUG) || defined(_DEBUG)
505
506/* print out the pointer value and the node_number */
507void
508#ifdef __USE_PROTOS
509fprint_dfa_pair(FILE *f, nfa_node *p)
510#else
511fprint_dfa_pair(f, p)
512FILE *f;
513nfa_node *p;
514#endif
515{
516	if (p){
517		fprintf(f, "%x (%d)", p, p->node_no);
518	}else{
519		fprintf(f, "(nil)");
520	}
521}
522
523/* print out interest information on a set */
524void
525#ifdef __USE_PROTOS
526fprint_set(FILE *f, set s)
527#else
528fprint_set(f,s)
529FILE *f;
530set s;
531#endif
532{
533	unsigned int *x;
534
535	fprintf(f, "n = %d,", s.n);
536	if (s.setword){
537		fprintf(f, "setword = %x,   ", s.setword);
538		/* print out all the elements in the set */
539		x = set_pdq(s);
540		while (*x!=nil){
541			fprintf(f, "%d ", *x);
542			++x;
543		}
544	}else{
545		fprintf(f, "setword = (nil)");
546	}
547}
548
549/* code to be able to dump out the nfas
550	return 0 if okay dump
551	return 1 if screwed up
552 */
553int
554#ifdef __USE_PROTOS
555dump_nfas(int first_node, int last_node)
556#else
557dump_nfas(first_node, last_node)
558int first_node;
559int last_node;
560#endif
561{
562	register int i;
563	nfa_node *t;
564
565	for (i=first_node; i<=last_node; ++i){
566		t = NFA(i);
567		if (!t) break;
568		fprintf(stderr, "nfa_node %d {\n", t->node_no);
569		fprintf(stderr, "\n\tnfa_set = %d\n", t->nfa_set);
570		fprintf(stderr, "\taccept\t=\t%d\n", t->accept);
571		fprintf(stderr, "\ttrans\t=\t(");
572		fprint_dfa_pair(stderr, t->trans[0]);
573		fprintf(stderr, ",");
574		fprint_dfa_pair(stderr, t->trans[1]);
575		fprintf(stderr, ")\n");
576		fprintf(stderr, "\tlabel\t=\t{ ");
577		fprint_set(stderr, t->label);
578		fprintf(stderr, "\t}\n");
579		fprintf(stderr, "}\n\n");
580	}
581	return 0;
582}
583#endif
584>>
585
586<<
587/* DLG-specific syntax error message generator
588 * (define USER_ZZSYN when compiling so don't get 2 definitions)
589 */
590void
591#ifdef __USE_PROTOS
592zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text)
593#else
594zzsyn(text, tok, egroup, eset, etok, k, bad_text)
595char *text, *egroup, *bad_text;
596int tok;
597int etok;
598int k;
599SetWordType *eset;
600#endif
601{
602	fprintf(stderr, ErrHdr, file_str[0]!=NULL?file_str[0]:"stdin", zzline);
603	fprintf(stderr, " syntax error at \"%s\"", (tok==zzEOF_TOKEN)?"EOF":text);
604	if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
605	if ( k==1 ) fprintf(stderr, " missing");
606	else
607	{
608		fprintf(stderr, "; \"%s\" not", bad_text);
609		if ( zzset_deg(eset)>1 ) fprintf(stderr, " in");
610	}
611	if ( zzset_deg(eset)>0 ) zzedecode(eset);
612	else fprintf(stderr, " %s", zztokens[etok]);
613	if ( strlen(egroup) > (size_t)0 ) fprintf(stderr, " in %s", egroup);
614	fprintf(stderr, "\n");
615}
616>>
617