/*  This is the parser for the dlg
 *  This is a part of the Purdue Compiler Construction Tool Set
 *
 * SOFTWARE RIGHTS
 *
 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
 * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
 * company may do whatever they wish with source code distributed with
 * PCCTS or the code generated by PCCTS, including the incorporation of
 * PCCTS, or its output, into commerical software.
 *
 * We encourage users to develop software with PCCTS.  However, we do ask
 * that credit is given to us for developing PCCTS.  By "credit",
 * we mean that if you incorporate our source code into one of your
 * programs (commercial product, research project, or otherwise) that you
 * acknowledge this fact somewhere in the documentation, research report,
 * etc...  If you like PCCTS and have developed a nice tool with the
 * output, please mention that you developed it using PCCTS.  In
 * addition, we ask that this header remain intact in our source code.
 * As long as these guidelines are kept, we expect to continue enhancing
 * this system and expect to make other tools available as they are
 * completed.
 *
 * DLG 1.33
 * Will Cohen
 * With mods by Terence Parr; AHPCRC, University of Minnesota
 * 1989-1995
 */

#header	<<
#include <ctype.h>
#include <stdlib.h>
#include "dlg.h"
>>

<<

/* MR20 G. Hobbelt
   Fix for Borland C++ 4.x & 5.x compiling with ALL warnings enabled
*/

#ifdef __TURBOC__
#pragma warn -aus  /* unused assignment of 'xxx' */
#endif

#pragma clang diagnostic ignored "-Wparentheses-equality"

int	action_no = 0;	   /* keep track of actions outputted */
int	nfa_allocated = 0; /* keeps track of number of nfa nodes */
nfa_node **nfa_array = NULL;/* root of binary tree that stores nfa array */
nfa_node nfa_model_node;   /* model to initialize new nodes */
set	used_chars;	   /* used to label trans. arcs */
set	used_classes;	   /* classes or chars used to label trans. arcs */
set	normal_chars;	   /* mask to get rid elements that aren't used
			      in set */
int	flag_paren = FALSE;
int	flag_brace = FALSE;
int	mode_counter = 0;  /* keep track of number of %%names */

>>

#lexaction <<
int	func_action;		/* should actions be turned into functions?*/
int	lex_mode_counter = 0;	/* keeps track of the number of %%names */
/* MR1									    */
/* MR1  11-Apr-97	Provide mechanism for inserting code into DLG class */
/* MR1				via <<%%lexmember...>>			    */
/* MR1									    */
int	lexMember = 0;		/* <<%%lexmember ...>>	   		MR1 */
int	lexAction = 0;		/* <<%%lexaction ...>>			MR1 */
int	parserClass = 0;	/* <<%%parserclass ...>>		MR1 */
int	lexPrefix = 0;		/* <<%%lexprefix ...>>			MR1 */
char	theClassName[100];	/* text collected while parserClass is set  MR11 */
char	*pClassName=theClassName;	/* next free slot in theClassName   MR11 */
int	firstLexMember=1;					     /* MR1 */

/* Emit one character of action text to the destination selected by the
 * mode flags set by the lexer:
 *   parserClass set           -- append to theClassName (kept NUL terminated)
 *   lexMember or lexPrefix    -- write to class_stream, if it is open
 *   otherwise                 -- write to OUT
 *
 * theClassName is a fixed 100 byte buffer; characters that would not leave
 * room for the terminating NUL are dropped instead of being written past
 * the end of the array.
 */
#ifdef __USE_PROTOS
void xxputc(int c) {						/* MR1 */
#else
void xxputc(c)							/* MR1 */
  int	c;							/* MR1 */
{								/* MR1 */
#endif
  if (parserClass) {						/* MR1 */
    /* always keep the last byte for the terminator */
    if (pClassName < theClassName + sizeof(theClassName) - 1) {
      *pClassName++=(char)c;					/* MR1 */
      *pClassName=0;						/* MR1 */
    }
  } else if (lexMember || lexPrefix) {				/* MR1 */
    if (class_stream != NULL) fputc(c,class_stream);		/* MR1 */
  } else {							/* MR1 */
    fputc(c,OUT);						/* MR1 */
  };								/* MR1 */
}  								/* MR1 */

/* Emit formatted action text.  Goes to class_stream (when open) if any of
 * lexMember, lexPrefix or parserClass is set, otherwise to OUT.
 *
 * format is handed to fprintf unchanged and must consume exactly one
 * string argument; every call in this file passes the literal "%s".
 */
#ifdef __USE_PROTOS
void xxprintf(char *format,char *string) {			/* MR1 */
#else
void xxprintf(format,string) 					/* MR1 */
  char *format;							/* MR1 */
  char *string;							/* MR1 */
{								/* MR1 */
#endif
  if (lexMember || lexPrefix || parserClass) {			/* MR1 */
    if (class_stream != NULL)					/* MR1 */
	 fprintf(class_stream,format,string);			/* MR1 */
  } else {							/* MR1 */
    fprintf(OUT,format,string);					/* MR1 */
  }; 								/* MR1 */
}  								/* MR1 */
>>

#token "[\r\t\ ]+"	<< zzskip(); >>				/* Ignore white */
#token "\n"		<< zzline++; zzskip(); DAWDLE; >>	/* Track Line # */
#token L_EOF		"\@"
#token PER_PER		"\%\%"
#token NAME_PER_PER	"\%\%[a-zA-Z_][a-zA-Z0-9_]*"
		<< p_mode_def(&zzlextext[2],lex_mode_counter++); >>

#token LEXMEMBER	"\<\<\%\%lexmember"			/* MR1 */
		<<lexMember=1;					/* MR1 */
	          if (firstLexMember != 0) {			/* MR1 */
	            firstLexMember=0;				/* MR1 */
	            p_class_def1();				/* MR1 */
		  };						/* MR1 */
	          zzmode(ACT);					/* MR1 */
		>>						/* MR1 */
#token LEXACTION	"\<\<\%\%lexaction"			/* MR1 */
		<<lexAction=1;zzmode(ACT);>>			/* MR1 */
#token PARSERCLASS	"\<\<\%\%parserclass"			/* MR1 */
		<<parserClass=1;				/* MR1 */
		  zzmode(ACT);					/* MR1 */
		>>						/* MR1 */
#token LEXPREFIX	"\<\<\%\%lexprefix"			/* MR1 */
		<<lexPrefix=1;zzmode(ACT);>>			/* MR1 */

#token ACTION		"\<\<"
		<< if (func_action)
			fprintf(OUT,"\n%s %sact%d()\n{ ",
					gen_cpp?"ANTLRTokenType":"static void",
					gen_cpp?ClassName("::"):"", ++action_no);
		   zzmode(ACT); zzskip();
		>>
#token GREAT_GREAT	"\>\>"
#token L_BRACE		"\{"
#token R_BRACE		"\}"
#token L_PAR		"\("
#token R_PAR		"\)"
#token L_BRACK		"\["
#token R_BRACK		"\]"
#token ZERO_MORE	"\*"
#token ONE_MORE		"\+"
#token OR		"\|"
#token RANGE		"\-"
#token NOT		"\~"
/* The escape tokens below overwrite zzlextext[0] with the character value
 * they denote. */
#token OCTAL_VALUE "\\0[0-7]*"
	<< {int t; sscanf(&zzlextext[1],"%o",&t); zzlextext[0] = t;}>>
#token HEX_VALUE   "\\0[Xx][0-9a-fA-F]+"
	<< {int t; sscanf(&zzlextext[3],"%x",&t); zzlextext[0] = t;}>>
#token DEC_VALUE   "\\[1-9][0-9]*"
	<< {int t; sscanf(&zzlextext[1],"%d",&t); zzlextext[0] = t;}>>
#token TAB		"\\t"		<< zzlextext[0] = '\t';>>
#token NL		"\\n"		<< zzlextext[0] = '\n';>>
#token CR		"\\r"		<< zzlextext[0] = '\r';>>
#token BS		"\\b"		<< zzlextext[0] = '\b';>>

/* MR1									    */
/* MR1 10-Apr-97 MR1	Allow #token regular expressions to cross lines	    */
/* MR1									    */
#token CONTINUATION	"\\ \n"		<< zzline++; zzskip();>> /* MR1 */

/* NOTE: this takes ANYTHING after the \ */
#token LIT		"\\~[tnrb]"	<< zzlextext[0] = zzlextext[1];>>

/* NOTE: this takes ANYTHING that doesn't match the other tokens */
#token REGCHAR		"~[\\]"


/* Top level rule: optional leading actions, the start states, optional
 * trailing actions, then end of input. */
grammar		:   << p_head(); p_class_hdr(); func_action = FALSE;>>
		 ( {LEXACTION | LEXMEMBER | LEXPREFIX | PARSERCLASS } ACTION)* /* MR1 */
		    <<if ( gen_cpp ) p_includes();>>
		start_states
		<< func_action = FALSE; p_tables(); p_tail(); >>
		(ACTION)* "@"
		<< if (firstLexMember != 0) p_class_def1(); >> 		/* MR1 */
		;

/* Either one anonymous %% section or one or more named %%name sections,
 * closed by a final %%. */
start_states	: ( PER_PER do_conversion
		  | NAME_PER_PER do_conversion (NAME_PER_PER do_conversion)*)
		    PER_PER
		;

/* Parse the rule list of one mode and run the relabel / nfa_to_dfa steps
 * on the nfa it produced; actions inside are emitted as functions. */
do_conversion	: <<new_automaton_mode(); func_action = TRUE;>>
			rule_list
			<<
				dfa_class_nop[mode_counter] =
					relabel($1.l,comp_level);
				if (comp_level)
					p_shift_table(mode_counter);
				dfa_basep[mode_counter] = dfa_allocated+1;
				make_dfa_model_node(dfa_class_nop[mode_counter]);
				nfa_to_dfa($1.l);
				++mode_counter;
		    		func_action = FALSE;
#ifdef HASH_STAT
				fprint_hash_stats(stderr);
#endif
			>>
		;

/* One or more rules; each further rule is joined to what came before by a
 * fresh node whose two transitions lead to the old and the new start. */
rule_list	: rule <<$$.l=$1.l; $$.r=$1.r;>>
			(rule
				<<{nfa_node *t1;
				   t1 = new_nfa_node();
				   (t1)->trans[0]=$$.l;
				   (t1)->trans[1]=$1.l;
				   /* all accept nodes "dead ends" */
				   $$.l=t1; $$.r=NULL;
				   }
				>>
			)*
		| /* empty */
			<<$$.l = new_nfa_node(); $$.r = NULL;
			   warning("no regular expressions", zzline);
			>>
		;

/* A regular expression followed by its action; the end node of the
 * expression is tagged with the current action number. */
rule	: reg_expr ACTION
/* MR23 */		<< if ($1.r != NULL) {
					$$.l=$1.l; $$.r=$1.r; ($1.r)->accept=action_no;
				}
			>>
		| ACTION
			<<$$.l = NULL; $$.r = NULL;
			  error("no expression for action  ", zzline);
			>>
		;

/* Alternation: and_expr ( "|" and_expr )* */
reg_expr	: and_expr <<$$.l=$1.l; $$.r=$1.r;>>
			(OR and_expr
				<<{nfa_node *t1, *t2;
				   t1 = new_nfa_node(); t2 = new_nfa_node();
				   (t1)->trans[0]=$$.l;
				   (t1)->trans[1]=$2.l;
/* MR23 */		   if ($$.r != NULL) ($$.r)->trans[1]=t2;
				   if ($2.r) {
    				   ($2.r)->trans[1]=t2;     /* MR20 */
				   }
				   $$.l=t1; $$.r=t2;
				  }
				>>
			)*
		;

/* Concatenation: the end of each piece is linked to the start of the next. */
and_expr	: repeat_expr
					<<
						$$.l=$1.l; $$.r=$1.r;
				    >>
			(repeat_expr
/* MR23 */				<< if ($$.r != NULL) {
							($$.r)->trans[1]=$1.l;
							$$.r=$1.r;
						}
					>>
			)*
		;

/* An expr optionally followed by "*" or "+".  A bare "*" or "+" with no
 * operand is reported as an error. */
repeat_expr	: expr <<$$.l=$1.l; $$.r=$1.r;>>
			{ ZERO_MORE
			<<{	nfa_node *t1,*t2;
/* MR23 */		if ($$.r != NULL) ($$.r)->trans[0] = $$.l;
				t1 = new_nfa_node(); t2 = new_nfa_node();
				t1->trans[0]=$$.l;
				t1->trans[1]=t2;
/* MR23 */		if ($$.r != NULL) ($$.r)->trans[1]=t2;
				$$.l=t1;$$.r=t2;
			  }
			>>
			| ONE_MORE
/* MR23 */		<<if ($$.r != NULL) ($$.r)->trans[0] = $$.l;>>
			}
		| ZERO_MORE
			<< error("no expression for *", zzline);>>
		| ONE_MORE
			<< error("no expression for +", zzline);>>
		;

/* A primary: character class, negated class, parenthesised or braced
 * sub-expression, or a single atom.  A start and an end node are allocated
 * before any alternative is tried. */
expr	: << $$.l = new_nfa_node();
			 $$.r = new_nfa_node();
		  >>
		  L_BRACK atom_list R_BRACK
			<<
/* MR23 */		if ($$.l != NULL) {
					($$.l)->trans[0] = $$.r;
					($$.l)->label = set_dup($2.label);
					set_orin(&used_chars,($$.l)->label);
				}
			>>
		| NOT L_BRACK atom_list R_BRACK
			<<
/* MR23 */		if ($$.l != NULL) {
					($$.l)->trans[0] = $$.r;
					($$.l)->label = set_dif(normal_chars,$3.label);
					set_orin(&used_chars,($$.l)->label);
				}
			>>
		| L_PAR reg_expr R_PAR
			<<
/* MR23 */		if ($$.l != NULL) {
					($$.l)->trans[0] = $2.l;
					if ($2.r) {
    					($2.r)->trans[1] = $$.r;    /* MR20 */
					}
				}
			>>
		| L_BRACE reg_expr R_BRACE
			<<
/* MR23 */		if ($$.l != NULL) {
					($$.l)->trans[0] = $2.l;
					($$.l)->trans[1] = $$.r;
					if ($2.r) {
    					($2.r)->trans[1] = $$.r;    /* MR20 */
					}
				}
			>>
		| atom
			<<
/* MR23 */		if ($$.l != NULL) {
					($$.l)->trans[0] = $$.r;
					($$.l)->label = set_dup($1.label);
					set_orin(&used_chars,($$.l)->label);
				}
			>>
		;

/* Union of the labels of zero or more near_atoms. */
atom_list	: << set_free($$.label); >>
				(near_atom <<set_orin(&($$.label),$1.label);>>)*
		;

/* A single character or a range lo-hi inside a character class.  When
 * case_insensitive is set the other-case letters are added as well. */
near_atom	: << register int i;
		     register int i_prime;
		  >>
		  anychar
			<<$$.letter=$1.letter; $$.label=set_of($1.letter);
			i_prime = $1.letter + MIN_CHAR;
			if (case_insensitive && islower(i_prime))
				set_orel(toupper(i_prime)-MIN_CHAR,
					&($$.label));
			if (case_insensitive && isupper(i_prime))
	 			set_orel(tolower(i_prime)-MIN_CHAR,
					&($$.label));
			>>
			{ RANGE anychar
				<< if (case_insensitive){
					i_prime = $$.letter+MIN_CHAR;
					$$.letter = (islower(i_prime) ?
						toupper(i_prime) : i_prime)-MIN_CHAR;
					i_prime = $2.letter+MIN_CHAR;
					$2.letter = (islower(i_prime) ?
						toupper(i_prime) : i_prime)-MIN_CHAR;
				   }
				   /* check to see if range okay */
				   if ($$.letter > $2.letter
                                       && $2.letter != 0xff){       /* MR16 */
					  error("invalid range  ", zzline);
				   }
				   for (i=$$.letter; i<= (int)$2.letter; ++i){
					set_orel(i,&($$.label));
					i_prime = i+MIN_CHAR;
					if (case_insensitive && islower(i_prime))
						set_orel(toupper(i_prime)-MIN_CHAR,
							&($$.label));
					if (case_insensitive && isupper(i_prime))
		 				set_orel(tolower(i_prime)-MIN_CHAR,
							&($$.label));
					}
				>>
			}
		;

/* A single character outside a character class. */
atom		: << register int i_prime;>>
		  anychar
		  <<$$.label = set_of($1.letter);
		    i_prime = $1.letter + MIN_CHAR;
		    if (case_insensitive && islower(i_prime))
			set_orel(toupper(i_prime)-MIN_CHAR,
				&($$.label));
		    if (case_insensitive && isupper(i_prime))
	 		set_orel(tolower(i_prime)-MIN_CHAR,
				&($$.label));
		  >>
		;

/* Any token that stands for one character; its value is shifted down by
 * MIN_CHAR. */
anychar		: REGCHAR	<<$$.letter = $1.letter - MIN_CHAR;>>
		| OCTAL_VALUE	<<$$.letter = $1.letter - MIN_CHAR;>>
		| HEX_VALUE	<<$$.letter = $1.letter - MIN_CHAR;>>
		| DEC_VALUE	<<$$.letter = $1.letter - MIN_CHAR;>>
		| TAB		<<$$.letter = $1.letter - MIN_CHAR;>>
		| NL		<<$$.letter = $1.letter - MIN_CHAR;>>
		| CR		<<$$.letter = $1.letter - MIN_CHAR;>>
		| BS		<<$$.letter = $1.letter - MIN_CHAR;>>
		| LIT		<<$$.letter = $1.letter - MIN_CHAR;>>
		/* NOTE: LEX_EOF is ALWAYS shifted to 0 = MIN_CHAR - MIN_CHAR*/
		| L_EOF		<<$$.letter = 0;>>
		;

<</* empty action */>>

/* Lexical class used while inside an action: text is copied through
 * xxputc/xxprintf until the closing delimiter is seen. */
#lexclass ACT
#token "@"	<< error("unterminated action", zzline); zzmode(START); >>
#token ACTION "\>\>"
		<< if (func_action) fprintf(OUT,"}\n\n");
		   zzmode(START);
/* MR1									    */
/* MR1  11-Apr-97	Provide mechanism for inserting code into DLG class */
/* MR1				via <<%%lexmember ...>>			    */
/* MR1			This is a consequence of not saving actions         */
/* MR1									    */
/* MR1 */	   parserClass=0;
/* MR1 */	   lexPrefix=0;
/* MR1 */	   lexAction=0;
/* MR1 */	   lexMember=0;
		>>
#token "\>"		<< xxputc(zzlextext[0]); zzskip(); >>		/* MR1 */
#token "\\\>"		<< xxputc('>'); zzskip(); >>			/* MR1 */
#token "\\"		<< xxputc('\\'); zzskip(); >>			/* MR1 */
#token "\n"		<< xxputc(zzlextext[0]); ++zzline; zzskip(); >>	/* MR1 */
#token "/\*"		<< zzmode(ACTION_COMMENTS);			/* MR1 */
			   xxprintf("%s", &(zzlextext[0])); zzskip();	/* MR1 */
			>>						/* MR1 */
#token "//"		<< zzmode(ACTION_CPP_COMMENTS);			/* MR1 */
			   xxprintf("%s", &(zzlextext[0])); zzskip();	/* MR1 */
			>>						/* MR1 */
#token "~[]"		<< xxputc(zzlextext[0]); zzskip(); >>		/* MR1 */
									/* MR1 */
#lexclass ACTION_COMMENTS						/* MR1 */
#token "\*/"		<< zzmode(ACT);					/* MR1 */
			   xxprintf("%s", &(zzlextext[0])); zzskip();	/* MR1 */
			>>						/* MR1 */
#token "[\n\r]"		<< zzline++; xxputc(zzlextext[0]); zzskip();>>	/* MR1 */
#token "~[]"		<< xxputc(zzlextext[0]); zzskip();>>		/* MR1 */
									/* MR1 */
#lexclass ACTION_CPP_COMMENTS						/* MR1 */
#token "[\n\r]"		<< zzmode(ACT); zzline++;			/* MR1 */
			   xxprintf("%s", &(zzlextext[0])); zzskip();	/* MR1 */
			>>						/* MR1 */
#token "~[]"		<< xxputc(zzlextext[0]); zzskip();>>		/* MR1 */

<<
/* Allocate a new nfa node, record it in nfa_array[] and return it.
 *
 * The node starts as a copy of nfa_model_node with node_no set to its
 * index, which is the value of nfa_allocated after it has been
 * incremented.  nfa_array is created on first use and grown whenever the
 * new index would not fit.
 *
 * Never returns NULL: if the table or the node cannot be allocated a
 * message is written to stderr and the program exits with EXIT_FAILURE.
 */
nfa_node *
#ifdef __USE_PROTOS
new_nfa_node(void)
#else
new_nfa_node()
#endif
{
	register nfa_node *t;
	static int nfa_size=0;	/* elements nfa_array[] can hold */

	++nfa_allocated;
	if (nfa_size<=nfa_allocated){
		/* need to redo array */
		nfa_node **grown;
		int wanted;

		if (!nfa_array){
			/* need some to do initial allocation */
			wanted=nfa_allocated+NFA_MIN;
			grown=(nfa_node **) malloc(sizeof(nfa_node*)*
				wanted);
		}else{
			/* need more space */
			wanted=2*(nfa_allocated+1);
			grown=(nfa_node **) realloc(nfa_array,
				sizeof(nfa_node*)*wanted);
		}
		/* only replace the table once the allocation has succeeded */
		if (grown == NULL){
			fprintf(stderr, "dlg: out of memory growing nfa node table\n");
			exit(EXIT_FAILURE);
		}
		nfa_array=grown;
		nfa_size=wanted;
	}
	/* fill out entry in array */
	t = (nfa_node*) malloc(sizeof(nfa_node));
	if (t == NULL){
		fprintf(stderr, "dlg: out of memory allocating nfa node\n");
		exit(EXIT_FAILURE);
	}
	nfa_array[nfa_allocated] = t;
	*t = nfa_model_node;
	t->node_no = nfa_allocated;
	return t;
}


/* initialize the model node used to fill in newly made nfa_nodes */
void
#ifdef __USE_PROTOS
make_nfa_model_node(void)
#else
make_nfa_model_node()
#endif
{
	nfa_model_node.node_no = -1; /* impossible value for real nfa node */
	nfa_model_node.nfa_set = 0;
	nfa_model_node.accept = 0;   /* error state default*/
	nfa_model_node.trans[0] = NULL;
	nfa_model_node.trans[1] = NULL;
	nfa_model_node.label = empty;
}
>>

<<
#if defined(DEBUG) || defined(_DEBUG)

/* print out the pointer value and the node_number; "(nil)" for NULL */
void
#ifdef __USE_PROTOS
fprint_dfa_pair(FILE *f, nfa_node *p)
#else
fprint_dfa_pair(f, p)
FILE *f;
nfa_node *p;
#endif
{
	if (p){
		/* %p with a void pointer is the matching conversion for a pointer */
		fprintf(f, "%p (%d)", (void *)p, p->node_no);
	}else{
		fprintf(f, "(nil)");
	}
}

/* print out interest information on a set */
void
#ifdef __USE_PROTOS
fprint_set(FILE *f, set s)
#else
fprint_set(f,s)
FILE *f;
set s;
#endif
{
	unsigned int *x;

	fprintf(f, "n = %d,", s.n);
	if (s.setword){
		fprintf(f, "setword = %p, ", (void *)s.setword);
		/* print out all the elements in the set */
		/* NOTE(review): the list returned by set_pdq() is never released
		   here; confirm who owns it */
		x = set_pdq(s);
		while (*x!=nil){
			fprintf(f, "%u ", *x);
			++x;
		}
	}else{
		fprintf(f, "setword = (nil)");
	}
}

/* code to be able to dump out the nfas first_node..last_node to stderr
	stops early at the first index for which NFA() yields no node
	always returns 0
 */
int
#ifdef __USE_PROTOS
dump_nfas(int first_node, int last_node)
#else
dump_nfas(first_node, last_node)
int first_node;
int last_node;
#endif
{
	register int i;
	nfa_node *t;

	for (i=first_node; i<=last_node; ++i){
		t = NFA(i);
		if (!t) break;
		fprintf(stderr, "nfa_node %d {\n", t->node_no);
		fprintf(stderr, "\n\tnfa_set = %d\n", t->nfa_set);
		fprintf(stderr, "\taccept\t=\t%d\n", t->accept);
		fprintf(stderr, "\ttrans\t=\t(");
		fprint_dfa_pair(stderr, t->trans[0]);
		fprintf(stderr, ",");
		fprint_dfa_pair(stderr, t->trans[1]);
		fprintf(stderr, ")\n");
		fprintf(stderr, "\tlabel\t=\t{ ");
		fprint_set(stderr, t->label);
		fprintf(stderr, "\t}\n");
		fprintf(stderr, "}\n\n");
	}
	return 0;
}
#endif
>>

<<
/* DLG-specific syntax error message generator
 * (define USER_ZZSYN when compiling so don't get 2 definitions)
 *
 * Writes one line to stderr: the ErrHdr prefix with the input file name
 * ("stdin" when file_str[0] is NULL) and zzline, the offending text
 * ("EOF" when tok is zzEOF_TOKEN) and, when etok or eset is given, what
 * was expected instead, followed by the group name if egroup is not empty.
 */
void
#ifdef __USE_PROTOS
zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text)
#else
zzsyn(text, tok, egroup, eset, etok, k, bad_text)
char *text, *egroup, *bad_text;
int tok;
int etok;
int k;
SetWordType *eset;
#endif
{
	fprintf(stderr, ErrHdr, file_str[0]!=NULL?file_str[0]:"stdin", zzline);
	fprintf(stderr, " syntax error at \"%s\"", (tok==zzEOF_TOKEN)?"EOF":text);
	/* nothing is known about what was expected */
	if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
	if ( k==1 ) fprintf(stderr, " missing");
	else
	{
		fprintf(stderr, "; \"%s\" not", bad_text);
		if ( zzset_deg(eset)>1 ) fprintf(stderr, " in");
	}
	if ( zzset_deg(eset)>0 ) zzedecode(eset);
	else fprintf(stderr, " %s", zztokens[etok]);
	if ( strlen(egroup) > (size_t)0 ) fprintf(stderr, " in %s", egroup);
	fprintf(stderr, "\n");
}
>>