1#!/usr/bin/perl 2# src/interfaces/ecpg/preproc/parse.pl 3# parser generater for ecpg version 2 4# call with backend parser as stdin 5# 6# Copyright (c) 2007-2018, PostgreSQL Global Development Group 7# 8# Written by Mike Aubury <mike.aubury@aubit.com> 9# Michael Meskes <meskes@postgresql.org> 10# Andy Colson <andy@squeakycode.net> 11# 12# Placed under the same license as PostgreSQL. 13# 14 15use strict; 16use warnings; 17no warnings 'uninitialized'; 18 19my $path = shift @ARGV; 20$path = "." unless $path; 21 22my $copymode = 0; 23my $brace_indent = 0; 24my $yaccmode = 0; 25my $in_rule = 0; 26my $header_included = 0; 27my $feature_not_supported = 0; 28my $tokenmode = 0; 29 30my (%buff, $infield, $comment, %tokens, %addons); 31my ($stmt_mode, @fields); 32my ($line, $non_term_id); 33 34 35# some token have to be replaced by other symbols 36# either in the rule 37my %replace_token = ( 38 'BCONST' => 'ecpg_bconst', 39 'FCONST' => 'ecpg_fconst', 40 'Sconst' => 'ecpg_sconst', 41 'IDENT' => 'ecpg_ident', 42 'PARAM' => 'ecpg_param',); 43 44# or in the block 45my %replace_string = ( 46 'NOT_LA' => 'not', 47 'NULLS_LA' => 'nulls', 48 'WITH_LA' => 'with', 49 'TYPECAST' => '::', 50 'DOT_DOT' => '..', 51 'COLON_EQUALS' => ':=', 52 'EQUALS_GREATER' => '=>', 53 'LESS_EQUALS' => '<=', 54 'GREATER_EQUALS' => '>=', 55 'NOT_EQUALS' => '<>',); 56 57# specific replace_types for specific non-terminals - never include the ':' 58# ECPG-only replace_types are defined in ecpg-replace_types 59my %replace_types = ( 60 'PrepareStmt' => '<prep>', 61 'opt_array_bounds' => '<index>', 62 63 # "ignore" means: do not create type and rules for this non-term-id 64 'stmtblock' => 'ignore', 65 'stmtmulti' => 'ignore', 66 'CreateAsStmt' => 'ignore', 67 'DeallocateStmt' => 'ignore', 68 'ColId' => 'ignore', 69 'type_function_name' => 'ignore', 70 'ColLabel' => 'ignore', 71 'Sconst' => 'ignore',); 72 73# these replace_line commands excise certain keywords from the core keyword 74# lists. Be sure to account for these in ColLabel and related productions. 75my %replace_line = ( 76 'unreserved_keywordCONNECTION' => 'ignore', 77 'unreserved_keywordCURRENT_P' => 'ignore', 78 'unreserved_keywordDAY_P' => 'ignore', 79 'unreserved_keywordHOUR_P' => 'ignore', 80 'unreserved_keywordINPUT_P' => 'ignore', 81 'unreserved_keywordMINUTE_P' => 'ignore', 82 'unreserved_keywordMONTH_P' => 'ignore', 83 'unreserved_keywordSECOND_P' => 'ignore', 84 'unreserved_keywordYEAR_P' => 'ignore', 85 'col_name_keywordCHAR_P' => 'ignore', 86 'col_name_keywordINT_P' => 'ignore', 87 'col_name_keywordVALUES' => 'ignore', 88 'reserved_keywordTO' => 'ignore', 89 'reserved_keywordUNION' => 'ignore', 90 91 # some other production rules have to be ignored or replaced 92 'fetch_argsFORWARDopt_from_incursor_name' => 'ignore', 93 'fetch_argsBACKWARDopt_from_incursor_name' => 'ignore', 94 "opt_array_boundsopt_array_bounds'['Iconst']'" => 'ignore', 95 'VariableShowStmtSHOWvar_name' => 'SHOW var_name ecpg_into', 96 'VariableShowStmtSHOWTIMEZONE' => 'SHOW TIME ZONE ecpg_into', 97 'VariableShowStmtSHOWTRANSACTIONISOLATIONLEVEL' => 98 'SHOW TRANSACTION ISOLATION LEVEL ecpg_into', 99 'VariableShowStmtSHOWSESSIONAUTHORIZATION' => 100 'SHOW SESSION AUTHORIZATION ecpg_into', 101 'returning_clauseRETURNINGtarget_list' => 102 'RETURNING target_list opt_ecpg_into', 103 'ExecuteStmtEXECUTEnameexecute_param_clause' => 104 'EXECUTE prepared_name execute_param_clause execute_rest', 105 'ExecuteStmtCREATEOptTempTABLEcreate_as_targetASEXECUTEnameexecute_param_clause' 106 => 'CREATE OptTemp TABLE create_as_target AS EXECUTE prepared_name execute_param_clause', 107 'PrepareStmtPREPAREnameprep_type_clauseASPreparableStmt' => 108 'PREPARE prepared_name prep_type_clause AS PreparableStmt', 109 'var_nameColId' => 'ECPGColId',); 110 111preload_addons(); 112 113main(); 114 115dump_buffer('header'); 116dump_buffer('tokens'); 117dump_buffer('types'); 118dump_buffer('ecpgtype'); 119dump_buffer('orig_tokens'); 120print '%%', "\n"; 121print 'prog: statements;', "\n"; 122dump_buffer('rules'); 123include_file('trailer', 'ecpg.trailer'); 124dump_buffer('trailer'); 125 126sub main 127{ 128 line: while (<>) 129 { 130 if (/ERRCODE_FEATURE_NOT_SUPPORTED/) 131 { 132 $feature_not_supported = 1; 133 next line; 134 } 135 136 chomp; 137 138 # comment out the line below to make the result file match (blank line wise) 139 # the prior version. 140 #next if ($_ eq ''); 141 142 # Dump the action for a rule - 143 # stmt_mode indicates if we are processing the 'stmt:' 144 # rule (mode==0 means normal, mode==1 means stmt:) 145 # flds are the fields to use. These may start with a '$' - in 146 # which case they are the result of a previous non-terminal 147 # 148 # if they don't start with a '$' then they are token name 149 # 150 # len is the number of fields in flds... 151 # leadin is the padding to apply at the beginning (just use for formatting) 152 153 if (/^%%/) 154 { 155 $tokenmode = 2; 156 $copymode = 1; 157 $yaccmode++; 158 $infield = 0; 159 } 160 161 my $prec = 0; 162 163 # Make sure any braces are split 164 s/{/ { /g; 165 s/}/ } /g; 166 167 # Any comments are split 168 s|\/\*| /* |g; 169 s|\*\/| */ |g; 170 171 # Now split the line into individual fields 172 my @arr = split(' '); 173 174 if ($arr[0] eq '%token' && $tokenmode == 0) 175 { 176 $tokenmode = 1; 177 include_file('tokens', 'ecpg.tokens'); 178 } 179 elsif ($arr[0] eq '%type' && $header_included == 0) 180 { 181 include_file('header', 'ecpg.header'); 182 include_file('ecpgtype', 'ecpg.type'); 183 $header_included = 1; 184 } 185 186 if ($tokenmode == 1) 187 { 188 my $str = ''; 189 my $prior = ''; 190 for my $a (@arr) 191 { 192 if ($a eq '/*') 193 { 194 $comment++; 195 next; 196 } 197 if ($a eq '*/') 198 { 199 $comment--; 200 next; 201 } 202 if ($comment) 203 { 204 next; 205 } 206 if (substr($a, 0, 1) eq '<') 207 { 208 next; 209 210 # its a type 211 } 212 $tokens{$a} = 1; 213 214 $str = $str . ' ' . $a; 215 if ($a eq 'IDENT' && $prior eq '%nonassoc') 216 { 217 218 # add two more tokens to the list 219 $str = $str . "\n%nonassoc CSTRING\n%nonassoc UIDENT"; 220 } 221 $prior = $a; 222 } 223 add_to_buffer('orig_tokens', $str); 224 next line; 225 } 226 227 # Don't worry about anything if we're not in the right section of gram.y 228 if ($yaccmode != 1) 229 { 230 next line; 231 } 232 233 234 # Go through each field in turn 235 for ( 236 my $fieldIndexer = 0; 237 $fieldIndexer < scalar(@arr); 238 $fieldIndexer++) 239 { 240 if ($arr[$fieldIndexer] eq '*/' && $comment) 241 { 242 $comment = 0; 243 next; 244 } 245 elsif ($comment) 246 { 247 next; 248 } 249 elsif ($arr[$fieldIndexer] eq '/*') 250 { 251 252 # start of a multiline comment 253 $comment = 1; 254 next; 255 } 256 elsif ($arr[$fieldIndexer] eq '//') 257 { 258 next line; 259 } 260 elsif ($arr[$fieldIndexer] eq '}') 261 { 262 $brace_indent--; 263 next; 264 } 265 elsif ($arr[$fieldIndexer] eq '{') 266 { 267 $brace_indent++; 268 next; 269 } 270 271 if ($brace_indent > 0) 272 { 273 next; 274 } 275 if ($arr[$fieldIndexer] eq ';') 276 { 277 if ($copymode) 278 { 279 if ($infield) 280 { 281 dump_line($stmt_mode, \@fields); 282 } 283 add_to_buffer('rules', ";\n\n"); 284 } 285 else 286 { 287 $copymode = 1; 288 } 289 @fields = (); 290 $infield = 0; 291 $line = ''; 292 $in_rule = 0; 293 next; 294 } 295 296 if ($arr[$fieldIndexer] eq '|') 297 { 298 if ($copymode) 299 { 300 if ($infield) 301 { 302 $infield = $infield + dump_line($stmt_mode, \@fields); 303 } 304 if ($infield > 1) 305 { 306 $line = '| '; 307 } 308 } 309 @fields = (); 310 next; 311 } 312 313 if (exists $replace_token{ $arr[$fieldIndexer] }) 314 { 315 $arr[$fieldIndexer] = $replace_token{ $arr[$fieldIndexer] }; 316 } 317 318 # Are we looking at a declaration of a non-terminal ? 319 if (($arr[$fieldIndexer] =~ /[A-Za-z0-9]+:/) 320 || $arr[ $fieldIndexer + 1 ] eq ':') 321 { 322 $non_term_id = $arr[$fieldIndexer]; 323 $non_term_id =~ tr/://d; 324 325 if (not defined $replace_types{$non_term_id}) 326 { 327 $replace_types{$non_term_id} = '<str>'; 328 $copymode = 1; 329 } 330 elsif ($replace_types{$non_term_id} eq 'ignore') 331 { 332 $copymode = 0; 333 $line = ''; 334 next line; 335 } 336 $line = $line . ' ' . $arr[$fieldIndexer]; 337 338 # Do we have the : attached already ? 339 # If yes, we'll have already printed the ':' 340 if (!($arr[$fieldIndexer] =~ '[A-Za-z0-9]+:')) 341 { 342 343 # Consume the ':' which is next... 344 $line = $line . ':'; 345 $fieldIndexer++; 346 } 347 348 # Special mode? 349 if ($non_term_id eq 'stmt') 350 { 351 $stmt_mode = 1; 352 } 353 else 354 { 355 $stmt_mode = 0; 356 } 357 my $tstr = 358 '%type ' 359 . $replace_types{$non_term_id} . ' ' 360 . $non_term_id; 361 add_to_buffer('types', $tstr); 362 363 if ($copymode) 364 { 365 add_to_buffer('rules', $line); 366 } 367 $line = ''; 368 @fields = (); 369 $infield = 1; 370 die "unterminated rule at grammar line $.\n" 371 if $in_rule; 372 $in_rule = 1; 373 next; 374 } 375 elsif ($copymode) 376 { 377 $line = $line . ' ' . $arr[$fieldIndexer]; 378 } 379 if ($arr[$fieldIndexer] eq '%prec') 380 { 381 $prec = 1; 382 next; 383 } 384 385 if ( $copymode 386 && !$prec 387 && !$comment 388 && length($arr[$fieldIndexer]) 389 && $infield) 390 { 391 if ($arr[$fieldIndexer] ne 'Op' 392 && ( $tokens{ $arr[$fieldIndexer] } > 0 393 || $arr[$fieldIndexer] =~ /'.+'/) 394 || $stmt_mode == 1) 395 { 396 my $S; 397 if (exists $replace_string{ $arr[$fieldIndexer] }) 398 { 399 $S = $replace_string{ $arr[$fieldIndexer] }; 400 } 401 else 402 { 403 $S = $arr[$fieldIndexer]; 404 } 405 $S =~ s/_P//g; 406 $S =~ tr/'//d; 407 if ($stmt_mode == 1) 408 { 409 push(@fields, $S); 410 } 411 else 412 { 413 push(@fields, lc($S)); 414 } 415 } 416 else 417 { 418 push(@fields, '$' . (scalar(@fields) + 1)); 419 } 420 } 421 } 422 } 423 die "unterminated rule at end of grammar\n" 424 if $in_rule; 425 return; 426} 427 428 429# append a file onto a buffer. 430# Arguments: buffer_name, filename (without path) 431sub include_file 432{ 433 my ($buffer, $filename) = @_; 434 my $full = "$path/$filename"; 435 open(my $fh, '<', $full) or die; 436 while (<$fh>) 437 { 438 chomp; 439 add_to_buffer($buffer, $_); 440 } 441 close($fh); 442 return; 443} 444 445sub include_addon 446{ 447 my ($buffer, $block, $fields, $stmt_mode) = @_; 448 my $rec = $addons{$block}; 449 return 0 unless $rec; 450 451 if ($rec->{type} eq 'rule') 452 { 453 dump_fields($stmt_mode, $fields, ' { '); 454 } 455 elsif ($rec->{type} eq 'addon') 456 { 457 add_to_buffer('rules', ' { '); 458 } 459 460 #add_to_buffer( $stream, $_ ); 461 #We have an array to add to the buffer, we'll add it ourself instead of 462 #calling add_to_buffer, which does not know about arrays 463 464 push(@{ $buff{$buffer} }, @{ $rec->{lines} }); 465 466 if ($rec->{type} eq 'addon') 467 { 468 dump_fields($stmt_mode, $fields, ''); 469 } 470 471 472 # if we added something (ie there are lines in our array), return 1 473 return 1 if (scalar(@{ $rec->{lines} }) > 0); 474 return 0; 475} 476 477 478# include_addon does this same thing, but does not call this 479# sub... so if you change this, you need to fix include_addon too 480# Pass: buffer_name, string_to_append 481sub add_to_buffer 482{ 483 push(@{ $buff{ $_[0] } }, "$_[1]\n"); 484 return; 485} 486 487sub dump_buffer 488{ 489 my ($buffer) = @_; 490 print '/* ', $buffer, ' */', "\n"; 491 my $ref = $buff{$buffer}; 492 print @$ref; 493 return; 494} 495 496sub dump_fields 497{ 498 my ($mode, $flds, $ln) = @_; 499 my $len = scalar(@$flds); 500 501 if ($mode == 0) 502 { 503 504 #Normal 505 add_to_buffer('rules', $ln); 506 if ($feature_not_supported == 1) 507 { 508 509 # we found an unsupported feature, but we have to 510 # filter out ExecuteStmt: CREATE OptTemp TABLE ... 511 # because the warning there is only valid in some situations 512 if ($flds->[0] ne 'create' || $flds->[2] ne 'table') 513 { 514 add_to_buffer('rules', 515 'mmerror(PARSE_ERROR, ET_WARNING, "unsupported feature will be passed to server");' 516 ); 517 } 518 $feature_not_supported = 0; 519 } 520 521 if ($len == 0) 522 { 523 524 # We have no fields ? 525 add_to_buffer('rules', ' $$=EMPTY; }'); 526 } 527 else 528 { 529 530 # Go through each field and try to 'aggregate' the tokens 531 # into a single 'mm_strdup' where possible 532 my @flds_new; 533 my $str; 534 for (my $z = 0; $z < $len; $z++) 535 { 536 if (substr($flds->[$z], 0, 1) eq '$') 537 { 538 push(@flds_new, $flds->[$z]); 539 next; 540 } 541 542 $str = $flds->[$z]; 543 544 while (1) 545 { 546 if ($z >= $len - 1 547 || substr($flds->[ $z + 1 ], 0, 1) eq '$') 548 { 549 550 # We're at the end... 551 push(@flds_new, "mm_strdup(\"$str\")"); 552 last; 553 } 554 $z++; 555 $str = $str . ' ' . $flds->[$z]; 556 } 557 } 558 559 # So - how many fields did we end up with ? 560 $len = scalar(@flds_new); 561 if ($len == 1) 562 { 563 564 # Straight assignment 565 $str = ' $$ = ' . $flds_new[0] . ';'; 566 add_to_buffer('rules', $str); 567 } 568 else 569 { 570 571 # Need to concatenate the results to form 572 # our final string 573 $str = 574 ' $$ = cat_str(' . $len . ',' . join(',', @flds_new) . ');'; 575 add_to_buffer('rules', $str); 576 } 577 add_to_buffer('rules', '}'); 578 } 579 } 580 else 581 { 582 583 # we're in the stmt: rule 584 if ($len) 585 { 586 587 # or just the statement ... 588 add_to_buffer('rules', 589 ' { output_statement($1, 0, ECPGst_normal); }'); 590 } 591 else 592 { 593 add_to_buffer('rules', ' { $$ = NULL; }'); 594 } 595 } 596 return; 597} 598 599 600sub dump_line 601{ 602 my ($stmt_mode, $fields) = @_; 603 my $block = $non_term_id . $line; 604 $block =~ tr/ |//d; 605 my $rep = $replace_line{$block}; 606 if ($rep) 607 { 608 if ($rep eq 'ignore') 609 { 610 return 0; 611 } 612 613 if (index($line, '|') != -1) 614 { 615 $line = '| ' . $rep; 616 } 617 else 618 { 619 $line = $rep; 620 } 621 $block = $non_term_id . $line; 622 $block =~ tr/ |//d; 623 } 624 add_to_buffer('rules', $line); 625 my $i = include_addon('rules', $block, $fields, $stmt_mode); 626 if ($i == 0) 627 { 628 dump_fields($stmt_mode, $fields, ' { '); 629 } 630 return 1; 631} 632 633=top 634 load addons into cache 635 %addons = { 636 stmtClosePortalStmt => { 'type' => 'block', 'lines' => [ "{", "if (INFORMIX_MODE)" ..., "}" ] }, 637 stmtViewStmt => { 'type' => 'rule', 'lines' => [ "| ECPGAllocateDescr", ... ] } 638 } 639 640=cut 641 642sub preload_addons 643{ 644 my $filename = $path . "/ecpg.addons"; 645 open(my $fh, '<', $filename) or die; 646 647 # there may be multiple lines starting ECPG: and then multiple lines of code. 648 # the code need to be add to all prior ECPG records. 649 my (@needsRules, @code, $record); 650 651 # there may be comments before the first ECPG line, skip them 652 my $skip = 1; 653 while (<$fh>) 654 { 655 if (/^ECPG:\s(\S+)\s?(\w+)?/) 656 { 657 $skip = 0; 658 if (@code) 659 { 660 for my $x (@needsRules) 661 { 662 push(@{ $x->{lines} }, @code); 663 } 664 @code = (); 665 @needsRules = (); 666 } 667 $record = {}; 668 $record->{type} = $2; 669 $record->{lines} = []; 670 if (exists $addons{$1}) { die "Ga! there are dups!\n"; } 671 $addons{$1} = $record; 672 push(@needsRules, $record); 673 } 674 else 675 { 676 next if $skip; 677 push(@code, $_); 678 } 679 } 680 close($fh); 681 if (@code) 682 { 683 for my $x (@needsRules) 684 { 685 push(@{ $x->{lines} }, @code); 686 } 687 } 688 return; 689} 690