1#!/usr/bin/perl
2# src/interfaces/ecpg/preproc/parse.pl
3# parser generater for ecpg version 2
4# call with backend parser as stdin
5#
6# Copyright (c) 2007-2018, PostgreSQL Global Development Group
7#
8# Written by Mike Aubury <mike.aubury@aubit.com>
9#            Michael Meskes <meskes@postgresql.org>
10#            Andy Colson <andy@squeakycode.net>
11#
12# Placed under the same license as PostgreSQL.
13#
14
15use strict;
16use warnings;
17no warnings 'uninitialized';
18
19my $path = shift @ARGV;
20$path = "." unless $path;
21
22my $copymode              = 0;
23my $brace_indent          = 0;
24my $yaccmode              = 0;
25my $in_rule               = 0;
26my $header_included       = 0;
27my $feature_not_supported = 0;
28my $tokenmode             = 0;
29
30my (%buff, $infield, $comment, %tokens, %addons);
31my ($stmt_mode, @fields);
32my ($line,      $non_term_id);
33
34
35# some token have to be replaced by other symbols
36# either in the rule
37my %replace_token = (
38	'BCONST' => 'ecpg_bconst',
39	'FCONST' => 'ecpg_fconst',
40	'Sconst' => 'ecpg_sconst',
41	'IDENT'  => 'ecpg_ident',
42	'PARAM'  => 'ecpg_param',);
43
44# or in the block
45my %replace_string = (
46	'NOT_LA'         => 'not',
47	'NULLS_LA'       => 'nulls',
48	'WITH_LA'        => 'with',
49	'TYPECAST'       => '::',
50	'DOT_DOT'        => '..',
51	'COLON_EQUALS'   => ':=',
52	'EQUALS_GREATER' => '=>',
53	'LESS_EQUALS'    => '<=',
54	'GREATER_EQUALS' => '>=',
55	'NOT_EQUALS'     => '<>',);
56
57# specific replace_types for specific non-terminals - never include the ':'
58# ECPG-only replace_types are defined in ecpg-replace_types
59my %replace_types = (
60	'PrepareStmt'      => '<prep>',
61	'opt_array_bounds' => '<index>',
62
63	# "ignore" means: do not create type and rules for this non-term-id
64	'stmtblock'          => 'ignore',
65	'stmtmulti'          => 'ignore',
66	'CreateAsStmt'       => 'ignore',
67	'DeallocateStmt'     => 'ignore',
68	'ColId'              => 'ignore',
69	'type_function_name' => 'ignore',
70	'ColLabel'           => 'ignore',
71	'Sconst'             => 'ignore',);
72
73# these replace_line commands excise certain keywords from the core keyword
74# lists.  Be sure to account for these in ColLabel and related productions.
75my %replace_line = (
76	'unreserved_keywordCONNECTION' => 'ignore',
77	'unreserved_keywordCURRENT_P'  => 'ignore',
78	'unreserved_keywordDAY_P'      => 'ignore',
79	'unreserved_keywordHOUR_P'     => 'ignore',
80	'unreserved_keywordINPUT_P'    => 'ignore',
81	'unreserved_keywordMINUTE_P'   => 'ignore',
82	'unreserved_keywordMONTH_P'    => 'ignore',
83	'unreserved_keywordSECOND_P'   => 'ignore',
84	'unreserved_keywordYEAR_P'     => 'ignore',
85	'col_name_keywordCHAR_P'       => 'ignore',
86	'col_name_keywordINT_P'        => 'ignore',
87	'col_name_keywordVALUES'       => 'ignore',
88	'reserved_keywordTO'           => 'ignore',
89	'reserved_keywordUNION'        => 'ignore',
90
91	# some other production rules have to be ignored or replaced
92	'fetch_argsFORWARDopt_from_incursor_name'      => 'ignore',
93	'fetch_argsBACKWARDopt_from_incursor_name'     => 'ignore',
94	"opt_array_boundsopt_array_bounds'['Iconst']'" => 'ignore',
95	'VariableShowStmtSHOWvar_name' => 'SHOW var_name ecpg_into',
96	'VariableShowStmtSHOWTIMEZONE' => 'SHOW TIME ZONE ecpg_into',
97	'VariableShowStmtSHOWTRANSACTIONISOLATIONLEVEL' =>
98	  'SHOW TRANSACTION ISOLATION LEVEL ecpg_into',
99	'VariableShowStmtSHOWSESSIONAUTHORIZATION' =>
100	  'SHOW SESSION AUTHORIZATION ecpg_into',
101	'returning_clauseRETURNINGtarget_list' =>
102	  'RETURNING target_list opt_ecpg_into',
103	'ExecuteStmtEXECUTEnameexecute_param_clause' =>
104	  'EXECUTE prepared_name execute_param_clause execute_rest',
105	'ExecuteStmtCREATEOptTempTABLEcreate_as_targetASEXECUTEnameexecute_param_clause'
106	  => 'CREATE OptTemp TABLE create_as_target AS EXECUTE prepared_name execute_param_clause',
107	'PrepareStmtPREPAREnameprep_type_clauseASPreparableStmt' =>
108	  'PREPARE prepared_name prep_type_clause AS PreparableStmt',
109	'var_nameColId' => 'ECPGColId',);
110
111preload_addons();
112
113main();
114
115dump_buffer('header');
116dump_buffer('tokens');
117dump_buffer('types');
118dump_buffer('ecpgtype');
119dump_buffer('orig_tokens');
120print '%%',                "\n";
121print 'prog: statements;', "\n";
122dump_buffer('rules');
123include_file('trailer', 'ecpg.trailer');
124dump_buffer('trailer');
125
126sub main
127{
128  line: while (<>)
129	{
130		if (/ERRCODE_FEATURE_NOT_SUPPORTED/)
131		{
132			$feature_not_supported = 1;
133			next line;
134		}
135
136		chomp;
137
138		# comment out the line below to make the result file match (blank line wise)
139		# the prior version.
140		#next if ($_ eq '');
141
142		# Dump the action for a rule -
143		# stmt_mode indicates if we are processing the 'stmt:'
144		# rule (mode==0 means normal,  mode==1 means stmt:)
145		# flds are the fields to use. These may start with a '$' - in
146		# which case they are the result of a previous non-terminal
147		#
148		# if they don't start with a '$' then they are token name
149		#
150		# len is the number of fields in flds...
151		# leadin is the padding to apply at the beginning (just use for formatting)
152
153		if (/^%%/)
154		{
155			$tokenmode = 2;
156			$copymode  = 1;
157			$yaccmode++;
158			$infield = 0;
159		}
160
161		my $prec = 0;
162
163		# Make sure any braces are split
164		s/{/ { /g;
165		s/}/ } /g;
166
167		# Any comments are split
168		s|\/\*| /* |g;
169		s|\*\/| */ |g;
170
171		# Now split the line into individual fields
172		my @arr = split(' ');
173
174		if ($arr[0] eq '%token' && $tokenmode == 0)
175		{
176			$tokenmode = 1;
177			include_file('tokens', 'ecpg.tokens');
178		}
179		elsif ($arr[0] eq '%type' && $header_included == 0)
180		{
181			include_file('header',   'ecpg.header');
182			include_file('ecpgtype', 'ecpg.type');
183			$header_included = 1;
184		}
185
186		if ($tokenmode == 1)
187		{
188			my $str   = '';
189			my $prior = '';
190			for my $a (@arr)
191			{
192				if ($a eq '/*')
193				{
194					$comment++;
195					next;
196				}
197				if ($a eq '*/')
198				{
199					$comment--;
200					next;
201				}
202				if ($comment)
203				{
204					next;
205				}
206				if (substr($a, 0, 1) eq '<')
207				{
208					next;
209
210					# its a type
211				}
212				$tokens{$a} = 1;
213
214				$str = $str . ' ' . $a;
215				if ($a eq 'IDENT' && $prior eq '%nonassoc')
216				{
217
218					# add two more tokens to the list
219					$str = $str . "\n%nonassoc CSTRING\n%nonassoc UIDENT";
220				}
221				$prior = $a;
222			}
223			add_to_buffer('orig_tokens', $str);
224			next line;
225		}
226
227		# Don't worry about anything if we're not in the right section of gram.y
228		if ($yaccmode != 1)
229		{
230			next line;
231		}
232
233
234		# Go through each field in turn
235		for (
236			my $fieldIndexer = 0;
237			$fieldIndexer < scalar(@arr);
238			$fieldIndexer++)
239		{
240			if ($arr[$fieldIndexer] eq '*/' && $comment)
241			{
242				$comment = 0;
243				next;
244			}
245			elsif ($comment)
246			{
247				next;
248			}
249			elsif ($arr[$fieldIndexer] eq '/*')
250			{
251
252				# start of a multiline comment
253				$comment = 1;
254				next;
255			}
256			elsif ($arr[$fieldIndexer] eq '//')
257			{
258				next line;
259			}
260			elsif ($arr[$fieldIndexer] eq '}')
261			{
262				$brace_indent--;
263				next;
264			}
265			elsif ($arr[$fieldIndexer] eq '{')
266			{
267				$brace_indent++;
268				next;
269			}
270
271			if ($brace_indent > 0)
272			{
273				next;
274			}
275			if ($arr[$fieldIndexer] eq ';')
276			{
277				if ($copymode)
278				{
279					if ($infield)
280					{
281						dump_line($stmt_mode, \@fields);
282					}
283					add_to_buffer('rules', ";\n\n");
284				}
285				else
286				{
287					$copymode = 1;
288				}
289				@fields  = ();
290				$infield = 0;
291				$line    = '';
292				$in_rule = 0;
293				next;
294			}
295
296			if ($arr[$fieldIndexer] eq '|')
297			{
298				if ($copymode)
299				{
300					if ($infield)
301					{
302						$infield = $infield + dump_line($stmt_mode, \@fields);
303					}
304					if ($infield > 1)
305					{
306						$line = '| ';
307					}
308				}
309				@fields = ();
310				next;
311			}
312
313			if (exists $replace_token{ $arr[$fieldIndexer] })
314			{
315				$arr[$fieldIndexer] = $replace_token{ $arr[$fieldIndexer] };
316			}
317
318			# Are we looking at a declaration of a non-terminal ?
319			if (($arr[$fieldIndexer] =~ /[A-Za-z0-9]+:/)
320				|| $arr[ $fieldIndexer + 1 ] eq ':')
321			{
322				$non_term_id = $arr[$fieldIndexer];
323				$non_term_id =~ tr/://d;
324
325				if (not defined $replace_types{$non_term_id})
326				{
327					$replace_types{$non_term_id} = '<str>';
328					$copymode = 1;
329				}
330				elsif ($replace_types{$non_term_id} eq 'ignore')
331				{
332					$copymode = 0;
333					$line     = '';
334					next line;
335				}
336				$line = $line . ' ' . $arr[$fieldIndexer];
337
338				# Do we have the : attached already ?
339				# If yes, we'll have already printed the ':'
340				if (!($arr[$fieldIndexer] =~ '[A-Za-z0-9]+:'))
341				{
342
343					# Consume the ':' which is next...
344					$line = $line . ':';
345					$fieldIndexer++;
346				}
347
348				# Special mode?
349				if ($non_term_id eq 'stmt')
350				{
351					$stmt_mode = 1;
352				}
353				else
354				{
355					$stmt_mode = 0;
356				}
357				my $tstr =
358				    '%type '
359				  . $replace_types{$non_term_id} . ' '
360				  . $non_term_id;
361				add_to_buffer('types', $tstr);
362
363				if ($copymode)
364				{
365					add_to_buffer('rules', $line);
366				}
367				$line    = '';
368				@fields  = ();
369				$infield = 1;
370				die "unterminated rule at grammar line $.\n"
371				  if $in_rule;
372				$in_rule = 1;
373				next;
374			}
375			elsif ($copymode)
376			{
377				$line = $line . ' ' . $arr[$fieldIndexer];
378			}
379			if ($arr[$fieldIndexer] eq '%prec')
380			{
381				$prec = 1;
382				next;
383			}
384
385			if (   $copymode
386				&& !$prec
387				&& !$comment
388				&& length($arr[$fieldIndexer])
389				&& $infield)
390			{
391				if ($arr[$fieldIndexer] ne 'Op'
392					&& (   $tokens{ $arr[$fieldIndexer] } > 0
393						|| $arr[$fieldIndexer] =~ /'.+'/)
394					|| $stmt_mode == 1)
395				{
396					my $S;
397					if (exists $replace_string{ $arr[$fieldIndexer] })
398					{
399						$S = $replace_string{ $arr[$fieldIndexer] };
400					}
401					else
402					{
403						$S = $arr[$fieldIndexer];
404					}
405					$S =~ s/_P//g;
406					$S =~ tr/'//d;
407					if ($stmt_mode == 1)
408					{
409						push(@fields, $S);
410					}
411					else
412					{
413						push(@fields, lc($S));
414					}
415				}
416				else
417				{
418					push(@fields, '$' . (scalar(@fields) + 1));
419				}
420			}
421		}
422	}
423	die "unterminated rule at end of grammar\n"
424	  if $in_rule;
425	return;
426}
427
428
429# append a file onto a buffer.
430# Arguments:  buffer_name, filename (without path)
431sub include_file
432{
433	my ($buffer, $filename) = @_;
434	my $full = "$path/$filename";
435	open(my $fh, '<', $full) or die;
436	while (<$fh>)
437	{
438		chomp;
439		add_to_buffer($buffer, $_);
440	}
441	close($fh);
442	return;
443}
444
445sub include_addon
446{
447	my ($buffer, $block, $fields, $stmt_mode) = @_;
448	my $rec = $addons{$block};
449	return 0 unless $rec;
450
451	if ($rec->{type} eq 'rule')
452	{
453		dump_fields($stmt_mode, $fields, ' { ');
454	}
455	elsif ($rec->{type} eq 'addon')
456	{
457		add_to_buffer('rules', ' { ');
458	}
459
460	#add_to_buffer( $stream, $_ );
461	#We have an array to add to the buffer, we'll add it ourself instead of
462	#calling add_to_buffer, which does not know about arrays
463
464	push(@{ $buff{$buffer} }, @{ $rec->{lines} });
465
466	if ($rec->{type} eq 'addon')
467	{
468		dump_fields($stmt_mode, $fields, '');
469	}
470
471
472	# if we added something (ie there are lines in our array), return 1
473	return 1 if (scalar(@{ $rec->{lines} }) > 0);
474	return 0;
475}
476
477
478# include_addon does this same thing, but does not call this
479# sub... so if you change this, you need to fix include_addon too
480#   Pass:  buffer_name, string_to_append
481sub add_to_buffer
482{
483	push(@{ $buff{ $_[0] } }, "$_[1]\n");
484	return;
485}
486
487sub dump_buffer
488{
489	my ($buffer) = @_;
490	print '/* ', $buffer, ' */', "\n";
491	my $ref = $buff{$buffer};
492	print @$ref;
493	return;
494}
495
496sub dump_fields
497{
498	my ($mode, $flds, $ln) = @_;
499	my $len = scalar(@$flds);
500
501	if ($mode == 0)
502	{
503
504		#Normal
505		add_to_buffer('rules', $ln);
506		if ($feature_not_supported == 1)
507		{
508
509			# we found an unsupported feature, but we have to
510			# filter out ExecuteStmt: CREATE OptTemp TABLE ...
511			# because the warning there is only valid in some situations
512			if ($flds->[0] ne 'create' || $flds->[2] ne 'table')
513			{
514				add_to_buffer('rules',
515					'mmerror(PARSE_ERROR, ET_WARNING, "unsupported feature will be passed to server");'
516				);
517			}
518			$feature_not_supported = 0;
519		}
520
521		if ($len == 0)
522		{
523
524			# We have no fields ?
525			add_to_buffer('rules', ' $$=EMPTY; }');
526		}
527		else
528		{
529
530			# Go through each field and try to 'aggregate' the tokens
531			# into a single 'mm_strdup' where possible
532			my @flds_new;
533			my $str;
534			for (my $z = 0; $z < $len; $z++)
535			{
536				if (substr($flds->[$z], 0, 1) eq '$')
537				{
538					push(@flds_new, $flds->[$z]);
539					next;
540				}
541
542				$str = $flds->[$z];
543
544				while (1)
545				{
546					if ($z >= $len - 1
547						|| substr($flds->[ $z + 1 ], 0, 1) eq '$')
548					{
549
550						# We're at the end...
551						push(@flds_new, "mm_strdup(\"$str\")");
552						last;
553					}
554					$z++;
555					$str = $str . ' ' . $flds->[$z];
556				}
557			}
558
559			# So - how many fields did we end up with ?
560			$len = scalar(@flds_new);
561			if ($len == 1)
562			{
563
564				# Straight assignment
565				$str = ' $$ = ' . $flds_new[0] . ';';
566				add_to_buffer('rules', $str);
567			}
568			else
569			{
570
571				# Need to concatenate the results to form
572				# our final string
573				$str =
574				  ' $$ = cat_str(' . $len . ',' . join(',', @flds_new) . ');';
575				add_to_buffer('rules', $str);
576			}
577			add_to_buffer('rules', '}');
578		}
579	}
580	else
581	{
582
583		# we're in the stmt: rule
584		if ($len)
585		{
586
587			# or just the statement ...
588			add_to_buffer('rules',
589				' { output_statement($1, 0, ECPGst_normal); }');
590		}
591		else
592		{
593			add_to_buffer('rules', ' { $$ = NULL; }');
594		}
595	}
596	return;
597}
598
599
600sub dump_line
601{
602	my ($stmt_mode, $fields) = @_;
603	my $block = $non_term_id . $line;
604	$block =~ tr/ |//d;
605	my $rep = $replace_line{$block};
606	if ($rep)
607	{
608		if ($rep eq 'ignore')
609		{
610			return 0;
611		}
612
613		if (index($line, '|') != -1)
614		{
615			$line = '| ' . $rep;
616		}
617		else
618		{
619			$line = $rep;
620		}
621		$block = $non_term_id . $line;
622		$block =~ tr/ |//d;
623	}
624	add_to_buffer('rules', $line);
625	my $i = include_addon('rules', $block, $fields, $stmt_mode);
626	if ($i == 0)
627	{
628		dump_fields($stmt_mode, $fields, ' { ');
629	}
630	return 1;
631}
632
633=top
634	load addons into cache
635	%addons = {
636		stmtClosePortalStmt => { 'type' => 'block', 'lines' => [ "{", "if (INFORMIX_MODE)" ..., "}" ] },
637		stmtViewStmt => { 'type' => 'rule', 'lines' => [ "| ECPGAllocateDescr", ... ] }
638	}
639
640=cut
641
642sub preload_addons
643{
644	my $filename = $path . "/ecpg.addons";
645	open(my $fh, '<', $filename) or die;
646
647	# there may be multiple lines starting ECPG: and then multiple lines of code.
648	# the code need to be add to all prior ECPG records.
649	my (@needsRules, @code, $record);
650
651	# there may be comments before the first ECPG line, skip them
652	my $skip = 1;
653	while (<$fh>)
654	{
655		if (/^ECPG:\s(\S+)\s?(\w+)?/)
656		{
657			$skip = 0;
658			if (@code)
659			{
660				for my $x (@needsRules)
661				{
662					push(@{ $x->{lines} }, @code);
663				}
664				@code       = ();
665				@needsRules = ();
666			}
667			$record          = {};
668			$record->{type}  = $2;
669			$record->{lines} = [];
670			if (exists $addons{$1}) { die "Ga! there are dups!\n"; }
671			$addons{$1} = $record;
672			push(@needsRules, $record);
673		}
674		else
675		{
676			next if $skip;
677			push(@code, $_);
678		}
679	}
680	close($fh);
681	if (@code)
682	{
683		for my $x (@needsRules)
684		{
685			push(@{ $x->{lines} }, @code);
686		}
687	}
688	return;
689}
690