1#!/usr/bin/perl
2
3# Check that the keyword lists in gram.y and kwlist.h are sane.
4# Usage: check_keywords.pl gram.y kwlist.h
5
6# src/backend/parser/check_keywords.pl
7# Copyright (c) 2009-2021, PostgreSQL Global Development Group
8
9use strict;
10use warnings;
11
# The two input files, in command-line order: first the grammar (gram.y),
# then the keyword list header (kwlist.h).
my ($gram_filename, $kwlist_filename) = @ARGV;

# Exit status of the script: stays 0 until error() reports a problem.
my $errors = 0;
16
# Report a problem on STDERR and mark the run as failed.
# The arguments are printed as given.  No newline is added here; the
# script sets $, and $\ globally, and those supply the separator between
# arguments and the line terminator.
sub error
{
	my @message = @_;

	print STDERR @message;
	$errors = 1;
	return;
}
23
# Verify that a list of keyword symbols is in strictly ascending order.
#   $listname - name of the list, used only in the error message
#   $list     - array reference holding the symbols in file order
# Symbols are compared as strings after dropping a trailing "_P".  Every
# symbol that does not sort strictly after its predecessor is reported
# through error().  Note these are the grammar symbols, NOT the actual
# keyword strings.
sub check_alphabetical_order
{
	my ($listname, $list) = @_;
	my $previous = '';

	foreach my $symbol (@$list)
	{
		# Compare on the name without the optional _P suffix.
		(my $current = $symbol) =~ s/_P$//;

		error(
			"'$current' after '$previous' in $listname list is misplaced")
		  unless $current gt $previous;

		$previous = $current;
	}
	return;
}
45
# Global print settings: a space between printed fields and a newline
# after every print statement.
$, = q{ };
$\ = qq{\n};

# Maps the name of each keyword rule in gram.y to the category identifier
# that kwlist.h uses for the keywords of that rule.
my %keyword_categories = (
	'unreserved_keyword'     => 'UNRESERVED_KEYWORD',
	'col_name_keyword'       => 'COL_NAME_KEYWORD',
	'type_func_name_keyword' => 'TYPE_FUNC_NAME_KEYWORD',
	'reserved_keyword'       => 'RESERVED_KEYWORD',);
54
# Read gram.y and collect the symbols listed in each keyword rule.
# Include the system error in the message so the reason for a failed
# open is not lost.
open(my $gram, '<', $gram_filename)
  or die("Could not open $gram_filename: $!");

my $kcat;              # rule name of the category list being read, if any
my $in_bare_labels;    # true while inside the bare_label_keyword rule
my $comment;           # true while inside a /* ... */ comment within a list
my @arr;               # whitespace-separated fields of the current line
my %keywords;          # rule name => array ref of its symbols, in file order
my @bare_label_keywords;    # symbols of the bare_label_keyword rule

line: while (my $S = <$gram>)
{
	chomp $S;    # strip record separator

	# Put spaces around braces and comment delimiters, so that the split
	# below yields each of them as a field of its own.
	$S =~ s/\{/ { /g;
	$S =~ s/\}/ } /g;
	$S =~ s#/\*# /* #g;
	$S =~ s#\*/# */ #g;

	if (!$kcat && !$in_bare_labels)
	{

		# Is this the beginning of a keyword list?
		foreach my $k (keys %keyword_categories)
		{
			if ($S =~ m/^($k):/)
			{
				$kcat = $k;
				next line;
			}
		}

		# Is this the beginning of the bare_label_keyword list?
		$in_bare_labels = 1 if ($S =~ m/^bare_label_keyword:/);

		# Either way, nothing else on this line is examined.
		next line;
	}

	# Ok, we're in a keyword list.  Split the line into individual fields
	# and go through each field in turn.
	@arr = split(' ', $S);

	foreach my $field (@arr)
	{
		if ($comment)
		{

			# Inside a multiline comment: skip fields until it is closed.
			$comment = 0 if $field eq '*/';
			next;
		}

		if ($field eq '/*')
		{

			# start of a multiline comment
			$comment = 1;
			next;
		}

		# A '//' field hides the rest of the line.
		next line if $field eq '//';

		if ($field eq ';')
		{

			# End of keyword list.  Whatever follows the terminator on
			# this line is not part of the list, so stop here instead of
			# filing it under an undefined category.
			undef $kcat;
			undef $in_bare_labels;
			next line;
		}

		# Alternation separators carry no information.
		next if $field eq '|';

		# Put this keyword into the right list
		if ($in_bare_labels)
		{
			push @bare_label_keywords, $field;
		}
		else
		{
			push @{ $keywords{$kcat} }, $field;
		}
	}
}
close $gram;
150
# Each keyword list taken from gram.y is expected to be sorted; this is
# checked purely for tidiness.
foreach my $category (keys %keyword_categories)
{
	check_alphabetical_order($category, $keywords{$category});
}
check_alphabetical_order('bare_label_keyword', \@bare_label_keywords);
154
# Transform the keyword lists into hashes.
# kwhashes is a hash of hashes, keyed by keyword category id,
# e.g. UNRESERVED_KEYWORD.
# Each inner hash is keyed by keyword id, e.g. ABORT_P, with a dummy value.
my %kwhashes;
while (my ($kcat, $kcat_id) = each(%keyword_categories))
{

	# A category whose rule never showed up in gram.y has no list at all.
	# Treat it as an empty list, so that the kwlist.h checks report the
	# problem instead of the script dying here on an undefined reference.
	my $symbols = $keywords{$kcat} || [];

	# Always store a hash reference, even when the list is empty.
	$kwhashes{$kcat_id} = { map { $_ => 1 } @$symbols };
}
my %bare_label_keywords = map { $_ => 1 } @bare_label_keywords;
170
# Now read in kwlist.h and cross-check every PG_KEYWORD entry against the
# information collected from gram.y.

open(my $kwlist, '<', $kwlist_filename)
  or die("Could not open $kwlist_filename: $!");

my $prevkwstring = '';
kwlist_line: while (my $line = <$kwlist>)
{

	# Only lines of the form PG_KEYWORD("string", NAME, CATEGORY, LABEL)
	# are of interest; everything else is skipped.
	next kwlist_line
	  unless $line =~ /^PG_KEYWORD\(\"(.*)\", (.*), (.*), (.*)\)/;
	my ($kwstring, $kwname, $kwcat_id, $collabel) = ($1, $2, $3, $4);

	# Check that the list is in alphabetical order (critical!)
	if ($kwstring le $prevkwstring)
	{
		error("'$kwstring' after '$prevkwstring' in kwlist.h is misplaced");
	}
	$prevkwstring = $kwstring;

	# Check that the keyword string is valid: all lower-case ASCII chars
	if ($kwstring !~ /^[a-z_]+$/)
	{
		error(
			"'$kwstring' is not a valid keyword string, must be all lower-case ASCII chars"
		);
	}

	# Check that the keyword name is valid: all upper-case ASCII chars
	if ($kwname !~ /^[A-Z_]+$/)
	{
		error(
			"'$kwname' is not a valid keyword name, must be all upper-case ASCII chars"
		);
	}

	# Check that the keyword string matches keyword name
	# (ignoring an optional _P suffix on the name)
	(my $bare_kwname = $kwname) =~ s/_P$//;
	if ($bare_kwname ne uc($kwstring))
	{
		error(
			"keyword name '$kwname' doesn't match keyword string '$kwstring'");
	}

	# Check that the keyword is present in the right category list.
	# Look the category up by reference: dereferencing a missing entry
	# would abort the script under "use strict", and an entry whose
	# keywords have all been matched already is still a known category.
	my $category_kws = $kwhashes{$kwcat_id};

	if (!defined $category_kws)
	{
		error("Unknown keyword category: $kwcat_id");
	}
	elsif (!$category_kws->{$kwname})
	{
		error("'$kwname' not present in $kwcat_id section of gram.y");
	}
	else
	{

		# Remove it from the hash, so that we can
		# complain at the end if there's keywords left
		# that were not found in kwlist.h
		delete $category_kws->{$kwname};
	}

	# Check that the keyword's collabel property matches gram.y
	if ($collabel eq 'BARE_LABEL')
	{
		unless ($bare_label_keywords{$kwname})
		{
			error(
				"'$kwname' is marked as BARE_LABEL in kwlist.h, but it is missing from gram.y's bare_label_keyword rule"
			);
		}
	}
	elsif ($collabel eq 'AS_LABEL')
	{
		if ($bare_label_keywords{$kwname})
		{
			error(
				"'$kwname' is marked as AS_LABEL in kwlist.h, but it is listed in gram.y's bare_label_keyword rule"
			);
		}
	}
	else
	{
		error(
			"'$collabel' not recognized in kwlist.h.  Expected either 'BARE_LABEL' or 'AS_LABEL'"
		);
	}
}
close $kwlist;

# Check that we've paired up all keywords from gram.y with lines in kwlist.h:
# whatever is still left in the per-category hashes was never seen there.
while (my ($kwcat, $kwcat_id) = each(%keyword_categories))
{
	foreach my $kw (keys %{ $kwhashes{$kwcat_id} })
	{
		error("'$kw' found in gram.y $kwcat category, but not in kwlist.h");
	}
}

# Exit status is 0 when everything checked out, 1 if error() was called.
exit $errors;
282