#!/usr/bin/perl

# Check that the keyword lists in gram.y and kwlist.h are sane.
# Usage: check_keywords.pl gram.y kwlist.h

# src/backend/parser/check_keywords.pl
# Copyright (c) 2009-2021, PostgreSQL Global Development Group

use strict;
use warnings;

die("Usage: check_keywords.pl gram.y kwlist.h\n") if (@ARGV < 2);

my $gram_filename   = $ARGV[0];
my $kwlist_filename = $ARGV[1];

# Set to 1 by error(); used as the exit status of the script.
my $errors = 0;

# Report a problem on STDERR and remember that at least one was found.
# The arguments are passed straight to print, so they are joined with $,
# and terminated with $\ (both are set below, before any check runs).
sub error
{
	print STDERR @_;
	$errors = 1;
	return;
}

# Check alphabetical order of a set of keyword symbols
# (note these are NOT the actual keyword strings).
#
# $listname is only used in the error message; $list is a reference to an
# array of symbols in the order they were read.  Each symbol must compare
# strictly greater than its predecessor, so duplicates are reported too.
sub check_alphabetical_order
{
	my ($listname, $list) = @_;
	my $prevkword = '';

	foreach my $kword (@$list)
	{
		# Some symbols have a _P suffix. Remove it for the comparison.
		my $bare_kword = $kword;
		$bare_kword =~ s/_P$//;
		if ($bare_kword le $prevkword)
		{
			error
			  "'$bare_kword' after '$prevkword' in $listname list is misplaced";
		}
		$prevkword = $bare_kword;
	}
	return;
}

$, = ' ';     # set output field separator
$\ = "\n";    # set output record separator

# Maps each gram.y production name to the category id used in kwlist.h.
my %keyword_categories;
$keyword_categories{'unreserved_keyword'}     = 'UNRESERVED_KEYWORD';
$keyword_categories{'col_name_keyword'}       = 'COL_NAME_KEYWORD';
$keyword_categories{'type_func_name_keyword'} = 'TYPE_FUNC_NAME_KEYWORD';
$keyword_categories{'reserved_keyword'}       = 'RESERVED_KEYWORD';

open(my $gram, '<', $gram_filename)
  || die("Could not open : $gram_filename: $!");

my $kcat;               # production name of the list being read, if any
my $in_bare_labels;     # true while reading the bare_label_keyword list
my $comment;            # true while inside a /* ... */ comment
my %keywords;           # production name => array ref of symbols
my @bare_label_keywords;

line: while (my $S = <$gram>)
{
	chomp $S;    # strip record separator

	# Make sure any braces are split
	$S =~ s/\{/ { /g;
	$S =~ s/\}/ } /g;

	# Any comments are split
	$S =~ s#/\*# /* #g;
	$S =~ s#\*/# */ #g;

	if (!($kcat) && !($in_bare_labels))
	{

		# Is this the beginning of a keyword list?
		foreach my $k (keys %keyword_categories)
		{
			if ($S =~ m/^($k):/)
			{
				$kcat = $k;
				next line;
			}
		}

		# Is this the beginning of the bare_label_keyword list?
		$in_bare_labels = 1 if ($S =~ m/^bare_label_keyword:/);

		next line;
	}

	# Ok, we're in a keyword list. Split the line into individual fields
	# and go through each field in turn.
	foreach my $field (split(' ', $S))
	{
		if ($field eq '*/' && $comment)
		{
			$comment = 0;
			next;
		}
		elsif ($comment)
		{
			next;
		}
		elsif ($field eq '/*')
		{

			# start of a multiline comment
			$comment = 1;
			next;
		}
		elsif ($field eq '//')
		{
			next line;
		}

		if ($field eq ';')
		{

			# end of keyword list
			undef $kcat;
			undef $in_bare_labels;
			next;
		}

		if ($field eq '|')
		{
			next;
		}

		# Put this keyword into the right list
		if ($in_bare_labels)
		{
			push @bare_label_keywords, $field;
		}
		else
		{
			push @{ $keywords{$kcat} }, $field;
		}
	}
}
close $gram;

# Check that each keyword list is in alphabetical order (just for neatnik-ism).
# Categories are visited in sorted order so the report is reproducible.
check_alphabetical_order($_, $keywords{$_})
  for (sort keys %keyword_categories);
check_alphabetical_order('bare_label_keyword', \@bare_label_keywords);

# Transform the keyword lists into hashes.
# kwhashes is a hash of hashes, keyed by keyword category id,
# e.g. UNRESERVED_KEYWORD.
# Each inner hash is keyed by keyword id, e.g. ABORT_P, with a dummy value.
# Every known category gets an entry, even when gram.y listed no keywords
# for it, so that "known but empty" can be told apart from "unknown".
my %kwhashes;
foreach my $cat (sort keys %keyword_categories)
{
	my %hash = map { $_ => 1 } @{ $keywords{$cat} || [] };

	$kwhashes{ $keyword_categories{$cat} } = \%hash;
}
my %bare_label_keywords = map { $_ => 1 } @bare_label_keywords;

# Now read in kwlist.h

open(my $kwlist, '<', $kwlist_filename)
  || die("Could not open : $kwlist_filename: $!");

my $prevkwstring = '';
kwlist_line: while (my $line = <$kwlist>)
{
	if ($line =~ /^PG_KEYWORD\(\"(.*)\", (.*), (.*), (.*)\)/)
	{
		my ($kwstring) = $1;
		my ($kwname)   = $2;
		my ($kwcat_id) = $3;
		my ($collabel) = $4;

		# Check that the list is in alphabetical order (critical!)
		if ($kwstring le $prevkwstring)
		{
			error
			  "'$kwstring' after '$prevkwstring' in kwlist.h is misplaced";
		}
		$prevkwstring = $kwstring;

		# Check that the keyword string is valid: all lower-case ASCII chars
		if ($kwstring !~ /^[a-z_]+$/)
		{
			error
			  "'$kwstring' is not a valid keyword string, must be all lower-case ASCII chars";
		}

		# Check that the keyword name is valid: all upper-case ASCII chars
		if ($kwname !~ /^[A-Z_]+$/)
		{
			error
			  "'$kwname' is not a valid keyword name, must be all upper-case ASCII chars";
		}

		# Check that the keyword string matches keyword name
		my $bare_kwname = $kwname;
		$bare_kwname =~ s/_P$//;
		if ($bare_kwname ne uc($kwstring))
		{
			error
			  "keyword name '$kwname' doesn't match keyword string '$kwstring'";
		}

		# Check that the keyword is present in the right category list.
		# Look the category up by existence rather than by hash emptiness:
		# a category whose keywords have all been matched already is empty
		# but still known, and an unknown category has no entry at all.
		my $kwhash = $kwhashes{$kwcat_id};

		if (!defined($kwhash))
		{
			error "Unknown keyword category: $kwcat_id";
		}
		elsif (!($kwhash->{$kwname}))
		{
			error "'$kwname' not present in $kwcat_id section of gram.y";
		}
		else
		{

			# Remove it from the hash, so that we can
			# complain at the end if there's keywords left
			# that were not found in kwlist.h
			delete $kwhash->{$kwname};
		}

		# Check that the keyword's collabel property matches gram.y
		if ($collabel eq 'BARE_LABEL')
		{
			unless ($bare_label_keywords{$kwname})
			{
				error
				  "'$kwname' is marked as BARE_LABEL in kwlist.h, but it is missing from gram.y's bare_label_keyword rule";
			}
		}
		elsif ($collabel eq 'AS_LABEL')
		{
			if ($bare_label_keywords{$kwname})
			{
				error
				  "'$kwname' is marked as AS_LABEL in kwlist.h, but it is listed in gram.y's bare_label_keyword rule";
			}
		}
		else
		{
			error
			  "'$collabel' not recognized in kwlist.h. Expected either 'BARE_LABEL' or 'AS_LABEL'";
		}
	}
}
close $kwlist;

# Check that we've paired up all keywords from gram.y with lines in kwlist.h.
# Anything still present in a category hash was never seen in kwlist.h.
foreach my $kwcat (sort keys %keyword_categories)
{
	my $kwcat_id = $keyword_categories{$kwcat};

	for my $kw (sort keys %{ $kwhashes{$kwcat_id} })
	{
		error "'$kw' found in gram.y $kwcat category, but not in kwlist.h";
	}
}

exit $errors;