1#---------------------------------------------------------------------- 2# 3# Catalog.pm 4# Perl module that extracts info from catalog files into Perl 5# data structures 6# 7# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group 8# Portions Copyright (c) 1994, Regents of the University of California 9# 10# src/backend/catalog/Catalog.pm 11# 12#---------------------------------------------------------------------- 13 14package Catalog; 15 16use strict; 17use warnings; 18 19use File::Compare; 20 21 22# Parses a catalog header file into a data structure describing the schema 23# of the catalog. 24sub ParseHeader 25{ 26 my $input_file = shift; 27 28 # There are a few types which are given one name in the C source, but a 29 # different name at the SQL level. These are enumerated here. 30 my %RENAME_ATTTYPE = ( 31 'int16' => 'int2', 32 'int32' => 'int4', 33 'int64' => 'int8', 34 'Oid' => 'oid', 35 'NameData' => 'name', 36 'TransactionId' => 'xid', 37 'XLogRecPtr' => 'pg_lsn'); 38 39 my %catalog; 40 my $declaring_attributes = 0; 41 my $is_varlen = 0; 42 my $is_client_code = 0; 43 44 $catalog{columns} = []; 45 $catalog{toasting} = []; 46 $catalog{indexing} = []; 47 $catalog{client_code} = []; 48 49 open(my $ifh, '<', $input_file) || die "$input_file: $!"; 50 51 # Scan the input file. 52 while (<$ifh>) 53 { 54 55 # Set appropriate flag when we're in certain code sections. 56 if (/^#/) 57 { 58 $is_varlen = 1 if /^#ifdef\s+CATALOG_VARLEN/; 59 if (/^#ifdef\s+EXPOSE_TO_CLIENT_CODE/) 60 { 61 $is_client_code = 1; 62 next; 63 } 64 next if !$is_client_code; 65 } 66 67 if (!$is_client_code) 68 { 69 # Strip C-style comments. 70 s;/\*(.|\n)*\*/;;g; 71 if (m;/\*;) 72 { 73 74 # handle multi-line comments properly. 75 my $next_line = <$ifh>; 76 die "$input_file: ends within C-style comment\n" 77 if !defined $next_line; 78 $_ .= $next_line; 79 redo; 80 } 81 82 # Strip useless whitespace and trailing semicolons. 83 chomp; 84 s/^\s+//; 85 s/;\s*$//; 86 s/\s+/ /g; 87 } 88 89 # Push the data into the appropriate data structure. 90 # Caution: when adding new recognized OID-defining macros, 91 # also update src/include/catalog/renumber_oids.pl. 92 if (/^DECLARE_TOAST\(\s*(\w+),\s*(\d+),\s*(\d+)\)/) 93 { 94 push @{ $catalog{toasting} }, 95 { parent_table => $1, toast_oid => $2, toast_index_oid => $3 }; 96 } 97 elsif (/^DECLARE_(UNIQUE_)?INDEX\(\s*(\w+),\s*(\d+),\s*(.+)\)/) 98 { 99 push @{ $catalog{indexing} }, 100 { 101 is_unique => $1 ? 1 : 0, 102 index_name => $2, 103 index_oid => $3, 104 index_decl => $4 105 }; 106 } 107 elsif (/^CATALOG\((\w+),(\d+),(\w+)\)/) 108 { 109 $catalog{catname} = $1; 110 $catalog{relation_oid} = $2; 111 $catalog{relation_oid_macro} = $3; 112 113 $catalog{bootstrap} = /BKI_BOOTSTRAP/ ? ' bootstrap' : ''; 114 $catalog{shared_relation} = 115 /BKI_SHARED_RELATION/ ? ' shared_relation' : ''; 116 if (/BKI_ROWTYPE_OID\((\d+),(\w+)\)/) 117 { 118 $catalog{rowtype_oid} = $1; 119 $catalog{rowtype_oid_clause} = " rowtype_oid $1"; 120 $catalog{rowtype_oid_macro} = $2; 121 } 122 else 123 { 124 $catalog{rowtype_oid} = ''; 125 $catalog{rowtype_oid_clause} = ''; 126 $catalog{rowtype_oid_macro} = ''; 127 } 128 $catalog{schema_macro} = /BKI_SCHEMA_MACRO/ ? 1 : 0; 129 $declaring_attributes = 1; 130 } 131 elsif ($is_client_code) 132 { 133 if (/^#endif/) 134 { 135 $is_client_code = 0; 136 } 137 else 138 { 139 push @{ $catalog{client_code} }, $_; 140 } 141 } 142 elsif ($declaring_attributes) 143 { 144 next if (/^{|^$/); 145 if (/^}/) 146 { 147 $declaring_attributes = 0; 148 } 149 else 150 { 151 my %column; 152 my @attopts = split /\s+/, $_; 153 my $atttype = shift @attopts; 154 my $attname = shift @attopts; 155 die "parse error ($input_file)" 156 unless ($attname and $atttype); 157 158 if (exists $RENAME_ATTTYPE{$atttype}) 159 { 160 $atttype = $RENAME_ATTTYPE{$atttype}; 161 } 162 163 # If the C name ends with '[]' or '[digits]', we have 164 # an array type, so we discard that from the name and 165 # prepend '_' to the type. 166 if ($attname =~ /(\w+)\[\d*\]/) 167 { 168 $attname = $1; 169 $atttype = '_' . $atttype; 170 } 171 172 $column{type} = $atttype; 173 $column{name} = $attname; 174 $column{is_varlen} = 1 if $is_varlen; 175 176 foreach my $attopt (@attopts) 177 { 178 if ($attopt eq 'BKI_FORCE_NULL') 179 { 180 $column{forcenull} = 1; 181 } 182 elsif ($attopt eq 'BKI_FORCE_NOT_NULL') 183 { 184 $column{forcenotnull} = 1; 185 } 186 187 # We use quotes for values like \0 and \054, to 188 # make sure all compilers and syntax highlighters 189 # can recognize them properly. 190 elsif ($attopt =~ /BKI_DEFAULT\(['"]?([^'"]+)['"]?\)/) 191 { 192 $column{default} = $1; 193 } 194 elsif ( 195 $attopt =~ /BKI_ARRAY_DEFAULT\(['"]?([^'"]+)['"]?\)/) 196 { 197 $column{array_default} = $1; 198 } 199 elsif ($attopt =~ /BKI_LOOKUP\((\w+)\)/) 200 { 201 $column{lookup} = $1; 202 } 203 else 204 { 205 die 206 "unknown or misformatted column option $attopt on column $attname"; 207 } 208 209 if ($column{forcenull} and $column{forcenotnull}) 210 { 211 die "$attname is forced both null and not null"; 212 } 213 } 214 push @{ $catalog{columns} }, \%column; 215 } 216 } 217 } 218 close $ifh; 219 return \%catalog; 220} 221 222# Parses a file containing Perl data structure literals, returning live data. 223# 224# The parameter $preserve_formatting needs to be set for callers that want 225# to work with non-data lines in the data files, such as comments and blank 226# lines. If a caller just wants to consume the data, leave it unset. 227sub ParseData 228{ 229 my ($input_file, $schema, $preserve_formatting) = @_; 230 231 open(my $ifd, '<', $input_file) || die "$input_file: $!"; 232 $input_file =~ /(\w+)\.dat$/ 233 or die "Input file $input_file needs to be a .dat file.\n"; 234 my $catname = $1; 235 my $data = []; 236 237 # Scan the input file. 238 while (<$ifd>) 239 { 240 my $hash_ref; 241 242 if (/{/) 243 { 244 # Capture the hash ref 245 # NB: Assumes that the next hash ref can't start on the 246 # same line where the present one ended. 247 # Not foolproof, but we shouldn't need a full parser, 248 # since we expect relatively well-behaved input. 249 250 # Quick hack to detect when we have a full hash ref to 251 # parse. We can't just use a regex because of values in 252 # pg_aggregate and pg_proc like '{0,0}'. This will need 253 # work if we ever need to allow unbalanced braces within 254 # a field value. 255 my $lcnt = tr/{//; 256 my $rcnt = tr/}//; 257 258 if ($lcnt == $rcnt) 259 { 260 # We're treating the input line as a piece of Perl, so we 261 # need to use string eval here. Tell perlcritic we know what 262 # we're doing. 263 eval '$hash_ref = ' . $_; ## no critic (ProhibitStringyEval) 264 if (!ref $hash_ref) 265 { 266 die "$input_file: error parsing line $.:\n$_\n"; 267 } 268 269 # Annotate each hash with the source line number. 270 $hash_ref->{line_number} = $.; 271 272 # Expand tuples to their full representation. 273 AddDefaultValues($hash_ref, $schema, $catname); 274 } 275 else 276 { 277 my $next_line = <$ifd>; 278 die "$input_file: file ends within Perl hash\n" 279 if !defined $next_line; 280 $_ .= $next_line; 281 redo; 282 } 283 } 284 285 # If we found a hash reference, keep it, unless it is marked as 286 # autogenerated; in that case it'd duplicate an entry we'll 287 # autogenerate below. (This makes it safe for reformat_dat_file.pl 288 # with --full-tuples to print autogenerated entries, which seems like 289 # useful behavior for debugging.) 290 # 291 # Only keep non-data strings if we are told to preserve formatting. 292 if (defined $hash_ref) 293 { 294 push @$data, $hash_ref if !$hash_ref->{autogenerated}; 295 } 296 elsif ($preserve_formatting) 297 { 298 push @$data, $_; 299 } 300 } 301 close $ifd; 302 303 # If this is pg_type, auto-generate array types too. 304 GenerateArrayTypes($schema, $data) if $catname eq 'pg_type'; 305 306 return $data; 307} 308 309# Fill in default values of a record using the given schema. 310# It's the caller's responsibility to specify other values beforehand. 311sub AddDefaultValues 312{ 313 my ($row, $schema, $catname) = @_; 314 my @missing_fields; 315 316 # Compute special-case column values. 317 # Note: If you add new cases here, you must also teach 318 # strip_default_values() in include/catalog/reformat_dat_file.pl 319 # to delete them. 320 if ($catname eq 'pg_proc') 321 { 322 # pg_proc.pronargs can be derived from proargtypes. 323 if (defined $row->{proargtypes}) 324 { 325 my @proargtypes = split /\s+/, $row->{proargtypes}; 326 $row->{pronargs} = scalar(@proargtypes); 327 } 328 } 329 330 # Now fill in defaults, and note any columns that remain undefined. 331 foreach my $column (@$schema) 332 { 333 my $attname = $column->{name}; 334 335 # No work if field already has a value. 336 next if defined $row->{$attname}; 337 338 # Ignore 'oid' columns, they're handled elsewhere. 339 next if $attname eq 'oid'; 340 341 # If column has a default value, fill that in. 342 if (defined $column->{default}) 343 { 344 $row->{$attname} = $column->{default}; 345 next; 346 } 347 348 # Failed to find a value for this field. 349 push @missing_fields, $attname; 350 } 351 352 # Failure to provide all columns is a hard error. 353 if (@missing_fields) 354 { 355 die sprintf "missing values for field(s) %s in %s.dat line %s\n", 356 join(', ', @missing_fields), $catname, $row->{line_number}; 357 } 358} 359 360# If a pg_type entry has an array_type_oid metadata field, 361# auto-generate an entry for its array type. 362sub GenerateArrayTypes 363{ 364 my $pgtype_schema = shift; 365 my $types = shift; 366 my @array_types; 367 368 foreach my $elem_type (@$types) 369 { 370 next if !(ref $elem_type eq 'HASH'); 371 next if !defined($elem_type->{array_type_oid}); 372 373 my %array_type; 374 375 # Set up metadata fields for array type. 376 $array_type{oid} = $elem_type->{array_type_oid}; 377 $array_type{autogenerated} = 1; 378 $array_type{line_number} = $elem_type->{line_number}; 379 380 # Set up column values derived from the element type. 381 $array_type{typname} = '_' . $elem_type->{typname}; 382 $array_type{typelem} = $elem_type->{typname}; 383 384 # Arrays require INT alignment, unless the element type requires 385 # DOUBLE alignment. 386 $array_type{typalign} = $elem_type->{typalign} eq 'd' ? 'd' : 'i'; 387 388 # Fill in the rest of the array entry's fields. 389 foreach my $column (@$pgtype_schema) 390 { 391 my $attname = $column->{name}; 392 393 # Skip if we already set it above. 394 next if defined $array_type{$attname}; 395 396 # Apply the BKI_ARRAY_DEFAULT setting if there is one, 397 # otherwise copy the field from the element type. 398 if (defined $column->{array_default}) 399 { 400 $array_type{$attname} = $column->{array_default}; 401 } 402 else 403 { 404 $array_type{$attname} = $elem_type->{$attname}; 405 } 406 } 407 408 # Lastly, cross-link the array to the element type. 409 $elem_type->{typarray} = $array_type{typname}; 410 411 push @array_types, \%array_type; 412 } 413 414 push @$types, @array_types; 415 416 return; 417} 418 419# Rename temporary files to final names. 420# Call this function with the final file name and the .tmp extension. 421# 422# If the final file already exists and has identical contents, don't 423# overwrite it; this behavior avoids unnecessary recompiles due to 424# updating the mod date on unchanged header files. 425# 426# Note: recommended extension is ".tmp$$", so that parallel make steps 427# can't use the same temp files. 428sub RenameTempFile 429{ 430 my $final_name = shift; 431 my $extension = shift; 432 my $temp_name = $final_name . $extension; 433 434 if (-f $final_name 435 && compare($temp_name, $final_name) == 0) 436 { 437 unlink($temp_name) || die "unlink: $temp_name: $!"; 438 } 439 else 440 { 441 rename($temp_name, $final_name) || die "rename: $temp_name: $!"; 442 } 443 return; 444} 445 446# Find a symbol defined in a particular header file and extract the value. 447# include_path should be the path to src/include/. 448sub FindDefinedSymbol 449{ 450 my ($catalog_header, $include_path, $symbol) = @_; 451 my $value; 452 453 # Make sure include path ends in a slash. 454 if (substr($include_path, -1) ne '/') 455 { 456 $include_path .= '/'; 457 } 458 my $file = $include_path . $catalog_header; 459 open(my $find_defined_symbol, '<', $file) || die "$file: $!"; 460 while (<$find_defined_symbol>) 461 { 462 if (/^#define\s+\Q$symbol\E\s+(\S+)/) 463 { 464 $value = $1; 465 last; 466 } 467 } 468 close $find_defined_symbol; 469 return $value if defined $value; 470 die "$file: no definition found for $symbol\n"; 471} 472 473# Similar to FindDefinedSymbol, but looks in the bootstrap metadata. 474sub FindDefinedSymbolFromData 475{ 476 my ($data, $symbol) = @_; 477 foreach my $row (@{$data}) 478 { 479 if ($row->{oid_symbol} eq $symbol) 480 { 481 return $row->{oid}; 482 } 483 } 484 die "no definition found for $symbol\n"; 485} 486 487# Extract an array of all the OIDs assigned in the specified catalog headers 488# and their associated data files (if any). 489# Caution: genbki.pl contains equivalent logic; change it too if you need to 490# touch this. 491sub FindAllOidsFromHeaders 492{ 493 my @input_files = @_; 494 495 my @oids = (); 496 497 foreach my $header (@input_files) 498 { 499 $header =~ /(.+)\.h$/ 500 or die "Input files need to be header files.\n"; 501 my $datfile = "$1.dat"; 502 503 my $catalog = Catalog::ParseHeader($header); 504 505 # We ignore the pg_class OID and rowtype OID of bootstrap catalogs, 506 # as those are expected to appear in the initial data for pg_class 507 # and pg_type. For regular catalogs, include these OIDs. 508 if (!$catalog->{bootstrap}) 509 { 510 push @oids, $catalog->{relation_oid} 511 if ($catalog->{relation_oid}); 512 push @oids, $catalog->{rowtype_oid} if ($catalog->{rowtype_oid}); 513 } 514 515 # Not all catalogs have a data file. 516 if (-e $datfile) 517 { 518 my $catdata = 519 Catalog::ParseData($datfile, $catalog->{columns}, 0); 520 521 foreach my $row (@$catdata) 522 { 523 push @oids, $row->{oid} if defined $row->{oid}; 524 } 525 } 526 527 foreach my $toast (@{ $catalog->{toasting} }) 528 { 529 push @oids, $toast->{toast_oid}, $toast->{toast_index_oid}; 530 } 531 foreach my $index (@{ $catalog->{indexing} }) 532 { 533 push @oids, $index->{index_oid}; 534 } 535 } 536 537 return \@oids; 538} 539 5401; 541