1#----------------------------------------------------------------------
2#
3# Catalog.pm
4#    Perl module that extracts info from catalog files into Perl
5#    data structures
6#
7# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
8# Portions Copyright (c) 1994, Regents of the University of California
9#
10# src/backend/catalog/Catalog.pm
11#
12#----------------------------------------------------------------------
13
14package Catalog;
15
16use strict;
17use warnings;
18
19use File::Compare;
20
21
22# Parses a catalog header file into a data structure describing the schema
23# of the catalog.
24sub ParseHeader
25{
26	my $input_file = shift;
27
28	# There are a few types which are given one name in the C source, but a
29	# different name at the SQL level.  These are enumerated here.
30	my %RENAME_ATTTYPE = (
31		'int16'         => 'int2',
32		'int32'         => 'int4',
33		'int64'         => 'int8',
34		'Oid'           => 'oid',
35		'NameData'      => 'name',
36		'TransactionId' => 'xid',
37		'XLogRecPtr'    => 'pg_lsn');
38
39	my %catalog;
40	my $declaring_attributes = 0;
41	my $is_varlen            = 0;
42	my $is_client_code       = 0;
43
44	$catalog{columns}     = [];
45	$catalog{toasting}    = [];
46	$catalog{indexing}    = [];
47	$catalog{client_code} = [];
48
49	open(my $ifh, '<', $input_file) || die "$input_file: $!";
50
51	# Scan the input file.
52	while (<$ifh>)
53	{
54
55		# Set appropriate flag when we're in certain code sections.
56		if (/^#/)
57		{
58			$is_varlen = 1 if /^#ifdef\s+CATALOG_VARLEN/;
59			if (/^#ifdef\s+EXPOSE_TO_CLIENT_CODE/)
60			{
61				$is_client_code = 1;
62				next;
63			}
64			next if !$is_client_code;
65		}
66
67		if (!$is_client_code)
68		{
69			# Strip C-style comments.
70			s;/\*(.|\n)*\*/;;g;
71			if (m;/\*;)
72			{
73
74				# handle multi-line comments properly.
75				my $next_line = <$ifh>;
76				die "$input_file: ends within C-style comment\n"
77				  if !defined $next_line;
78				$_ .= $next_line;
79				redo;
80			}
81
82			# Strip useless whitespace and trailing semicolons.
83			chomp;
84			s/^\s+//;
85			s/;\s*$//;
86			s/\s+/ /g;
87		}
88
89		# Push the data into the appropriate data structure.
90		# Caution: when adding new recognized OID-defining macros,
91		# also update src/include/catalog/renumber_oids.pl.
92		if (/^DECLARE_TOAST\(\s*(\w+),\s*(\d+),\s*(\d+)\)/)
93		{
94			push @{ $catalog{toasting} },
95			  { parent_table => $1, toast_oid => $2, toast_index_oid => $3 };
96		}
97		elsif (/^DECLARE_(UNIQUE_)?INDEX\(\s*(\w+),\s*(\d+),\s*(.+)\)/)
98		{
99			push @{ $catalog{indexing} },
100			  {
101				is_unique => $1 ? 1 : 0,
102				index_name => $2,
103				index_oid  => $3,
104				index_decl => $4
105			  };
106		}
107		elsif (/^CATALOG\((\w+),(\d+),(\w+)\)/)
108		{
109			$catalog{catname}            = $1;
110			$catalog{relation_oid}       = $2;
111			$catalog{relation_oid_macro} = $3;
112
113			$catalog{bootstrap} = /BKI_BOOTSTRAP/ ? ' bootstrap' : '';
114			$catalog{shared_relation} =
115			  /BKI_SHARED_RELATION/ ? ' shared_relation' : '';
116			if (/BKI_ROWTYPE_OID\((\d+),(\w+)\)/)
117			{
118				$catalog{rowtype_oid}        = $1;
119				$catalog{rowtype_oid_clause} = " rowtype_oid $1";
120				$catalog{rowtype_oid_macro}  = $2;
121			}
122			else
123			{
124				$catalog{rowtype_oid}        = '';
125				$catalog{rowtype_oid_clause} = '';
126				$catalog{rowtype_oid_macro}  = '';
127			}
128			$catalog{schema_macro} = /BKI_SCHEMA_MACRO/ ? 1 : 0;
129			$declaring_attributes = 1;
130		}
131		elsif ($is_client_code)
132		{
133			if (/^#endif/)
134			{
135				$is_client_code = 0;
136			}
137			else
138			{
139				push @{ $catalog{client_code} }, $_;
140			}
141		}
142		elsif ($declaring_attributes)
143		{
144			next if (/^{|^$/);
145			if (/^}/)
146			{
147				$declaring_attributes = 0;
148			}
149			else
150			{
151				my %column;
152				my @attopts = split /\s+/, $_;
153				my $atttype = shift @attopts;
154				my $attname = shift @attopts;
155				die "parse error ($input_file)"
156				  unless ($attname and $atttype);
157
158				if (exists $RENAME_ATTTYPE{$atttype})
159				{
160					$atttype = $RENAME_ATTTYPE{$atttype};
161				}
162
163				# If the C name ends with '[]' or '[digits]', we have
164				# an array type, so we discard that from the name and
165				# prepend '_' to the type.
166				if ($attname =~ /(\w+)\[\d*\]/)
167				{
168					$attname = $1;
169					$atttype = '_' . $atttype;
170				}
171
172				$column{type}      = $atttype;
173				$column{name}      = $attname;
174				$column{is_varlen} = 1 if $is_varlen;
175
176				foreach my $attopt (@attopts)
177				{
178					if ($attopt eq 'BKI_FORCE_NULL')
179					{
180						$column{forcenull} = 1;
181					}
182					elsif ($attopt eq 'BKI_FORCE_NOT_NULL')
183					{
184						$column{forcenotnull} = 1;
185					}
186
187					# We use quotes for values like \0 and \054, to
188					# make sure all compilers and syntax highlighters
189					# can recognize them properly.
190					elsif ($attopt =~ /BKI_DEFAULT\(['"]?([^'"]+)['"]?\)/)
191					{
192						$column{default} = $1;
193					}
194					elsif (
195						$attopt =~ /BKI_ARRAY_DEFAULT\(['"]?([^'"]+)['"]?\)/)
196					{
197						$column{array_default} = $1;
198					}
199					elsif ($attopt =~ /BKI_LOOKUP\((\w+)\)/)
200					{
201						$column{lookup} = $1;
202					}
203					else
204					{
205						die
206						  "unknown or misformatted column option $attopt on column $attname";
207					}
208
209					if ($column{forcenull} and $column{forcenotnull})
210					{
211						die "$attname is forced both null and not null";
212					}
213				}
214				push @{ $catalog{columns} }, \%column;
215			}
216		}
217	}
218	close $ifh;
219	return \%catalog;
220}
221
222# Parses a file containing Perl data structure literals, returning live data.
223#
224# The parameter $preserve_formatting needs to be set for callers that want
225# to work with non-data lines in the data files, such as comments and blank
226# lines. If a caller just wants to consume the data, leave it unset.
227sub ParseData
228{
229	my ($input_file, $schema, $preserve_formatting) = @_;
230
231	open(my $ifd, '<', $input_file) || die "$input_file: $!";
232	$input_file =~ /(\w+)\.dat$/
233	  or die "Input file $input_file needs to be a .dat file.\n";
234	my $catname = $1;
235	my $data    = [];
236
237	# Scan the input file.
238	while (<$ifd>)
239	{
240		my $hash_ref;
241
242		if (/{/)
243		{
244			# Capture the hash ref
245			# NB: Assumes that the next hash ref can't start on the
246			# same line where the present one ended.
247			# Not foolproof, but we shouldn't need a full parser,
248			# since we expect relatively well-behaved input.
249
250			# Quick hack to detect when we have a full hash ref to
251			# parse. We can't just use a regex because of values in
252			# pg_aggregate and pg_proc like '{0,0}'.  This will need
253			# work if we ever need to allow unbalanced braces within
254			# a field value.
255			my $lcnt = tr/{//;
256			my $rcnt = tr/}//;
257
258			if ($lcnt == $rcnt)
259			{
260				# We're treating the input line as a piece of Perl, so we
261				# need to use string eval here. Tell perlcritic we know what
262				# we're doing.
263				eval '$hash_ref = ' . $_;   ## no critic (ProhibitStringyEval)
264				if (!ref $hash_ref)
265				{
266					die "$input_file: error parsing line $.:\n$_\n";
267				}
268
269				# Annotate each hash with the source line number.
270				$hash_ref->{line_number} = $.;
271
272				# Expand tuples to their full representation.
273				AddDefaultValues($hash_ref, $schema, $catname);
274			}
275			else
276			{
277				my $next_line = <$ifd>;
278				die "$input_file: file ends within Perl hash\n"
279				  if !defined $next_line;
280				$_ .= $next_line;
281				redo;
282			}
283		}
284
285		# If we found a hash reference, keep it, unless it is marked as
286		# autogenerated; in that case it'd duplicate an entry we'll
287		# autogenerate below.  (This makes it safe for reformat_dat_file.pl
288		# with --full-tuples to print autogenerated entries, which seems like
289		# useful behavior for debugging.)
290		#
291		# Only keep non-data strings if we are told to preserve formatting.
292		if (defined $hash_ref)
293		{
294			push @$data, $hash_ref if !$hash_ref->{autogenerated};
295		}
296		elsif ($preserve_formatting)
297		{
298			push @$data, $_;
299		}
300	}
301	close $ifd;
302
303	# If this is pg_type, auto-generate array types too.
304	GenerateArrayTypes($schema, $data) if $catname eq 'pg_type';
305
306	return $data;
307}
308
309# Fill in default values of a record using the given schema.
310# It's the caller's responsibility to specify other values beforehand.
311sub AddDefaultValues
312{
313	my ($row, $schema, $catname) = @_;
314	my @missing_fields;
315
316	# Compute special-case column values.
317	# Note: If you add new cases here, you must also teach
318	# strip_default_values() in include/catalog/reformat_dat_file.pl
319	# to delete them.
320	if ($catname eq 'pg_proc')
321	{
322		# pg_proc.pronargs can be derived from proargtypes.
323		if (defined $row->{proargtypes})
324		{
325			my @proargtypes = split /\s+/, $row->{proargtypes};
326			$row->{pronargs} = scalar(@proargtypes);
327		}
328	}
329
330	# Now fill in defaults, and note any columns that remain undefined.
331	foreach my $column (@$schema)
332	{
333		my $attname = $column->{name};
334
335		# No work if field already has a value.
336		next if defined $row->{$attname};
337
338		# Ignore 'oid' columns, they're handled elsewhere.
339		next if $attname eq 'oid';
340
341		# If column has a default value, fill that in.
342		if (defined $column->{default})
343		{
344			$row->{$attname} = $column->{default};
345			next;
346		}
347
348		# Failed to find a value for this field.
349		push @missing_fields, $attname;
350	}
351
352	# Failure to provide all columns is a hard error.
353	if (@missing_fields)
354	{
355		die sprintf "missing values for field(s) %s in %s.dat line %s\n",
356		  join(', ', @missing_fields), $catname, $row->{line_number};
357	}
358}
359
360# If a pg_type entry has an array_type_oid metadata field,
361# auto-generate an entry for its array type.
362sub GenerateArrayTypes
363{
364	my $pgtype_schema = shift;
365	my $types         = shift;
366	my @array_types;
367
368	foreach my $elem_type (@$types)
369	{
370		next if !(ref $elem_type eq 'HASH');
371		next if !defined($elem_type->{array_type_oid});
372
373		my %array_type;
374
375		# Set up metadata fields for array type.
376		$array_type{oid}           = $elem_type->{array_type_oid};
377		$array_type{autogenerated} = 1;
378		$array_type{line_number}   = $elem_type->{line_number};
379
380		# Set up column values derived from the element type.
381		$array_type{typname} = '_' . $elem_type->{typname};
382		$array_type{typelem} = $elem_type->{typname};
383
384		# Arrays require INT alignment, unless the element type requires
385		# DOUBLE alignment.
386		$array_type{typalign} = $elem_type->{typalign} eq 'd' ? 'd' : 'i';
387
388		# Fill in the rest of the array entry's fields.
389		foreach my $column (@$pgtype_schema)
390		{
391			my $attname = $column->{name};
392
393			# Skip if we already set it above.
394			next if defined $array_type{$attname};
395
396			# Apply the BKI_ARRAY_DEFAULT setting if there is one,
397			# otherwise copy the field from the element type.
398			if (defined $column->{array_default})
399			{
400				$array_type{$attname} = $column->{array_default};
401			}
402			else
403			{
404				$array_type{$attname} = $elem_type->{$attname};
405			}
406		}
407
408		# Lastly, cross-link the array to the element type.
409		$elem_type->{typarray} = $array_type{typname};
410
411		push @array_types, \%array_type;
412	}
413
414	push @$types, @array_types;
415
416	return;
417}
418
419# Rename temporary files to final names.
420# Call this function with the final file name and the .tmp extension.
421#
422# If the final file already exists and has identical contents, don't
423# overwrite it; this behavior avoids unnecessary recompiles due to
424# updating the mod date on unchanged header files.
425#
426# Note: recommended extension is ".tmp$$", so that parallel make steps
427# can't use the same temp files.
428sub RenameTempFile
429{
430	my $final_name = shift;
431	my $extension  = shift;
432	my $temp_name  = $final_name . $extension;
433
434	if (-f $final_name
435		&& compare($temp_name, $final_name) == 0)
436	{
437		unlink($temp_name) || die "unlink: $temp_name: $!";
438	}
439	else
440	{
441		rename($temp_name, $final_name) || die "rename: $temp_name: $!";
442	}
443	return;
444}
445
446# Find a symbol defined in a particular header file and extract the value.
447# include_path should be the path to src/include/.
448sub FindDefinedSymbol
449{
450	my ($catalog_header, $include_path, $symbol) = @_;
451	my $value;
452
453	# Make sure include path ends in a slash.
454	if (substr($include_path, -1) ne '/')
455	{
456		$include_path .= '/';
457	}
458	my $file = $include_path . $catalog_header;
459	open(my $find_defined_symbol, '<', $file) || die "$file: $!";
460	while (<$find_defined_symbol>)
461	{
462		if (/^#define\s+\Q$symbol\E\s+(\S+)/)
463		{
464			$value = $1;
465			last;
466		}
467	}
468	close $find_defined_symbol;
469	return $value if defined $value;
470	die "$file: no definition found for $symbol\n";
471}
472
473# Similar to FindDefinedSymbol, but looks in the bootstrap metadata.
474sub FindDefinedSymbolFromData
475{
476	my ($data, $symbol) = @_;
477	foreach my $row (@{$data})
478	{
479		if ($row->{oid_symbol} eq $symbol)
480		{
481			return $row->{oid};
482		}
483	}
484	die "no definition found for $symbol\n";
485}
486
487# Extract an array of all the OIDs assigned in the specified catalog headers
488# and their associated data files (if any).
489# Caution: genbki.pl contains equivalent logic; change it too if you need to
490# touch this.
491sub FindAllOidsFromHeaders
492{
493	my @input_files = @_;
494
495	my @oids = ();
496
497	foreach my $header (@input_files)
498	{
499		$header =~ /(.+)\.h$/
500		  or die "Input files need to be header files.\n";
501		my $datfile = "$1.dat";
502
503		my $catalog = Catalog::ParseHeader($header);
504
505		# We ignore the pg_class OID and rowtype OID of bootstrap catalogs,
506		# as those are expected to appear in the initial data for pg_class
507		# and pg_type.  For regular catalogs, include these OIDs.
508		if (!$catalog->{bootstrap})
509		{
510			push @oids, $catalog->{relation_oid}
511			  if ($catalog->{relation_oid});
512			push @oids, $catalog->{rowtype_oid} if ($catalog->{rowtype_oid});
513		}
514
515		# Not all catalogs have a data file.
516		if (-e $datfile)
517		{
518			my $catdata =
519			  Catalog::ParseData($datfile, $catalog->{columns}, 0);
520
521			foreach my $row (@$catdata)
522			{
523				push @oids, $row->{oid} if defined $row->{oid};
524			}
525		}
526
527		foreach my $toast (@{ $catalog->{toasting} })
528		{
529			push @oids, $toast->{toast_oid}, $toast->{toast_index_oid};
530		}
531		foreach my $index (@{ $catalog->{indexing} })
532		{
533			push @oids, $index->{index_oid};
534		}
535	}
536
537	return \@oids;
538}
539
5401;
541