postgresql/src/backend/catalog/Catalog.pm

#----------------------------------------------------------------------
#
# Catalog.pm
#    Perl module that extracts info from catalog headers into Perl
#    data structures
#
# Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/backend/catalog/Catalog.pm
#
#----------------------------------------------------------------------

package Catalog;

use strict;
use warnings;

# Call this function with an array of names of header files to parse.
# Returns a nested data structure describing the data in the headers.
sub Catalogs
{
	my (%catalogs, $catname, $declaring_attributes, $most_recent);
	$catalogs{names} = [];

	# There are a few types which are given one name in the C source, but a
	# different name at the SQL level.  These are enumerated here.
	my %RENAME_ATTTYPE = (
		'int16'         => 'int2',
		'int32'         => 'int4',
		'int64'         => 'int8',
		'Oid'           => 'oid',
		'NameData'      => 'name',
		'TransactionId' => 'xid',
		'XLogRecPtr'    => 'pg_lsn');

	foreach my $input_file (@_)
	{
		my %catalog;
		$catalog{columns} = [];
		$catalog{data}    = [];

		open(my $ifh, '<', $input_file) || die "$input_file: $!";

		my ($filename) = ($input_file =~ m/(\w+)\.h$/);
		my $natts_pat = "Natts_$filename";

		# Scan the input file.
		while (<$ifh>)
		{

			# Strip C-style comments.
			s;/\*(.|\n)*\*/;;g;
			if (m;/\*;)
			{

				# handle multi-line comments properly.
				my $next_line = <$ifh>;
				die "$input_file: ends within C-style comment\n"
				  if !defined $next_line;
				$_ .= $next_line;
				redo;
			}

			# Remember input line number for later.
			my $input_line_number = $.;

			# Strip useless whitespace and trailing semicolons.
			chomp;
			s/^\s+//;
			s/;\s*$//;
			s/\s+/ /g;

			# Push the data into the appropriate data structure.
			if (/$natts_pat\s+(\d+)/)
			{
				$catalog{natts} = $1;
			}
			elsif (
				/^DATA\(insert(\s+OID\s+=\s+(\d+))?\s+\(\s*(.*)\s*\)\s*\)$/)
			{
				check_natts($filename, $catalog{natts}, $3, $input_file,
					$input_line_number);

				push @{ $catalog{data} }, { oid => $2, bki_values => $3 };
			}
			elsif (/^DESCR\(\"(.*)\"\)$/)
			{
				$most_recent = $catalog{data}->[-1];

				# this tests if most recent line is not a DATA() statement
				if (ref $most_recent ne 'HASH')
				{
					die "DESCR() does not apply to any catalog ($input_file)";
				}
				if (!defined $most_recent->{oid})
				{
					die "DESCR() does not apply to any oid ($input_file)";
				}
				elsif ($1 ne '')
				{
					$most_recent->{descr} = $1;
				}
			}
			elsif (/^SHDESCR\(\"(.*)\"\)$/)
			{
				$most_recent = $catalog{data}->[-1];

				# this tests if most recent line is not a DATA() statement
				if (ref $most_recent ne 'HASH')
				{
					die
					  "SHDESCR() does not apply to any catalog ($input_file)";
				}
				if (!defined $most_recent->{oid})
				{
					die "SHDESCR() does not apply to any oid ($input_file)";
				}
				elsif ($1 ne '')
				{
					$most_recent->{shdescr} = $1;
				}
			}
			elsif (/^DECLARE_TOAST\(\s*(\w+),\s*(\d+),\s*(\d+)\)/)
			{
				$catname = 'toasting';
				my ($toast_name, $toast_oid, $index_oid) = ($1, $2, $3);
				push @{ $catalog{data} },
				  "declare toast $toast_oid $index_oid on $toast_name\n";
			}
			elsif (/^DECLARE_(UNIQUE_)?INDEX\(\s*(\w+),\s*(\d+),\s*(.+)\)/)
			{
				$catname = 'indexing';
				my ($is_unique, $index_name, $index_oid, $using) =
				  ($1, $2, $3, $4);
				push @{ $catalog{data} },
				  sprintf(
					"declare %sindex %s %s %s\n",
					$is_unique ? 'unique ' : '',
					$index_name, $index_oid, $using);
			}
			elsif (/^BUILD_INDICES/)
			{
				push @{ $catalog{data} }, "build indices\n";
			}
			elsif (/^CATALOG\(([^,]*),(\d+)\)/)
			{
				$catname = $1;
				$catalog{relation_oid} = $2;

				# Store pg_* catalog names in the same order we receive them
				push @{ $catalogs{names} }, $catname;

				$catalog{bootstrap} = /BKI_BOOTSTRAP/ ? ' bootstrap' : '';
				$catalog{shared_relation} =
				  /BKI_SHARED_RELATION/ ? ' shared_relation' : '';
				$catalog{without_oids} =
				  /BKI_WITHOUT_OIDS/ ? ' without_oids' : '';
				$catalog{rowtype_oid} =
				  /BKI_ROWTYPE_OID\((\d+)\)/ ? " rowtype_oid $1" : '';
				$catalog{schema_macro} = /BKI_SCHEMA_MACRO/ ? 1 : 0;
				$declaring_attributes = 1;
			}
			elsif ($declaring_attributes)
			{
				next if (/^{|^$/);
				next if (/^#/);
				if (/^}/)
				{
					undef $declaring_attributes;
				}
				else
				{
					my %column;
					my ($atttype, $attname, $attopt) = split /\s+/, $_;
					die "parse error ($input_file)" unless $attname;
					if (exists $RENAME_ATTTYPE{$atttype})
					{
						$atttype = $RENAME_ATTTYPE{$atttype};
					}
					if ($attname =~ /(.*)\[.*\]/)    # array attribute
					{
						$attname = $1;
						$atttype .= '[]';            # variable-length only
					}

					$column{type} = $atttype;
					$column{name} = $attname;

					if (defined $attopt)
					{
						if ($attopt eq 'BKI_FORCE_NULL')
						{
							$column{forcenull} = 1;
						}
						elsif ($attopt eq 'BKI_FORCE_NOT_NULL')
						{
							$column{forcenotnull} = 1;
						}
						else
						{
							die
"unknown column option $attopt on column $attname";
						}
					}
					push @{ $catalog{columns} }, \%column;
				}
			}
		}
		$catalogs{$catname} = \%catalog;
		close $ifh;
	}
	return \%catalogs;
}

# Split a DATA line into fields.
# Call this on the bki_values element of a DATA item returned by Catalogs();
# it returns a list of field values.  We don't strip quoting from the fields.
# Note: it should be safe to assign the result to a list of length equal to
# the nominal number of catalog fields, because check_natts already checked
# the number of fields.
sub SplitDataLine
{
	my $bki_values = shift;

	# This handling of quoted strings might look too simplistic, but it
	# matches what bootscanner.l does: that has no provision for quote marks
	# inside quoted strings, either.  If we don't have a quoted string, just
	# snarf everything till next whitespace.  That will accept some things
	# that bootscanner.l will see as erroneous tokens; but it seems wiser
	# to do that and let bootscanner.l complain than to silently drop
	# non-whitespace characters.
	my @result = $bki_values =~ /"[^"]*"|\S+/g;

	return @result;
}

# Rename temporary files to final names.
# Call this function with the final file name and the .tmp extension
# Note: recommended extension is ".tmp$$", so that parallel make steps
# can't use the same temp files
sub RenameTempFile
{
	my $final_name = shift;
	my $extension  = shift;
	my $temp_name  = $final_name . $extension;
	print "Writing $final_name\n";
	rename($temp_name, $final_name) || die "rename: $temp_name: $!";
}


# Find a symbol defined in a particular header file and extract the value.
#
# The include path has to be passed as a reference to an array.
sub FindDefinedSymbol
{
	my ($catalog_header, $include_path, $symbol) = @_;

	for my $path (@$include_path)
	{

		# Make sure include path ends in a slash.
		if (substr($path, -1) ne '/')
		{
			$path .= '/';
		}
		my $file = $path . $catalog_header;
		next if !-f $file;
		open(my $find_defined_symbol, '<', $file) || die "$file: $!";
		while (<$find_defined_symbol>)
		{
			if (/^#define\s+\Q$symbol\E\s+(\S+)/)
			{
				return $1;
			}
		}
		close $find_defined_symbol;
		die "$file: no definition found for $symbol\n";
	}
	die "$catalog_header: not found in any include directory\n";
}


# verify the number of fields in the passed-in DATA line
sub check_natts
{
	my ($catname, $natts, $bki_val, $file, $line) = @_;

	die
"Could not find definition for Natts_${catname} before start of DATA() in $file\n"
	  unless defined $natts;

	my $nfields = scalar(SplitDataLine($bki_val));

	die sprintf
"Wrong number of attributes in DATA() entry at %s:%d (expected %d but got %d)\n",
	  $file, $line, $natts, $nfields
	  unless $natts == $nfields;
}

1;