postgresql/src/backend/catalog/Catalog.pm

#----------------------------------------------------------------------
#
# Catalog.pm
#    Perl module that extracts info from catalog files into Perl
#    data structures
#
# Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/backend/catalog/Catalog.pm
#
#----------------------------------------------------------------------

package Catalog;

use strict;
use warnings;

use File::Compare;


# Parses a catalog header file into a data structure describing the schema
# of the catalog.
sub ParseHeader
{
	my $input_file = shift;

	# There are a few types which are given one name in the C source, but a
	# different name at the SQL level.  These are enumerated here.
	my %RENAME_ATTTYPE = (
		'int16' => 'int2',
		'int32' => 'int4',
		'int64' => 'int8',
		'Oid' => 'oid',
		'NameData' => 'name',
		'TransactionId' => 'xid',
		'XLogRecPtr' => 'pg_lsn');

	my %catalog;
	my $declaring_attributes = 0;
	my $is_varlen = 0;
	my $is_client_code = 0;

	$catalog{columns} = [];
	$catalog{toasting} = [];
	$catalog{indexing} = [];
	$catalog{other_oids} = [];
	$catalog{foreign_keys} = [];
	$catalog{client_code} = [];

	open(my $ifh, '<', $input_file) || die "$input_file: $!";

	# Scan the input file.
	while (<$ifh>)
	{

		# Set appropriate flag when we're in certain code sections.
		if (/^#/)
		{
			$is_varlen = 1 if /^#ifdef\s+CATALOG_VARLEN/;
			if (/^#ifdef\s+EXPOSE_TO_CLIENT_CODE/)
			{
				$is_client_code = 1;
				next;
			}
			next if !$is_client_code;
		}

		if (!$is_client_code)
		{
			# Strip C-style comments.
			s;/\*(.|\n)*\*/;;g;
			if (m;/\*;)
			{

				# handle multi-line comments properly.
				my $next_line = <$ifh>;
				die "$input_file: ends within C-style comment\n"
				  if !defined $next_line;
				$_ .= $next_line;
				redo;
			}

			# Strip useless whitespace and trailing semicolons.
			chomp;
			s/^\s+//;
			s/;\s*$//;
			s/\s+/ /g;
		}

		# Push the data into the appropriate data structure.
		# Caution: when adding new recognized OID-defining macros,
		# also update src/include/catalog/renumber_oids.pl.
		if (/^DECLARE_TOAST\(\s*(\w+),\s*(\d+),\s*(\d+)\)/)
		{
			push @{ $catalog{toasting} },
			  { parent_table => $1, toast_oid => $2, toast_index_oid => $3 };
		}
		elsif (
			/^DECLARE_TOAST_WITH_MACRO\(\s*(\w+),\s*(\d+),\s*(\d+),\s*(\w+),\s*(\w+)\)/
		  )
		{
			push @{ $catalog{toasting} },
			  {
				parent_table => $1,
				toast_oid => $2,
				toast_index_oid => $3,
				toast_oid_macro => $4,
				toast_index_oid_macro => $5
			  };
		}
		elsif (
			/^DECLARE_(UNIQUE_)?INDEX(_PKEY)?\(\s*(\w+),\s*(\d+),\s*(\w+),\s*(.+)\)/
		  )
		{
			push @{ $catalog{indexing} },
			  {
				is_unique => $1 ? 1 : 0,
				is_pkey => $2 ? 1 : 0,
				index_name => $3,
				index_oid => $4,
				index_oid_macro => $5,
				index_decl => $6
			  };
		}
		elsif (/^DECLARE_OID_DEFINING_MACRO\(\s*(\w+),\s*(\d+)\)/)
		{
			push @{ $catalog{other_oids} },
			  {
				other_name => $1,
				other_oid => $2
			  };
		}
		elsif (
			/^DECLARE_(ARRAY_)?FOREIGN_KEY(_OPT)?\(\s*\(([^)]+)\),\s*(\w+),\s*\(([^)]+)\)\)/
		  )
		{
			push @{ $catalog{foreign_keys} },
			  {
				is_array => $1 ? 1 : 0,
				is_opt => $2 ? 1 : 0,
				fk_cols => $3,
				pk_table => $4,
				pk_cols => $5
			  };
		}
		elsif (/^CATALOG\((\w+),(\d+),(\w+)\)/)
		{
			$catalog{catname} = $1;
			$catalog{relation_oid} = $2;
			$catalog{relation_oid_macro} = $3;

			$catalog{bootstrap} = /BKI_BOOTSTRAP/ ? ' bootstrap' : '';
			$catalog{shared_relation} =
			  /BKI_SHARED_RELATION/ ? ' shared_relation' : '';
			if (/BKI_ROWTYPE_OID\((\d+),(\w+)\)/)
			{
				$catalog{rowtype_oid} = $1;
				$catalog{rowtype_oid_clause} = " rowtype_oid $1";
				$catalog{rowtype_oid_macro} = $2;
			}
			else
			{
				$catalog{rowtype_oid} = '';
				$catalog{rowtype_oid_clause} = '';
				$catalog{rowtype_oid_macro} = '';
			}
			$catalog{schema_macro} = /BKI_SCHEMA_MACRO/ ? 1 : 0;
			$declaring_attributes = 1;
		}
		elsif ($is_client_code)
		{
			if (/^#endif/)
			{
				$is_client_code = 0;
			}
			else
			{
				push @{ $catalog{client_code} }, $_;
			}
		}
		elsif ($declaring_attributes)
		{
			next if (/^{|^$/);
			if (/^}/)
			{
				$declaring_attributes = 0;
			}
			else
			{
				my %column;
				my @attopts = split /\s+/, $_;
				my $atttype = shift @attopts;
				my $attname = shift @attopts;
				die "parse error ($input_file)"
				  unless ($attname and $atttype);

				if (exists $RENAME_ATTTYPE{$atttype})
				{
					$atttype = $RENAME_ATTTYPE{$atttype};
				}

				# If the C name ends with '[]' or '[digits]', we have
				# an array type, so we discard that from the name and
				# prepend '_' to the type.
				if ($attname =~ /(\w+)\[\d*\]/)
				{
					$attname = $1;
					$atttype = '_' . $atttype;
				}

				$column{type} = $atttype;
				$column{name} = $attname;
				$column{is_varlen} = 1 if $is_varlen;

				foreach my $attopt (@attopts)
				{
					if ($attopt eq 'BKI_FORCE_NULL')
					{
						$column{forcenull} = 1;
					}
					elsif ($attopt eq 'BKI_FORCE_NOT_NULL')
					{
						$column{forcenotnull} = 1;
					}

					# We use quotes for values like \0 and \054, to
					# make sure all compilers and syntax highlighters
					# can recognize them properly.
					elsif ($attopt =~ /BKI_DEFAULT\(['"]?([^'"]+)['"]?\)/)
					{
						$column{default} = $1;
					}
					elsif (
						$attopt =~ /BKI_ARRAY_DEFAULT\(['"]?([^'"]+)['"]?\)/)
					{
						$column{array_default} = $1;
					}
					elsif ($attopt =~ /BKI_LOOKUP(_OPT)?\((\w+)\)/)
					{
						$column{lookup} = $2;
						$column{lookup_opt} = $1 ? 1 : 0;
						# BKI_LOOKUP implicitly makes an FK reference
						push @{ $catalog{foreign_keys} },
						  {
							is_array => (
								$atttype eq 'oidvector' || $atttype eq '_oid')
							? 1
							: 0,
							is_opt => $column{lookup_opt},
							fk_cols => $attname,
							pk_table => $column{lookup},
							pk_cols => 'oid'
						  };
					}
					else
					{
						die
						  "unknown or misformatted column option $attopt on column $attname";
					}

					if ($column{forcenull} and $column{forcenotnull})
					{
						die "$attname is forced both null and not null";
					}
				}
				push @{ $catalog{columns} }, \%column;
			}
		}
	}
	close $ifh;
	return \%catalog;
}

# Parses a file containing Perl data structure literals, returning live data.
#
# The parameter $preserve_formatting needs to be set for callers that want
# to work with non-data lines in the data files, such as comments and blank
# lines. If a caller just wants to consume the data, leave it unset.
sub ParseData
{
	my ($input_file, $schema, $preserve_formatting) = @_;

	open(my $ifd, '<', $input_file) || die "$input_file: $!";
	$input_file =~ /(\w+)\.dat$/
	  or die "Input file $input_file needs to be a .dat file.\n";
	my $catname = $1;
	my $data = [];

	if ($preserve_formatting)
	{
		# Scan the input file.
		while (<$ifd>)
		{
			my $hash_ref;

			if (/{/)
			{
				# Capture the hash ref
				# NB: Assumes that the next hash ref can't start on the
				# same line where the present one ended.
				# Not foolproof, but we shouldn't need a full parser,
				# since we expect relatively well-behaved input.

				# Quick hack to detect when we have a full hash ref to
				# parse. We can't just use a regex because of values in
				# pg_aggregate and pg_proc like '{0,0}'.  This will need
				# work if we ever need to allow unbalanced braces within
				# a field value.
				my $lcnt = tr/{//;
				my $rcnt = tr/}//;

				if ($lcnt == $rcnt)
				{
					# We're treating the input line as a piece of Perl, so we
					# need to use string eval here. Tell perlcritic we know what
					# we're doing.
					eval "\$hash_ref = $_"; ## no critic (ProhibitStringyEval)
					if (!ref $hash_ref)
					{
						die "$input_file: error parsing line $.:\n$_\n";
					}

					# Annotate each hash with the source line number.
					$hash_ref->{line_number} = $.;

					# Expand tuples to their full representation.
					AddDefaultValues($hash_ref, $schema, $catname);
				}
				else
				{
					my $next_line = <$ifd>;
					die "$input_file: file ends within Perl hash\n"
					  if !defined $next_line;
					$_ .= $next_line;
					redo;
				}
			}

			# If we found a hash reference, keep it, unless it is marked as
			# autogenerated; in that case it'd duplicate an entry we'll
			# autogenerate below.  (This makes it safe for reformat_dat_file.pl
			# with --full-tuples to print autogenerated entries, which seems like
			# useful behavior for debugging.)
			#
			# Otherwise, we have a non-data string, which we need to keep in
			# order to preserve formatting.
			if (defined $hash_ref)
			{
				push @$data, $hash_ref if !$hash_ref->{autogenerated};
			}
			else
			{
				push @$data, $_;
			}
		}
	}
	else
	{
		# When we only care about the contents, it's faster to read and eval
		# the whole file at once.
		local $/;
		my $full_file = <$ifd>;
		eval "\$data = $full_file"    ## no critic (ProhibitStringyEval)
		  or die "error parsing $input_file\n";
		foreach my $hash_ref (@{$data})
		{
			AddDefaultValues($hash_ref, $schema, $catname);
		}
	}

	close $ifd;

	# If this is pg_type, auto-generate array types too.
	GenerateArrayTypes($schema, $data) if $catname eq 'pg_type';

	return $data;
}

# Fill in default values of a record using the given schema.
# It's the caller's responsibility to specify other values beforehand.
sub AddDefaultValues
{
	my ($row, $schema, $catname) = @_;
	my @missing_fields;

	# Compute special-case column values.
	# Note: If you add new cases here, you must also teach
	# strip_default_values() in include/catalog/reformat_dat_file.pl
	# to delete them.
	if ($catname eq 'pg_proc')
	{
		# pg_proc.pronargs can be derived from proargtypes.
		if (defined $row->{proargtypes})
		{
			my @proargtypes = split /\s+/, $row->{proargtypes};
			$row->{pronargs} = scalar(@proargtypes);
		}
	}

	# Now fill in defaults, and note any columns that remain undefined.
	foreach my $column (@$schema)
	{
		my $attname = $column->{name};

		# No work if field already has a value.
		next if defined $row->{$attname};

		# Ignore 'oid' columns, they're handled elsewhere.
		next if $attname eq 'oid';

		# If column has a default value, fill that in.
		if (defined $column->{default})
		{
			$row->{$attname} = $column->{default};
			next;
		}

		# Failed to find a value for this field.
		push @missing_fields, $attname;
	}

	# Failure to provide all columns is a hard error.
	if (@missing_fields)
	{
		die sprintf "missing values for field(s) %s in %s.dat line %s\n",
		  join(', ', @missing_fields), $catname, $row->{line_number};
	}
}

# If a pg_type entry has an array_type_oid metadata field,
# auto-generate an entry for its array type.
sub GenerateArrayTypes
{
	my $pgtype_schema = shift;
	my $types = shift;
	my @array_types;

	foreach my $elem_type (@$types)
	{
		next if !(ref $elem_type eq 'HASH');
		next if !defined($elem_type->{array_type_oid});

		my %array_type;

		# Set up metadata fields for array type.
		$array_type{oid} = $elem_type->{array_type_oid};
		$array_type{autogenerated} = 1;
		$array_type{line_number} = $elem_type->{line_number};

		# Set up column values derived from the element type.
		$array_type{typname} = '_' . $elem_type->{typname};
		$array_type{typelem} = $elem_type->{typname};

		# Arrays require INT alignment, unless the element type requires
		# DOUBLE alignment.
		$array_type{typalign} = $elem_type->{typalign} eq 'd' ? 'd' : 'i';

		# Fill in the rest of the array entry's fields.
		foreach my $column (@$pgtype_schema)
		{
			my $attname = $column->{name};

			# Skip if we already set it above.
			next if defined $array_type{$attname};

			# Apply the BKI_ARRAY_DEFAULT setting if there is one,
			# otherwise copy the field from the element type.
			if (defined $column->{array_default})
			{
				$array_type{$attname} = $column->{array_default};
			}
			else
			{
				$array_type{$attname} = $elem_type->{$attname};
			}
		}

		# Lastly, cross-link the array to the element type.
		$elem_type->{typarray} = $array_type{typname};

		push @array_types, \%array_type;
	}

	push @$types, @array_types;

	return;
}

# Rename temporary files to final names.
# Call this function with the final file name and the .tmp extension.
#
# If the final file already exists and has identical contents, don't
# overwrite it; this behavior avoids unnecessary recompiles due to
# updating the mod date on unchanged header files.
#
# Note: recommended extension is ".tmp$$", so that parallel make steps
# can't use the same temp files.
sub RenameTempFile
{
	my $final_name = shift;
	my $extension = shift;
	my $temp_name = $final_name . $extension;

	if (-f $final_name
		&& compare($temp_name, $final_name) == 0)
	{
		unlink($temp_name) || die "unlink: $temp_name: $!";
	}
	else
	{
		rename($temp_name, $final_name) || die "rename: $temp_name: $!";
	}
	return;
}

# Find a symbol defined in a particular header file and extract the value.
# include_path should be the path to src/include/.
sub FindDefinedSymbol
{
	my ($catalog_header, $include_path, $symbol) = @_;
	my $value;

	# Make sure include path ends in a slash.
	if (substr($include_path, -1) ne '/')
	{
		$include_path .= '/';
	}
	my $file = $include_path . $catalog_header;
	open(my $find_defined_symbol, '<', $file) || die "$file: $!";
	while (<$find_defined_symbol>)
	{
		if (/^#define\s+\Q$symbol\E\s+(\S+)/)
		{
			$value = $1;
			last;
		}
	}
	close $find_defined_symbol;
	return $value if defined $value;
	die "$file: no definition found for $symbol\n";
}

# Similar to FindDefinedSymbol, but looks in the bootstrap metadata.
sub FindDefinedSymbolFromData
{
	my ($data, $symbol) = @_;
	foreach my $row (@{$data})
	{
		if ($row->{oid_symbol} eq $symbol)
		{
			return $row->{oid};
		}
	}
	die "no definition found for $symbol\n";
}

# Extract an array of all the OIDs assigned in the specified catalog headers
# and their associated data files (if any).
# Caution: genbki.pl contains equivalent logic; change it too if you need to
# touch this.
sub FindAllOidsFromHeaders
{
	my @input_files = @_;

	my @oids = ();

	foreach my $header (@input_files)
	{
		$header =~ /(.+)\.h$/
		  or die "Input files need to be header files.\n";
		my $datfile = "$1.dat";

		my $catalog = Catalog::ParseHeader($header);

		# We ignore the pg_class OID and rowtype OID of bootstrap catalogs,
		# as those are expected to appear in the initial data for pg_class
		# and pg_type.  For regular catalogs, include these OIDs.
		if (!$catalog->{bootstrap})
		{
			push @oids, $catalog->{relation_oid}
			  if ($catalog->{relation_oid});
			push @oids, $catalog->{rowtype_oid} if ($catalog->{rowtype_oid});
		}

		# Not all catalogs have a data file.
		if (-e $datfile)
		{
			my $catdata =
			  Catalog::ParseData($datfile, $catalog->{columns}, 0);

			foreach my $row (@$catdata)
			{
				push @oids, $row->{oid} if defined $row->{oid};
			}
		}

		foreach my $toast (@{ $catalog->{toasting} })
		{
			push @oids, $toast->{toast_oid}, $toast->{toast_index_oid};
		}
		foreach my $index (@{ $catalog->{indexing} })
		{
			push @oids, $index->{index_oid};
		}
		foreach my $other (@{ $catalog->{other_oids} })
		{
			push @oids, $other->{other_oid};
		}
	}

	return \@oids;
}

1;