#!/usr/bin/perl
#----------------------------------------------------------------------
#
# reformat_dat_file.pl
# Perl script that reads in catalog data file(s) and writes out
# functionally equivalent file(s) in a standard format.
#
# In each entry of a reformatted file, metadata fields (if present)
# come first, with normal attributes starting on the following line,
# in the same order as the columns of the corresponding catalog.
# Comments and blank lines are preserved.
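#
# For example, a reformatted entry takes roughly this shape (illustrative
# pg_proc-style values; the exact fields vary by catalog):
#
#   { oid => '1242', descr => 'I/O',
#     proname => 'boolin', prorettype => 'bool', proargtypes => 'cstring',
#     prosrc => 'boolin' },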
#
# Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/include/catalog/reformat_dat_file.pl
#
#----------------------------------------------------------------------

use strict;
use warnings FATAL => 'all';

use FindBin;
use Getopt::Long;

# If you copy this script to somewhere other than src/include/catalog,
# you'll need to modify this "use lib" or provide a suitable -I switch.
use lib "$FindBin::RealBin/../../backend/catalog/";
use Catalog;

# Names of the metadata fields of a catalog entry.
# Note: oid is a normal column from a storage perspective, but it's more
# important than the rest, so it's listed first among the metadata fields.
# Note: line_number is also a metadata field, but we never write it out,
# so it's not listed here.
my @METADATA =
  ('oid', 'oid_symbol', 'array_type_oid', 'descr', 'autogenerated');

# Process command line switches.
my $output_path = '';
my $full_tuples = 0;

GetOptions(
    'output=s' => \$output_path,
    'full-tuples' => \$full_tuples) || usage();

# Sanity check arguments.
die "No input files.\n" unless @ARGV;

# Make sure output_path ends in a slash.
if ($output_path ne '' && substr($output_path, -1) ne '/')
{
    $output_path .= '/';
}

# Read all the input files into internal data structures.
# We pass data file names as arguments and then look for matching
# headers to parse the schema from.
my %catalogs;
my %catalog_data;
my @catnames;

foreach my $datfile (@ARGV)
{
    $datfile =~ /(.+)\.dat$/
      or die "Input files need to be data (.dat) files.\n";

    my $header = "$1.h";
    die "There is no header file corresponding to $datfile"
      if !-e $header;

    my $catalog = Catalog::ParseHeader($header);
    my $catname = $catalog->{catname};
    my $schema = $catalog->{columns};

    push @catnames, $catname;
    $catalogs{$catname} = $catalog;

    $catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 1);
}

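# Note: with the preserve_formatting flag (the third argument to
# Catalog::ParseData above) the returned list mixes hash refs (data
# entries) with plain strings (comments and blank lines), which is why
# the writing loop below must check "ref $data".
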
########################################################################
# At this point, we have read all the data. If you are modifying this
# script for bulk editing, this is a good place to build lookup tables,
# if you need to. In the following example, the "next if !ref $row"
# check below is a hack to filter out non-hash objects. This is because
# we build the lookup tables from data that we read using the
# "preserve_formatting" parameter.
#
## Index access method lookup.
#my %amnames;
#foreach my $row (@{ $catalog_data{pg_am} })
#{
#    next if !ref $row;
#    $amnames{$row->{oid}} = $row->{amname};
#}
########################################################################

# Write the data.
foreach my $catname (@catnames)
{
    my $catalog = $catalogs{$catname};
    my @attnames;
    my $schema = $catalog->{columns};

    foreach my $column (@$schema)
    {
        my $attname = $column->{name};

        # We may have ordinary columns at the storage level that we still
        # want to format as a special value. Exclude these from the column
        # list so they are not written twice.
        push @attnames, $attname
          if !(grep { $_ eq $attname } @METADATA);
    }

    # Write output files to specified directory.
    my $datfile = "$output_path$catname.dat";
    open my $dat, '>', $datfile
      or die "can't open $datfile: $!";

    foreach my $data (@{ $catalog_data{$catname} })
    {
        # Hash ref representing a data entry.
        if (ref $data eq 'HASH')
        {
            my %values = %$data;

            ############################################################
            # At this point we have the full tuple in memory as a hash
            # and can do any operations we want. As written, it only
            # removes default values, but this script can be adapted to
            # do one-off bulk-editing.
            ############################################################
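
            ## Hypothetical one-off bulk edit, in the same spirit as the
            ## commented-out lookup-table example above (purely
            ## illustrative, not something to commit): give every entry
            ## that lacks a description a placeholder descr field.
            #$values{descr} = 'FIXME: add description'
            #  if !defined $values{descr};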

            if (!$full_tuples)
            {
                # If it's an autogenerated entry, drop it completely.
                next if $values{autogenerated};

                # Else, just drop any default/computed fields.
                strip_default_values(\%values, $schema, $catname);
            }

            print $dat "{";

            # Separate out metadata fields for readability.
            my $metadata_str = format_hash(\%values, @METADATA);
            if ($metadata_str)
            {
                print $dat $metadata_str;

                # User attributes start on next line.
                print $dat ",\n ";
            }

            my $data_str = format_hash(\%values, @attnames);
            print $dat $data_str;
            print $dat " },\n";
        }

        # Preserve blank lines.
        elsif ($data =~ /^\s*$/)
        {
            print $dat "\n";
        }

        # Preserve comments or brackets that are on their own line.
        elsif ($data =~ /^\s*(\[|\]|#.*?)\s*$/)
        {
            print $dat "$1\n";
        }
    }
    close $dat;
}

# Remove column values for which there is a matching default,
# or if the value can be computed from other columns.
sub strip_default_values
{
    my ($row, $schema, $catname) = @_;

    # Delete values that match defaults.
    foreach my $column (@$schema)
    {
        my $attname = $column->{name};

        # It's okay if we have no oid value, since it will be assigned
        # automatically before bootstrap.
        die "strip_default_values: $catname.$attname undefined\n"
          if !defined $row->{$attname} and $attname ne 'oid';

        if (defined $column->{default}
            and ($row->{$attname} eq $column->{default}))
        {
            delete $row->{$attname};
        }
    }

    # Delete computed values. See AddDefaultValues() in Catalog.pm.
    # Note: This must be done after deleting values matching defaults.
    if ($catname eq 'pg_proc')
    {
        delete $row->{pronargs} if defined $row->{proargtypes};
    }

    # If a pg_type entry has an auto-generated array type, then its
    # typarray field is a computed value too (see GenerateArrayTypes).
    if ($catname eq 'pg_type')
    {
        delete $row->{typarray} if defined $row->{array_type_oid};
    }
    return;
}
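
# For example (illustrative values): a pg_proc row read back with
# proargtypes => 'int4 int4' and pronargs => '2' will have pronargs
# deleted here, since Catalog.pm can recompute it from proargtypes
# the next time the .dat file is read.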

# Format the individual elements of a Perl hash into a valid string
# representation. We do this ourselves, rather than use native Perl
# facilities, so we can keep control over the exact formatting of the
# data files.
sub format_hash
{
    my $data = shift;
    my @orig_attnames = @_;

    # Copy attname to new array if it has a value, so we can determine
    # the last populated element. We do this because we may have default
    # values or empty metadata fields.
    my @attnames;
    foreach my $orig_attname (@orig_attnames)
    {
        push @attnames, $orig_attname
          if defined $data->{$orig_attname};
    }

    # When calling this function, we either have an open-bracket or a
    # leading space already.
    my $char_count = 1;

    my $threshold;
    my $hash_str = '';
    my $element_count = 0;

    foreach my $attname (@attnames)
    {
        $element_count++;

        # To limit the line to 80 chars, we need to account for the
        # trailing characters.
        if ($element_count == $#attnames + 1)
        {
            # Last element, so allow space for ' },'
            $threshold = 77;
        }
        else
        {
            # Just need space for trailing comma
            $threshold = 79;
        }

        if ($element_count > 1)
        {
            $hash_str .= ',';
            $char_count++;
        }

        my $value = $data->{$attname};

        # Escape single quotes.
        $value =~ s/'/\\'/g;

        # Include a leading space in the key-value pair, since this will
        # always go after either a comma or an additional padding space on
        # the next line.
        my $element = " $attname => '$value'";
        my $element_length = length($element);

        # If adding the element to the current line would expand the line
        # beyond 80 chars, put it on the next line. We don't do this for
        # the first element, since that would create a blank line.
        if ($element_count > 1 and $char_count + $element_length > $threshold)
        {
            # Put on next line with an additional space preceding. There
            # are now two spaces in front of the key-value pair, lining
            # it up with the line above it.
            $hash_str .= "\n $element";
            $char_count = $element_length + 1;
        }
        else
        {
            $hash_str .= $element;
            $char_count += $element_length;
        }
    }
    return $hash_str;
}
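
# For example (illustrative): given a row { proname => 'boolin',
# prosrc => 'boolin' } and those two attribute names, format_hash()
# returns the string " proname => 'boolin', prosrc => 'boolin'",
# folding onto a new line whenever the running width would pass the
# 80-column threshold.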

sub usage
{
    die <<EOM;
Usage: reformat_dat_file.pl [options] datafile...

Options:
    --output PATH    output directory (default '.')
    --full-tuples    write out full tuples, including default values

Non-option arguments are the names of input .dat files.
Updated files are written to the output directory,
possibly overwriting the input files.
EOM
}
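
# Example invocation (paths illustrative), run from src/include/catalog:
#
#   perl reformat_dat_file.pl --output /tmp pg_proc.dat
#
# This reads pg_proc.dat (using pg_proc.h for the schema) and writes a
# reformatted copy to /tmp/pg_proc.dat.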