#!/usr/bin/perl
#----------------------------------------------------------------------
#
# reformat_dat_file.pl
#    Perl script that reads in catalog data file(s) and writes out
#    functionally equivalent file(s) in a standard format.
#
#    In each entry of a reformatted file, metadata fields (if present)
#    come first, with normal attributes starting on the following line,
#    in the same order as the columns of the corresponding catalog.
#    Comments and blank lines are preserved.
#
# Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/include/catalog/reformat_dat_file.pl
#
#----------------------------------------------------------------------

use strict;
use warnings FATAL => 'all';

use FindBin;
use Getopt::Long;

# The Catalog module is located relative to this script.  If the script
# is copied somewhere other than src/include/catalog, adjust this
# "use lib" or supply a suitable -I switch.
use lib "$FindBin::RealBin/../../backend/catalog/";
use Catalog;

# Names of the metadata fields of a catalog entry, in output order.
# Note: oid is a normal column from a storage perspective, but it's more
# important than the rest, so it's listed first among the metadata fields.
# Note: line_number is also a metadata field, but we never write it out,
# so it's not listed here.
my @METADATA = qw(oid oid_symbol array_type_oid descr autogenerated);

# Command line switches and their defaults.
my $output_path = '';
my $full_tuples = 0;

GetOptions(
	'output=s' => \$output_path,
	'full-tuples' => \$full_tuples) or usage();

# At least one data file must be named on the command line.
die "No input files.\n" if !@ARGV;

# A non-empty output_path is used as a prefix, so it must end in a slash.
$output_path .= '/'
  if $output_path ne '' && $output_path !~ m{/\z};

# Read all the input files into internal data structures.
# We pass data file names as arguments and then look for matching
# headers to parse the schema from.
my %catalogs;
my %catalog_data;
my @catnames;
foreach my $datfile (@ARGV)
{
	# Each data file foo.dat must sit next to its header foo.h, which
	# supplies the catalog's column definitions.
	$datfile =~ /(.+)\.dat$/
	  or die "Input files need to be data (.dat) files.\n";

	my $header = "$1.h";
	die "There is no header file corresponding to $datfile"
	  if !-e $header;

	my $catalog = Catalog::ParseHeader($header);
	my $catname = $catalog->{catname};
	my $schema = $catalog->{columns};

	# Remember the order in which catalogs were given, so that output
	# is produced in the same order.
	push @catnames, $catname;
	$catalogs{$catname} = $catalog;

	# The third argument asks for formatting to be preserved: the
	# returned list then mixes hash refs (data entries) with plain
	# strings (other lines), which the write loop below tells apart.
	$catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 1);
}

########################################################################
# At this point, we have read all the data. If you are modifying this
# script for bulk editing, this is a good place to build lookup tables,
# if you need to. In the following example, the "next if !ref $row"
# check below is a hack to filter out non-hash objects. This is because
# we build the lookup tables from data that we read using the
# "preserve_formatting" parameter.
#
##Index access method lookup.
#my %amnames;
#foreach my $row (@{ $catalog_data{pg_am} })
#{
#	next if !ref $row;
#	$amnames{$row->{oid}} = $row->{amname};
#}
########################################################################

# Write the data.
foreach my $catname (@catnames)
{
	my $catalog = $catalogs{$catname};
	my @attnames;
	my $schema = $catalog->{columns};

	foreach my $column (@$schema)
	{
		my $attname = $column->{name};

		# We may have ordinary columns at the storage level that we still
		# want to format as a special value.  Exclude these from the column
		# list so they are not written twice.
		push @attnames, $attname
		  if !(grep { $_ eq $attname } @METADATA);
	}

	# Write output files to specified directory.
	my $datfile = "$output_path$catname.dat";
	open my $dat, '>', $datfile
	  or die "can't open $datfile: $!";

	foreach my $data (@{ $catalog_data{$catname} })
	{
		# Hash ref representing a data entry.
		if (ref $data eq 'HASH')
		{
			# Work on a shallow copy so the parsed data stays intact.
			my %values = %$data;

			############################################################
			# At this point we have the full tuple in memory as a hash
			# and can do any operations we want. As written, it only
			# removes default values, but this script can be adapted to
			# do one-off bulk-editing.
			############################################################

			if (!$full_tuples)
			{
				# If it's an autogenerated entry, drop it completely.
				next if $values{autogenerated};

				# Else, just drop any default/computed fields.
				strip_default_values(\%values, $schema, $catname);
			}

			print $dat "{";

			# Separate out metadata fields for readability.
			my $metadata_str = format_hash(\%values, @METADATA);
			if ($metadata_str)
			{
				print $dat $metadata_str;

				# User attributes start on next line.
				print $dat ",\n ";
			}

			my $data_str = format_hash(\%values, @attnames);
			print $dat $data_str;
			print $dat " },\n";
		}

		# Preserve blank lines.
		elsif ($data =~ /^\s*$/)
		{
			print $dat "\n";
		}

		# Preserve comments or brackets that are on their own line.
		elsif ($data =~ /^\s*(\[|\]|#.*?)\s*$/)
		{
			print $dat "$1\n";
		}
	}

	# Buffered write errors (e.g. a full disk) are only reported when the
	# handle is closed, so a failed close must not go unnoticed.
	close $dat
	  or die "can't close $datfile: $!";
}

# Remove column values for which there is a matching default,
# or if the value can be computed from other columns.
#
# Arguments:
#   $row     - hash ref of one catalog entry; modified in place
#   $schema  - array ref of column descriptors; the 'name' and 'default'
#              keys of each descriptor are consulted
#   $catname - catalog name, used in error messages and to select the
#              catalog-specific computed columns
#
# Dies if any column other than oid has no value in $row.
sub strip_default_values
{
	my ($row, $schema, $catname) = @_;

	# Delete values that match defaults.
	foreach my $column (@$schema)
	{
		my $attname = $column->{name};

		# It's okay if we have no oid value, since it will be assigned
		# automatically before bootstrap.
		die "strip_default_values: $catname.$attname undefined\n"
		  if !defined $row->{$attname} and $attname ne 'oid';

		# The value can still be undefined here (a missing oid), so test
		# for that before comparing: warnings are fatal in this script.
		if (    defined $column->{default}
			and defined $row->{$attname}
			and $row->{$attname} eq $column->{default})
		{
			delete $row->{$attname};
		}
	}

	# Delete computed values.  See AddDefaultValues() in Catalog.pm.
	# Note: This must be done after deleting values matching defaults.
	if ($catname eq 'pg_proc')
	{
		delete $row->{pronargs} if defined $row->{proargtypes};
	}

	# If a pg_type entry has an auto-generated array type, then its
	# typarray field is a computed value too (see GenerateArrayTypes).
	if ($catname eq 'pg_type')
	{
		delete $row->{typarray} if defined $row->{array_type_oid};
	}

	return;
}

# Format the individual elements of a Perl hash into a valid string
# representation.  We do this ourselves, rather than use native Perl
# facilities, so we can keep control over the exact formatting of the
# data files.
#
# Arguments:
#   $data - hash ref holding the values to format
#   @_    - remaining arguments are the attribute names to emit, in order
#
# Returns a string of "key => 'value'" pairs separated by commas and
# wrapped to fit in 80 columns, or the empty string if none of the named
# attributes has a defined value.
sub format_hash
{
	my $data = shift;
	my @orig_attnames = @_;

	# Keep only the attributes that have a value, so we can determine
	# the last populated element.  We do this because we may have default
	# values or empty metadata fields.
	my @attnames = grep { defined $data->{$_} } @orig_attnames;

	# When calling this function, we either have an open-bracket or a
	# leading space already.
	my $char_count = 1;

	my $threshold;
	my $hash_str = '';
	my $element_count = 0;

	foreach my $attname (@attnames)
	{
		$element_count++;

		# To limit the line to 80 chars, we need to account for the
		# trailing characters.
		if ($element_count == @attnames)
		{
			# Last element, so allow space for ' },'
			$threshold = 77;
		}
		else
		{
			# Just need space for trailing comma
			$threshold = 79;
		}

		if ($element_count > 1)
		{
			$hash_str .= ',';
			$char_count++;
		}

		my $value = $data->{$attname};

		# Escape single quotes.
		$value =~ s/'/\\'/g;

		# Include a leading space in the key-value pair, since this will
		# always go after either a comma or an additional padding space on
		# the next line.
		my $element = " $attname => '$value'";
		my $element_length = length($element);

		# If adding the element to the current line would expand the line
		# beyond 80 chars, put it on the next line.  We don't do this for
		# the first element, since that would create a blank line.
		if ($element_count > 1 and $char_count + $element_length > $threshold)
		{
			# Put on next line with an additional space preceding.  There
			# are now two spaces in front of the key-value pair, lining
			# it up with the line above it.
			$hash_str .= "\n $element";
			$char_count = $element_length + 1;
		}
		else
		{
			$hash_str .= $element;
			$char_count += $element_length;
		}
	}
	return $hash_str;
}

sub usage
{
	die <