From 995e0fbc1c57c9b705c57de456d25c6e448bc5dd Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Wed, 20 Mar 2024 18:02:50 -0400
Subject: [PATCH] Un-break genbki.pl's error reporting capabilities.

This essentially reverts commit 69eb643b2, which added a fast path
in Catalog::ParseData, but neglected to preserve the behavior of
adding a line_number field in each hash.  That makes it impossible
for genbki.pl to provide any localization of error reports, which is
bad enough; but actually the affected error reports failed entirely,
producing useless bleats like "use of undefined value in sprintf".

69eb643b2 claimed to get a 15% speedup, but I'm not sure I believe
that: the time to rebuild the bki files changes by less than 1% for
me.  In any case, making debugging of mistakes in .dat files more
difficult would not be justified by even an order of magnitude
speedup here; it's just not that big a chunk of the total build time.

Per report from David Wheeler.  Although it's also broken in v16,
I don't think this is worth a back-patch, since we're very unlikely
to touch the v16 catalog data again.

Discussion: https://postgr.es/m/19238.1710953049@sss.pgh.pa.us
---
 src/backend/catalog/Catalog.pm | 28 +++++++---------------------
 1 file changed, 7 insertions(+), 21 deletions(-)

diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm
index 55a8877aed..7168c90562 100644
--- a/src/backend/catalog/Catalog.pm
+++ b/src/backend/catalog/Catalog.pm
@@ -300,12 +300,14 @@ sub ParseHeader
 
 # Parses a file containing Perl data structure literals, returning live data.
 #
-# The parameter $preserve_formatting needs to be set for callers that want
+# The parameter $preserve_comments needs to be set for callers that want
 # to work with non-data lines in the data files, such as comments and blank
 # lines. If a caller just wants to consume the data, leave it unset.
+# (When requested, non-data lines will be returned as array entries that
+# are strings not hashes, so extra code is needed to deal with that.)
 sub ParseData
 {
-	my ($input_file, $schema, $preserve_formatting) = @_;
+	my ($input_file, $schema, $preserve_comments) = @_;
 
 	open(my $ifd, '<', $input_file) || die "$input_file: $!";
 	$input_file =~ /(\w+)\.dat$/
@@ -313,8 +315,6 @@ sub ParseData
 	my $catname = $1;
 	my $data = [];
 
-	if ($preserve_formatting)
-	{
 		# Scan the input file.
 		while (<$ifd>)
 		{
@@ -369,31 +369,17 @@ sub ParseData
 			# with --full-tuples to print autogenerated entries, which seems like
 			# useful behavior for debugging.)
 			#
-			# Otherwise, we have a non-data string, which we need to keep in
-			# order to preserve formatting.
+			# Otherwise, we have a non-data string, which we keep only if
+			# the caller requested it.
 			if (defined $hash_ref)
 			{
 				push @$data, $hash_ref if !$hash_ref->{autogenerated};
 			}
 			else
 			{
-				push @$data, $_;
+				push @$data, $_ if $preserve_comments;
 			}
 		}
-	}
-	else
-	{
-		# When we only care about the contents, it's faster to read and eval
-		# the whole file at once.
-		local $/;
-		my $full_file = <$ifd>;
-		eval "\$data = $full_file"    ## no critic (ProhibitStringyEval)
-		  or die "error parsing $input_file\n";
-		foreach my $hash_ref (@{$data})
-		{
-			AddDefaultValues($hash_ref, $schema, $catname);
-		}
-	}
 
 	close $ifd;