Parse catalog .dat files as a whole when compiling the backend

Previously, Catalog.pm eval'd each individual hash reference so that
comments and whitespace could be preserved when running
reformat_dat_files.pl. This is unnecessary when building, and we can
shave ~15% off the run time of genbki.pl by simply slurping and
eval'ing the whole file at once. That saving is worthwhile in highly
parallel builds, since most build targets depend on this script's
outputs.
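
For illustration, here is a minimal standalone sketch of the
slurp-and-eval idiom, assuming a .dat file is a Perl array-of-hashes
literal (which it is); the input file name and the final printf are
hypothetical, not part of Catalog.pm:

    #!/usr/bin/perl
    use strict;
    use warnings;

    # Hypothetical input; real callers pass a catalog file such as
    # src/include/catalog/pg_proc.dat.
    my $input_file = 'pg_proc.dat';
    open my $ifd, '<', $input_file
      or die "could not open $input_file: $!";

    my $data;
    {
        local $/;    # undef the input record separator: slurp mode
        my $full_file = <$ifd>;    # a single read returns the whole file
        # The file body is one Perl expression ([ { ... }, { ... } ]),
        # so a single string eval parses every entry at once.
        eval '$data = ' . $full_file    ## no critic (ProhibitStringyEval)
          or die "error parsing $input_file\n";
    }
    close $ifd;

    printf "parsed %d entries\n", scalar @{$data};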

Report and review by Andres Freund
Discussion: https://www.postgresql.org/message-id/CAFBsxsGW%3DWRbnxXrc8UqqR479XuxtukSFWV-hnmtgsbuNAUO6w%40mail.gmail.com
Author: John Naylor
Date:   2022-09-08 13:23:13 +07:00
Parent: 0324651573
Commit: 69eb643b25

1 changed file with 20 additions and 2 deletions

@@ -287,6 +287,8 @@ sub ParseData
     my $catname = $1;
     my $data = [];

+    if ($preserve_formatting)
+    {
     # Scan the input file.
     while (<$ifd>)
     {
@@ -341,16 +343,32 @@ sub ParseData
         # with --full-tuples to print autogenerated entries, which seems like
         # useful behavior for debugging.)
         #
-        # Otherwise, we have a non-data string, which we need to keep in
-        # order to preserve formatting.
+        # Only keep non-data strings if we are told to preserve formatting.
         if (defined $hash_ref)
         {
             push @$data, $hash_ref if !$hash_ref->{autogenerated};
         }
-        else
+        elsif ($preserve_formatting)
         {
             push @$data, $_;
         }
     }
+    }
+    else
+    {
+        # When we only care about the contents, it's faster to read and eval
+        # the whole file at once.
+        local $/;
+        my $full_file = <$ifd>;
+        eval '$data = ' . $full_file    ## no critic (ProhibitStringyEval)
+          or die "error parsing $input_file\n";
+        foreach my $hash_ref (@{$data})
+        {
+            AddDefaultValues($hash_ref, $schema, $catname);
+        }
+    }

     close $ifd;

     # If this is pg_type, auto-generate array types too.
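
Whole-file eval skips the per-entry processing that the line-by-line
path performs, which is why the new branch applies column defaults in a
separate foreach pass. A simplified sketch of what such a defaults pass
does; this is an illustration, not the actual AddDefaultValues()
implementation in Catalog.pm:

    # For each column in the catalog's schema, fill in a missing
    # attribute from the schema's declared default, and complain if a
    # required value is absent. $schema is an array of column
    # definitions; $row is one parsed .dat entry.
    sub add_defaults_sketch
    {
        my ($row, $schema, $catname) = @_;
        my @missing;
        foreach my $column (@$schema)
        {
            my $attname = $column->{name};
            next if defined $row->{$attname};
            if (defined $column->{default})
            {
                $row->{$attname} = $column->{default};
            }
            else
            {
                push @missing, $attname;
            }
        }
        die "missing values for @missing in a $catname entry\n"
          if @missing;
    }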