Move bootstrap-time lookup of regproc OIDs into genbki.pl.

Formerly, the bootstrap backend looked up the OIDs corresponding to
names in regproc catalog entries using brute-force searches of pg_proc.
It was somewhat remarkable that that worked at all, since it was used
while populating other pretty-fundamental catalogs like pg_operator.
And it was also quite slow, and getting slower as pg_proc gets bigger.

This patch moves the lookup work into genbki.pl, so that the values in
postgres.bki for regproc columns are always numeric OIDs, an option
that regprocin() already supported.  Perl isn't the world's speediest
language, so this about doubles the time needed to run genbki.pl (from
0.3 to 0.6 sec on my machine).  But we only do that at most once per
build.  The time needed to run initdb drops significantly --- on my
machine, initdb --no-sync goes from 1.8 to 1.3 seconds.  So this is
a small net win even for just one initdb per build, and it becomes
quite a nice win for test sequences requiring many initdb runs.

Strip out the now-dead code for brute-force catalog searching in
regprocin.  We'd also cargo-culted similar logic into regoperin
and some (not all) of the other reg*in functions.  That is all
dead code too since we currently have no need to load such values
during bootstrap.  I removed it all, reasoning that if we ever
need such functionality it'd be much better to do it in a similar
way to this patch.

There might be some simplifications possible in the backend now that
regprocin doesn't require doing catalog reads so early in bootstrap.
I've not looked into that, though.

Andreas Karlsson, with some small adjustments by me

Discussion: https://postgr.es/m/30896.1492006367@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2017-04-13 12:07:47 -04:00
parent a9254e675b
commit 5e39f06cfe
4 changed files with 120 additions and 219 deletions

View File

@ -19,7 +19,7 @@ use warnings;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = ();
our @EXPORT_OK = qw(Catalogs RenameTempFile);
our @EXPORT_OK = qw(Catalogs SplitDataLine RenameTempFile);
# Call this function with an array of names of header files to parse.
# Returns a nested data structure describing the data in the headers.
@ -216,6 +216,28 @@ sub Catalogs
return \%catalogs;
}
# Tokenize one DATA line.
# The argument is the bki_values string of a DATA item as produced by
# Catalogs(); the return value is the list of field tokens found in it.
# Quoted fields are returned with their double quotes still attached.
# Note: check_natts has already verified the field count, so the result can
# be assigned to a list as long as the catalog's nominal number of columns.
sub SplitDataLine
{
	my ($data_line) = @_;

	# A token is either a double-quoted string or a run of non-whitespace.
	# That is deliberately as naive as bootscanner.l, which likewise has no
	# way to embed a quote mark inside a quoted string.  Unquoted junk is
	# passed through untouched: better that bootscanner.l rejects a bad
	# token later than that we quietly discard characters here.
	my @fields = ($data_line =~ /"[^"]*"|\S+/g);

	# Return a real array (not the bare match expression) so that a caller
	# evaluating this sub in scalar context receives the number of fields.
	return @fields;
}
# Rename temporary files to final names.
# Call this function with the final file name and the .tmp extension
# Note: recommended extension is ".tmp$$", so that parallel make steps
@ -229,21 +251,20 @@ sub RenameTempFile
rename($temp_name, $final_name) || die "rename: $temp_name: $!";
}
# verify the number of fields in the passed-in bki structure
# verify the number of fields in the passed-in DATA line
#
# Arguments, in order: the catalog name, the expected attribute count for
# that catalog, the text of the DATA line, and the file name and line number
# to report in error messages.
# Dies if the expected count is undefined, or if it differs from the number
# of fields counted in the DATA line.
sub check_natts
{
my ($catname, $natts, $bki_val, $file, $line) = @_;
# The expected attribute count must already be known when a DATA() entry
# is checked; otherwise there is nothing to compare against.
die "Could not find definition for Natts_${catname} before start of DATA() in $file\n"
unless defined $natts;
# we're working with a copy and need to count the fields only, so collapse
$bki_val =~ s/"[^"]*?"/xxx/g;
my @atts = split /\s+/, $bki_val;
# Count fields with SplitDataLine; forcing scalar context on its
# returned array gives the number of fields.
my $nfields = scalar(SplitDataLine($bki_val));
die sprintf
"Wrong number of attributes in DATA() entry at %s:%d (expected %d but got %d)\n",
$file, $line, $natts, scalar @atts
unless $natts == @atts;
$file, $line, $natts, $nfields
unless $natts == $nfields;
}
1;

View File

@ -102,6 +102,7 @@ print $bki "# PostgreSQL $major_version\n";
# vars to hold data needed for schemapg.h
my %schemapg_entries;
my @tables_needing_macros;
my %regprocoids;
our @types;
# produce output, one catalog at a time
@ -160,24 +161,57 @@ foreach my $catname (@{ $catalogs->{names} })
foreach my $row (@{ $catalog->{data} })
{
# substitute constant values we acquired above
$row->{bki_values} =~ s/\bPGUID\b/$BOOTSTRAP_SUPERUSERID/g;
$row->{bki_values} =~ s/\bPGNSP\b/$PG_CATALOG_NAMESPACE/g;
# Split line into tokens without interpreting their meaning.
my %bki_values;
@bki_values{@attnames} = Catalog::SplitDataLine($row->{bki_values});
# Perform required substitutions on fields
foreach my $att (keys %bki_values)
{
# Substitute constant values we acquired above.
# (It's intentional that this can apply to parts of a field).
$bki_values{$att} =~ s/\bPGUID\b/$BOOTSTRAP_SUPERUSERID/g;
$bki_values{$att} =~ s/\bPGNSP\b/$PG_CATALOG_NAMESPACE/g;
# Replace regproc columns' values with OIDs.
# If we don't have a unique value to substitute,
# just do nothing (regprocin will complain).
if ($bki_attr{$att}->{type} eq 'regproc')
{
my $procoid = $regprocoids{$bki_values{$att}};
$bki_values{$att} = $procoid
if defined($procoid) && $procoid ne 'MULTIPLE';
}
}
# Save pg_proc oids for use in later regproc substitutions.
# This relies on the order we process the files in!
if ($catname eq 'pg_proc')
{
if (defined($regprocoids{$bki_values{proname}}))
{
$regprocoids{$bki_values{proname}} = 'MULTIPLE';
}
else
{
$regprocoids{$bki_values{proname}} = $row->{oid};
}
}
# Save pg_type info for pg_attribute processing below
if ($catname eq 'pg_type')
{
my %type;
my %type = %bki_values;
$type{oid} = $row->{oid};
@type{@attnames} = split /\s+/, $row->{bki_values};
push @types, \%type;
}
# Write to postgres.bki
my $oid = $row->{oid} ? "OID = $row->{oid} " : '';
printf $bki "insert %s( %s)\n", $oid, $row->{bki_values};
printf $bki "insert %s( %s )\n", $oid,
join(' ', @bki_values{@attnames});
# Write comments to postgres.description and postgres.shdescription
# Write comments to postgres.description and postgres.shdescription
if (defined $row->{descr})
{
printf $descr "%s\t%s\t0\t%s\n", $row->{oid}, $catname,
@ -426,7 +460,7 @@ sub bki_insert
my @attnames = @_;
my $oid = $row->{oid} ? "OID = $row->{oid} " : '';
my $bki_values = join ' ', map { $_ eq '' ? '""' : $_ } map $row->{$_}, @attnames;
printf $bki "insert %s( %s)\n", $oid, $bki_values;
printf $bki "insert %s( %s )\n", $oid, $bki_values;
}
# The field values of a Schema_pg_xxx declaration are similar, but not

View File

@ -58,30 +58,20 @@ foreach my $column (@{ $catalogs->{pg_proc}->{columns} })
my $data = $catalogs->{pg_proc}->{data};
foreach my $row (@$data)
{
# To construct fmgroids.h and fmgrtab.c, we need to inspect some
# of the individual data fields. Just splitting on whitespace
# won't work, because some quoted fields might contain internal
# whitespace. We handle this by folding them all to a simple
# "xxx". Fortunately, this script doesn't need to look at any
# fields that might need quoting, so this simple hack is
# sufficient.
$row->{bki_values} =~ s/"[^"]*"/"xxx"/g;
@{$row}{@attnames} = split /\s+/, $row->{bki_values};
# Split line into tokens without interpreting their meaning.
my %bki_values;
@bki_values{@attnames} = Catalog::SplitDataLine($row->{bki_values});
# Select out just the rows for internal-language procedures.
# Note assumption here that INTERNALlanguageId is 12.
next if $row->{prolang} ne '12';
next if $bki_values{prolang} ne '12';
push @fmgr,
{ oid => $row->{oid},
strict => $row->{proisstrict},
retset => $row->{proretset},
nargs => $row->{pronargs},
prosrc => $row->{prosrc}, };
# Hack to work around memory leak in some versions of Perl
$row = undef;
strict => $bki_values{proisstrict},
retset => $bki_values{proretset},
nargs => $bki_values{pronargs},
prosrc => $bki_values{prosrc}, };
}
# Emit headers for both files

View File

@ -21,10 +21,7 @@
#include <ctype.h>
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_class.h"
#include "catalog/pg_operator.h"
@ -36,10 +33,8 @@
#include "miscadmin.h"
#include "parser/parse_type.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/tqual.h"
#include "utils/acl.h"
#include "utils/regproc.h"
#include "utils/varlena.h"
@ -87,51 +82,11 @@ regprocin(PG_FUNCTION_ARGS)
/* Else it's a name, possibly schema-qualified */
/*
* In bootstrap mode we assume the given name is not schema-qualified, and
* just search pg_proc for a unique match. This is needed for
* initializing other system catalogs (pg_namespace may not exist yet, and
* certainly there are no schemas other than pg_catalog).
* We should never get here in bootstrap mode, as all references should
* have been resolved by genbki.pl.
*/
if (IsBootstrapProcessingMode())
{
int matches = 0;
Relation hdesc;
ScanKeyData skey[1];
SysScanDesc sysscan;
HeapTuple tuple;
ScanKeyInit(&skey[0],
Anum_pg_proc_proname,
BTEqualStrategyNumber, F_NAMEEQ,
CStringGetDatum(pro_name_or_oid));
hdesc = heap_open(ProcedureRelationId, AccessShareLock);
sysscan = systable_beginscan(hdesc, ProcedureNameArgsNspIndexId, true,
NULL, 1, skey);
while (HeapTupleIsValid(tuple = systable_getnext(sysscan)))
{
result = (RegProcedure) HeapTupleGetOid(tuple);
if (++matches > 1)
break;
}
systable_endscan(sysscan);
heap_close(hdesc, AccessShareLock);
if (matches == 0)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("function \"%s\" does not exist", pro_name_or_oid)));
else if (matches > 1)
ereport(ERROR,
(errcode(ERRCODE_AMBIGUOUS_FUNCTION),
errmsg("more than one function named \"%s\"",
pro_name_or_oid)));
PG_RETURN_OID(result);
}
elog(ERROR, "regproc values must be OIDs in bootstrap mode");
/*
* Normal case: parse the name into components and see if it matches any
@ -295,15 +250,15 @@ regprocedurein(PG_FUNCTION_ARGS)
PG_RETURN_OID(result);
}
/* The rest of this wouldn't work in bootstrap mode */
if (IsBootstrapProcessingMode())
elog(ERROR, "regprocedure values must be OIDs in bootstrap mode");
/*
* Else it's a name and arguments. Parse the name and arguments, look up
* potential matches in the current namespace search list, and scan to see
* which one exactly matches the given argument types. (There will not be
* more than one match.)
*
* XXX at present, this code will not work in bootstrap mode, hence this
* datatype cannot be used for any system column that needs to receive
* data during bootstrap.
*/
parseNameAndArgTypes(pro_name_or_oid, false, &names, &nargs, argtypes);
@ -400,6 +355,7 @@ format_procedure_internal(Oid procedure_oid, bool force_qualify)
StringInfoData buf;
/* XXX no support here for bootstrap mode */
Assert(!IsBootstrapProcessingMode());
initStringInfo(&buf);
@ -546,51 +502,9 @@ regoperin(PG_FUNCTION_ARGS)
/* Else it's a name, possibly schema-qualified */
/*
* In bootstrap mode we assume the given name is not schema-qualified, and
* just search pg_operator for a unique match. This is needed for
* initializing other system catalogs (pg_namespace may not exist yet, and
* certainly there are no schemas other than pg_catalog).
*/
/* The rest of this wouldn't work in bootstrap mode */
if (IsBootstrapProcessingMode())
{
int matches = 0;
Relation hdesc;
ScanKeyData skey[1];
SysScanDesc sysscan;
HeapTuple tuple;
ScanKeyInit(&skey[0],
Anum_pg_operator_oprname,
BTEqualStrategyNumber, F_NAMEEQ,
CStringGetDatum(opr_name_or_oid));
hdesc = heap_open(OperatorRelationId, AccessShareLock);
sysscan = systable_beginscan(hdesc, OperatorNameNspIndexId, true,
NULL, 1, skey);
while (HeapTupleIsValid(tuple = systable_getnext(sysscan)))
{
result = HeapTupleGetOid(tuple);
if (++matches > 1)
break;
}
systable_endscan(sysscan);
heap_close(hdesc, AccessShareLock);
if (matches == 0)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("operator does not exist: %s", opr_name_or_oid)));
else if (matches > 1)
ereport(ERROR,
(errcode(ERRCODE_AMBIGUOUS_FUNCTION),
errmsg("more than one operator named %s",
opr_name_or_oid)));
PG_RETURN_OID(result);
}
elog(ERROR, "regoper values must be OIDs in bootstrap mode");
/*
* Normal case: parse the name into components and see if it matches any
@ -759,15 +673,15 @@ regoperatorin(PG_FUNCTION_ARGS)
PG_RETURN_OID(result);
}
/* The rest of this wouldn't work in bootstrap mode */
if (IsBootstrapProcessingMode())
elog(ERROR, "regoperator values must be OIDs in bootstrap mode");
/*
* Else it's a name and arguments. Parse the name and arguments, look up
* potential matches in the current namespace search list, and scan to see
* which one exactly matches the given argument types. (There will not be
* more than one match.)
*
* XXX at present, this code will not work in bootstrap mode, hence this
* datatype cannot be used for any system column that needs to receive
* data during bootstrap.
*/
parseNameAndArgTypes(opr_name_or_oid, true, &names, &nargs, argtypes);
if (nargs == 1)
@ -852,6 +766,7 @@ format_operator_internal(Oid operator_oid, bool force_qualify)
StringInfoData buf;
/* XXX no support here for bootstrap mode */
Assert(!IsBootstrapProcessingMode());
initStringInfo(&buf);
@ -1006,42 +921,9 @@ regclassin(PG_FUNCTION_ARGS)
/* Else it's a name, possibly schema-qualified */
/*
* In bootstrap mode we assume the given name is not schema-qualified, and
* just search pg_class for a match. This is needed for initializing
* other system catalogs (pg_namespace may not exist yet, and certainly
* there are no schemas other than pg_catalog).
*/
/* The rest of this wouldn't work in bootstrap mode */
if (IsBootstrapProcessingMode())
{
Relation hdesc;
ScanKeyData skey[1];
SysScanDesc sysscan;
HeapTuple tuple;
ScanKeyInit(&skey[0],
Anum_pg_class_relname,
BTEqualStrategyNumber, F_NAMEEQ,
CStringGetDatum(class_name_or_oid));
hdesc = heap_open(RelationRelationId, AccessShareLock);
sysscan = systable_beginscan(hdesc, ClassNameNspIndexId, true,
NULL, 1, skey);
if (HeapTupleIsValid(tuple = systable_getnext(sysscan)))
result = HeapTupleGetOid(tuple);
else
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_TABLE),
errmsg("relation \"%s\" does not exist", class_name_or_oid)));
/* We assume there can be only one match */
systable_endscan(sysscan);
heap_close(hdesc, AccessShareLock);
PG_RETURN_OID(result);
}
elog(ERROR, "regclass values must be OIDs in bootstrap mode");
/*
* Normal case: parse the name into components and see if it matches any
@ -1163,16 +1045,16 @@ regclasssend(PG_FUNCTION_ARGS)
/*
* regtypein - converts "typename" to type OID
*
* We also accept a numeric OID, for symmetry with the output routine.
* The type name can be specified using the full type syntax recognized by
* the parser; for example, DOUBLE PRECISION and INTEGER[] will work and be
* translated to the correct type names. (We ignore any typmod info
* generated by the parser, however.)
*
* We also accept a numeric OID, for symmetry with the output routine,
* and for possible use in bootstrap mode.
*
* '-' signifies unknown (OID 0). In all other cases, the input must
* match an existing pg_type entry.
*
* In bootstrap mode the name must just equal some existing name in pg_type.
* In normal mode the type name can be specified using the full type syntax
* recognized by the parser; for example, DOUBLE PRECISION and INTEGER[] will
* work and be translated to the correct type names. (We ignore any typmod
* info generated by the parser, however.)
*/
Datum
regtypein(PG_FUNCTION_ARGS)
@ -1197,42 +1079,9 @@ regtypein(PG_FUNCTION_ARGS)
/* Else it's a type name, possibly schema-qualified or decorated */
/*
* In bootstrap mode we assume the given name is not schema-qualified, and
* just search pg_type for a match. This is needed for initializing other
* system catalogs (pg_namespace may not exist yet, and certainly there
* are no schemas other than pg_catalog).
*/
/* The rest of this wouldn't work in bootstrap mode */
if (IsBootstrapProcessingMode())
{
Relation hdesc;
ScanKeyData skey[1];
SysScanDesc sysscan;
HeapTuple tuple;
ScanKeyInit(&skey[0],
Anum_pg_type_typname,
BTEqualStrategyNumber, F_NAMEEQ,
CStringGetDatum(typ_name_or_oid));
hdesc = heap_open(TypeRelationId, AccessShareLock);
sysscan = systable_beginscan(hdesc, TypeNameNspIndexId, true,
NULL, 1, skey);
if (HeapTupleIsValid(tuple = systable_getnext(sysscan)))
result = HeapTupleGetOid(tuple);
else
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("type \"%s\" does not exist", typ_name_or_oid)));
/* We assume there can be only one match */
systable_endscan(sysscan);
heap_close(hdesc, AccessShareLock);
PG_RETURN_OID(result);
}
elog(ERROR, "regtype values must be OIDs in bootstrap mode");
/*
* Normal case: invoke the full parser to deal with special cases such as
@ -1342,9 +1191,6 @@ regtypesend(PG_FUNCTION_ARGS)
*
* '-' signifies unknown (OID 0). In all other cases, the input must
* match an existing pg_ts_config entry.
*
* This function is not needed in bootstrap mode, so we don't worry about
* making it work then.
*/
Datum
regconfigin(PG_FUNCTION_ARGS)
@ -1367,6 +1213,10 @@ regconfigin(PG_FUNCTION_ARGS)
PG_RETURN_OID(result);
}
/* The rest of this wouldn't work in bootstrap mode */
if (IsBootstrapProcessingMode())
elog(ERROR, "regconfig values must be OIDs in bootstrap mode");
/*
* Normal case: parse the name into components and see if it matches any
* pg_ts_config entries in the current search path.
@ -1452,9 +1302,6 @@ regconfigsend(PG_FUNCTION_ARGS)
*
* '-' signifies unknown (OID 0). In all other cases, the input must
* match an existing pg_ts_dict entry.
*
* This function is not needed in bootstrap mode, so we don't worry about
* making it work then.
*/
Datum
regdictionaryin(PG_FUNCTION_ARGS)
@ -1477,6 +1324,10 @@ regdictionaryin(PG_FUNCTION_ARGS)
PG_RETURN_OID(result);
}
/* The rest of this wouldn't work in bootstrap mode */
if (IsBootstrapProcessingMode())
elog(ERROR, "regdictionary values must be OIDs in bootstrap mode");
/*
* Normal case: parse the name into components and see if it matches any
* pg_ts_dict entries in the current search path.
@ -1562,9 +1413,6 @@ regdictionarysend(PG_FUNCTION_ARGS)
*
* '-' signifies unknown (OID 0). In all other cases, the input must
* match an existing pg_authid entry.
*
* This function is not needed in bootstrap mode, so we don't worry about
* making it work then.
*/
Datum
regrolein(PG_FUNCTION_ARGS)
@ -1587,6 +1435,10 @@ regrolein(PG_FUNCTION_ARGS)
PG_RETURN_OID(result);
}
/* The rest of this wouldn't work in bootstrap mode */
if (IsBootstrapProcessingMode())
elog(ERROR, "regrole values must be OIDs in bootstrap mode");
/* Normal case: see if the name matches any pg_authid entry. */
names = stringToQualifiedNameList(role_name_or_oid);
@ -1708,6 +1560,10 @@ regnamespacein(PG_FUNCTION_ARGS)
PG_RETURN_OID(result);
}
/* The rest of this wouldn't work in bootstrap mode */
if (IsBootstrapProcessingMode())
elog(ERROR, "regnamespace values must be OIDs in bootstrap mode");
/* Normal case: see if the name matches any pg_namespace entry. */
names = stringToQualifiedNameList(nsp_name_or_oid);