2010-01-05 02:06:57 +01:00
|
|
|
#----------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# Catalog.pm
|
|
|
|
# Perl module that extracts info from catalog headers into Perl
|
|
|
|
# data structures
|
|
|
|
#
|
2018-01-03 05:30:12 +01:00
|
|
|
# Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
|
2010-01-05 02:06:57 +01:00
|
|
|
# Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
#
|
2010-09-20 22:08:53 +02:00
|
|
|
# src/backend/catalog/Catalog.pm
|
2010-01-05 02:06:57 +01:00
|
|
|
#
|
|
|
|
#----------------------------------------------------------------------
|
|
|
|
|
|
|
|
package Catalog;
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
|
|
|
|
# Parse catalog header files into a nested Perl data structure.
#
# Call this function with a list of names of header files to parse.
#
# Returns a hash reference containing:
#   names     - array ref of the catalog names found in CATALOG() lines,
#               in the order they were encountered
#   <catname> - one hash ref per input file, stored under the catalog name
#               most recently set while scanning that file: the name given
#               in CATALOG(), or 'toasting' / 'indexing' for files made of
#               DECLARE_TOAST() / DECLARE_[UNIQUE_]INDEX() lines
#
# Each per-file hash contains:
#   columns   - array ref of column hashes with keys 'type' and 'name', plus
#               'forcenull' or 'forcenotnull' when the matching BKI_ option
#               is present
#   data      - array ref whose entries are either hashes
#               { oid, bki_values [, descr] [, shdescr] } for DATA() lines,
#               or ready-made "declare ..." / "build indices" strings
#   natts     - the number found after the Natts_<file> symbol, if seen
#   relation_oid, bootstrap, shared_relation, without_oids, rowtype_oid,
#   schema_macro - set from the CATALOG() line
#
# Dies on unreadable files, on a comment that runs past end of file, on
# malformed attribute lines, on DESCR()/SHDESCR() lines that do not follow a
# DATA() line with an OID, and on a file that declares no catalog at all.
sub Catalogs
{
    my %catalogs = (names => []);

    # There are a few types which are given one name in the C source, but a
    # different name at the SQL level.  These are enumerated here.
    my %RENAME_ATTTYPE = (
        'int16'         => 'int2',
        'int32'         => 'int4',
        'int64'         => 'int8',
        'Oid'           => 'oid',
        'NameData'      => 'name',
        'TransactionId' => 'xid',
        'XLogRecPtr'    => 'pg_lsn');

    foreach my $input_file (@_)
    {
        my %catalog = (columns => [], data => []);

        # Parser state is per file, so that a header lacking a declaration
        # (or lacking the closing brace of its attribute list) cannot
        # inherit state from, or overwrite the results of, an earlier file.
        my ($catname, $declaring_attributes);

        open(my $ifh, '<', $input_file) || die "$input_file: $!";

        my ($filename) = ($input_file =~ m/(\w+)\.h$/);
        my $natts_pat = "Natts_$filename";

        # Scan the input file.  A lexical is used instead of $_ so that the
        # caller's $_ is left alone.
        while (defined(my $line = <$ifh>))
        {

            # Strip C-style comments.  The match is non-greedy so that code
            # sitting between two comments on the same line survives.  If a
            # comment is still open afterwards, pull in following lines
            # until it is closed.
            while (1)
            {
                $line =~ s{/\*.*?\*/}{}gs;
                last unless $line =~ m{/\*};

                my $next_line = <$ifh>;
                die "$input_file: ends within C-style comment\n"
                  if !defined $next_line;
                $line .= $next_line;
            }

            # Remember input line number for later.
            my $input_line_number = $.;

            # Strip useless whitespace and trailing semicolons.
            chomp $line;
            $line =~ s/^\s+//;
            $line =~ s/;\s*$//;
            $line =~ s/\s+/ /g;

            # Push the data into the appropriate data structure.
            if ($line =~ /$natts_pat\s+(\d+)/)
            {
                $catalog{natts} = $1;
            }
            elsif ($line =~
                /^DATA\(insert(\s+OID\s+=\s+(\d+))?\s+\(\s*(.*)\s*\)\s*\)$/)
            {
                # Save the captures before calling anything else.
                my ($oid, $bki_values) = ($2, $3);

                check_natts($filename, $catalog{natts}, $bki_values,
                    $input_file, $input_line_number);

                push @{ $catalog{data} },
                  { oid => $oid, bki_values => $bki_values };
            }
            elsif ($line =~ /^(SH)?DESCR\(\"(.*)\"\)$/)
            {
                my $macro = defined $1 ? 'SHDESCR' : 'DESCR';
                my $text = $2;
                my $most_recent = $catalog{data}->[-1];

                # this tests if most recent line is not a DATA() statement
                die "$macro() does not apply to any catalog ($input_file)"
                  if ref $most_recent ne 'HASH';
                die "$macro() does not apply to any oid ($input_file)"
                  if !defined $most_recent->{oid};

                # Stored under 'descr' or 'shdescr'; empty text is ignored.
                $most_recent->{ lc $macro } = $text if $text ne '';
            }
            elsif ($line =~ /^DECLARE_TOAST\(\s*(\w+),\s*(\d+),\s*(\d+)\)/)
            {
                $catname = 'toasting';
                my ($toast_name, $toast_oid, $index_oid) = ($1, $2, $3);
                push @{ $catalog{data} },
                  "declare toast $toast_oid $index_oid on $toast_name\n";
            }
            elsif ($line =~
                /^DECLARE_(UNIQUE_)?INDEX\(\s*(\w+),\s*(\d+),\s*(.+)\)/)
            {
                $catname = 'indexing';
                my ($is_unique, $index_name, $index_oid, $using) =
                  ($1, $2, $3, $4);
                push @{ $catalog{data} },
                  sprintf(
                    "declare %sindex %s %s %s\n",
                    $is_unique ? 'unique ' : '',
                    $index_name, $index_oid, $using);
            }
            elsif ($line =~ /^BUILD_INDICES/)
            {
                push @{ $catalog{data} }, "build indices\n";
            }
            elsif ($line =~ /^CATALOG\(([^,]*),(\d+)\)/)
            {
                $catname = $1;
                $catalog{relation_oid} = $2;

                # Store pg_* catalog names in the same order we receive them
                push @{ $catalogs{names} }, $catname;

                $catalog{bootstrap} =
                  ($line =~ /BKI_BOOTSTRAP/) ? ' bootstrap' : '';
                $catalog{shared_relation} =
                  ($line =~ /BKI_SHARED_RELATION/) ? ' shared_relation' : '';
                $catalog{without_oids} =
                  ($line =~ /BKI_WITHOUT_OIDS/) ? ' without_oids' : '';
                $catalog{rowtype_oid} =
                  ($line =~ /BKI_ROWTYPE_OID\((\d+)\)/)
                  ? " rowtype_oid $1"
                  : '';
                $catalog{schema_macro} =
                  ($line =~ /BKI_SCHEMA_MACRO/) ? 1 : 0;

                # The lines up to the closing brace declare the columns.
                $declaring_attributes = 1;
            }
            elsif ($declaring_attributes)
            {
                # Skip the opening brace, blank lines and preprocessor lines.
                next if $line =~ /^\{|^$/;
                next if $line =~ /^#/;

                if ($line =~ /^\}/)
                {
                    undef $declaring_attributes;
                }
                else
                {
                    my ($atttype, $attname, $attopt) = split /\s+/, $line;
                    die "parse error ($input_file)" unless $attname;

                    if (exists $RENAME_ATTTYPE{$atttype})
                    {
                        $atttype = $RENAME_ATTTYPE{$atttype};
                    }
                    if ($attname =~ /(.*)\[.*\]/)    # array attribute
                    {
                        $attname = $1;
                        $atttype .= '[]';    # variable-length only
                    }

                    my %column = (type => $atttype, name => $attname);

                    if (defined $attopt)
                    {
                        if ($attopt eq 'BKI_FORCE_NULL')
                        {
                            $column{forcenull} = 1;
                        }
                        elsif ($attopt eq 'BKI_FORCE_NOT_NULL')
                        {
                            $column{forcenotnull} = 1;
                        }
                        else
                        {
                            die
                              "unknown column option $attopt on column $attname";
                        }
                    }
                    push @{ $catalog{columns} }, \%column;
                }
            }
        }
        close $ifh;

        # Without a name there is no sensible key to store this file under.
        die
          "$input_file: no CATALOG(), DECLARE_TOAST() or DECLARE_INDEX() declaration found\n"
          unless defined $catname;

        $catalogs{$catname} = \%catalog;
    }
    return \%catalogs;
}
|
|
|
|
|
Move bootstrap-time lookup of regproc OIDs into genbki.pl.
Formerly, the bootstrap backend looked up the OIDs corresponding to
names in regproc catalog entries using brute-force searches of pg_proc.
It was somewhat remarkable that that worked at all, since it was used
while populating other pretty-fundamental catalogs like pg_operator.
And it was also quite slow, and getting slower as pg_proc gets bigger.
This patch moves the lookup work into genbki.pl, so that the values in
postgres.bki for regproc columns are always numeric OIDs, an option
that regprocin() already supported. Perl isn't the world's speediest
language, so this about doubles the time needed to run genbki.pl (from
0.3 to 0.6 sec on my machine). But we only do that at most once per
build. The time needed to run initdb drops significantly --- on my
machine, initdb --no-sync goes from 1.8 to 1.3 seconds. So this is
a small net win even for just one initdb per build, and it becomes
quite a nice win for test sequences requiring many initdb runs.
Strip out the now-dead code for brute-force catalog searching in
regprocin. We'd also cargo-culted similar logic into regoperin
and some (not all) of the other reg*in functions. That is all
dead code too since we currently have no need to load such values
during bootstrap. I removed it all, reasoning that if we ever
need such functionality it'd be much better to do it in a similar
way to this patch.
There might be some simplifications possible in the backend now that
regprocin doesn't require doing catalog reads so early in bootstrap.
I've not looked into that, though.
Andreas Karlsson, with some small adjustments by me
Discussion: https://postgr.es/m/30896.1492006367@sss.pgh.pa.us
2017-04-13 18:07:47 +02:00
|
|
|
# Break the text of a DATA line into its individual fields.
#
# Pass the bki_values element of a DATA item produced by Catalogs().
# Returns the fields as an array, in order; quoted fields keep their
# surrounding quote marks.
# Note: assigning the result to a list as long as the nominal number of
# catalog fields should be safe, since check_natts has already verified
# the field count.
sub SplitDataLine
{
    my ($bki_values) = @_;
    my @fields;

    # A field is either a double-quoted string or a run of non-whitespace.
    # The quoted-string rule might look too simplistic, but it mirrors
    # bootscanner.l, which likewise has no provision for quote marks inside
    # quoted strings.  Unquoted text is taken up to the next whitespace;
    # that accepts some things bootscanner.l will reject as erroneous
    # tokens, but it seems wiser to let bootscanner.l complain than to
    # silently drop non-whitespace characters here.
    while ($bki_values =~ /("[^"]*"|\S+)/g)
    {
        push @fields, $1;
    }

    return @fields;
}
|
|
|
|
|
2010-01-05 03:34:03 +01:00
|
|
|
# Move a temporary output file into place under its final name.
#
# Arguments: the final file name, and the extension that was appended to it
# to form the temporary file's name.
# Note: the recommended extension is ".tmp$$", so that parallel make steps
# can't use the same temp files.
# Prints a progress line to standard output and dies if the rename fails.
sub RenameTempFile
{
    my ($final_name, $extension) = @_;

    # The temporary file lives next to the target, differing only by suffix.
    my $temp_name = $final_name . $extension;

    print "Writing $final_name\n";
    rename($temp_name, $final_name)
      or die "rename: $temp_name: $!";
}
|
|
|
|
|
2017-10-04 09:11:36 +02:00
|
|
|
|
|
|
|
# Find a symbol defined in a particular header file and extract the value.
#
# Arguments:
#   $catalog_header - header file name, relative to an include directory
#   $include_path   - reference to an array of include directories; the
#                     array is not modified
#   $symbol         - name of the #define'd symbol to look up
#
# Returns the first whitespace-delimited token following the symbol on its
# "#define" line.
#
# Only the first include directory that contains the header is examined.
# Dies if that header does not define the symbol, or if no include
# directory contains the header.
sub FindDefinedSymbol
{
    my ($catalog_header, $include_path, $symbol) = @_;

    for my $dir (@$include_path)
    {

        # Work on a copy: the loop variable aliases the caller's array
        # element, so appending to it would alter the caller's include path.
        my $path = $dir;

        # Make sure include path ends in a slash.
        $path .= '/' if substr($path, -1) ne '/';

        my $file = $path . $catalog_header;
        next if !-f $file;

        open(my $find_defined_symbol, '<', $file) || die "$file: $!";
        my $value;
        while (my $line = <$find_defined_symbol>)
        {
            if ($line =~ /^#define\s+\Q$symbol\E\s+(\S+)/)
            {
                $value = $1;
                last;
            }
        }

        # Close explicitly on every path, including the successful one.
        close $find_defined_symbol;

        return $value if defined $value;
        die "$file: no definition found for $symbol\n";
    }
    die "$catalog_header: not found in any include directory\n";
}
|
|
|
|
|
|
|
|
|
Move bootstrap-time lookup of regproc OIDs into genbki.pl.
Formerly, the bootstrap backend looked up the OIDs corresponding to
names in regproc catalog entries using brute-force searches of pg_proc.
It was somewhat remarkable that that worked at all, since it was used
while populating other pretty-fundamental catalogs like pg_operator.
And it was also quite slow, and getting slower as pg_proc gets bigger.
This patch moves the lookup work into genbki.pl, so that the values in
postgres.bki for regproc columns are always numeric OIDs, an option
that regprocin() already supported. Perl isn't the world's speediest
language, so this about doubles the time needed to run genbki.pl (from
0.3 to 0.6 sec on my machine). But we only do that at most once per
build. The time needed to run initdb drops significantly --- on my
machine, initdb --no-sync goes from 1.8 to 1.3 seconds. So this is
a small net win even for just one initdb per build, and it becomes
quite a nice win for test sequences requiring many initdb runs.
Strip out the now-dead code for brute-force catalog searching in
regprocin. We'd also cargo-culted similar logic into regoperin
and some (not all) of the other reg*in functions. That is all
dead code too since we currently have no need to load such values
during bootstrap. I removed it all, reasoning that if we ever
need such functionality it'd be much better to do it in a similar
way to this patch.
There might be some simplifications possible in the backend now that
regprocin doesn't require doing catalog reads so early in bootstrap.
I've not looked into that, though.
Andreas Karlsson, with some small adjustments by me
Discussion: https://postgr.es/m/30896.1492006367@sss.pgh.pa.us
2017-04-13 18:07:47 +02:00
|
|
|
# Check that a DATA line carries the expected number of fields.
#
# Arguments:
#   $catname - catalog name, used only in the "Natts_..." error message
#   $natts   - expected number of attributes; undef if not yet known
#   $bki_val - text of the DATA line's values, as split by SplitDataLine()
#   $file    - input file name, for error messages
#   $line    - input line number, for error messages
#
# Dies if $natts is undefined or differs from the number of fields found.
sub check_natts
{
    my ($catname, $natts, $bki_val, $file, $line) = @_;

    # The attribute count has to be known before any DATA line appears.
    defined $natts
      or die
      "Could not find definition for Natts_${catname} before start of DATA() in $file\n";

    my $nfields = scalar(SplitDataLine($bki_val));

    $natts == $nfields
      or die sprintf(
        "Wrong number of attributes in DATA() entry at %s:%d (expected %d but got %d)\n",
        $file, $line, $natts, $nfields);
}
|
2017-03-10 02:45:52 +01:00
|
|
|
|
2010-01-05 02:06:57 +01:00
|
|
|
1;
|