Add a test framework for recovery

This long-awaited framework is an expansion of the existing PostgresNode
stuff to support additional features for recovery testing; the recovery
tests included in this commit are a starting point that cover some of
the recovery features we have.  More scripts are expected to be added
later.

Author: Michaël Paquier, a bit of help from Amir Rohan
Reviewed by: Amir Rohan, Stas Kelvich, Kyotaro Horiguchi, Victor Wagner,
Craig Ringer, Álvaro Herrera
Discussion: http://www.postgresql.org/message-id/CAB7nPqTf7V6rswrFa=q_rrWeETUWagP=h8LX8XAov2Jcxw0DRg@mail.gmail.com
Discussion: http://www.postgresql.org/message-id/trinity-b4a8035d-59af-4c42-a37e-258f0f28e44a-1443795007012@3capp-mailcom-lxa08
This commit is contained in:
Alvaro Herrera 2016-02-26 16:13:30 -03:00
parent 89ac7004da
commit 49148645f7
12 changed files with 501 additions and 5 deletions

View File

@ -440,6 +440,7 @@ $ENV{CONFIG}="Debug";
<userinput>vcregress ecpgcheck</userinput>
<userinput>vcregress isolationcheck</userinput>
<userinput>vcregress bincheck</userinput>
<userinput>vcregress recoverycheck</userinput>
<userinput>vcregress upgradecheck</userinput>
</screen>
@ -455,7 +456,8 @@ $ENV{CONFIG}="Debug";
<para>
Running the regression tests on client programs, with "vcregress bincheck",
requires an additional Perl module to be installed:
or on recovery tests, with "vcregress recoverycheck", requires an additional
Perl module to be installed:
<variablelist>
<varlistentry>
<term><productname>IPC::Run</productname></term>

View File

@ -12,7 +12,7 @@ subdir = src/test
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
SUBDIRS = regress isolation modules
SUBDIRS = regress isolation modules recovery
# We don't build or execute examples/, locale/, or thread/ by default,
# but we do want "make clean" etc to recurse into them. Likewise for ssl/,

View File

@ -346,6 +346,9 @@ On Windows, we use SSPI authentication to ensure the same (by pg_regress
pg_hba.conf is configured to allow replication connections. Pass the keyword
parameter hba_permit_replication => 0 to disable this.
WAL archiving can be enabled on this node by passing the keyword parameter
has_archiving => 1. This is disabled by default.
postgresql.conf can be set up for replication by passing the keyword
parameter allows_streaming => 1. This is disabled by default.
@ -364,6 +367,7 @@ sub init
$params{hba_permit_replication} = 1
unless defined $params{hba_permit_replication};
$params{allows_streaming} = 0 unless defined $params{allows_streaming};
$params{has_archiving} = 0 unless defined $params{has_archiving};
mkdir $self->backup_dir;
mkdir $self->archive_dir;
@ -401,6 +405,7 @@ sub init
close $conf;
$self->set_replication_conf if $params{hba_permit_replication};
$self->enable_archiving if $params{has_archiving};
}
=pod
@ -458,13 +463,20 @@ Initialize a node from a backup, which may come from this node or a different
node. root_node must be a PostgresNode reference, backup_name the string name
of a backup previously created on that node with $node->backup.
Does not start the node after init.
Does not start the node after initializing it.
A recovery.conf is not created.
pg_hba.conf is configured to allow replication connections. Pass the keyword
parameter hba_permit_replication => 0 to disable this.
Streaming replication can be enabled on this node by passing the keyword
parameter has_streaming => 1. This is disabled by default.
Restoring WAL segments from archives using restore_command can be enabled
by passing the keyword parameter has_restoring => 1. This is disabled by
default.
The backup is copied, leaving the original unmodified. pg_hba.conf is
unconditionally set to enable replication connections.
@ -479,6 +491,10 @@ sub init_from_backup
my $root_name = $root_node->name;
$params{has_streaming} = 0 unless defined $params{has_streaming};
$params{hba_permit_replication} = 1
unless defined $params{hba_permit_replication};
$params{has_restoring} = 0 unless defined $params{has_restoring};
print
"# Initializing node \"$node_name\" from backup \"$backup_name\" of node \"$root_name\"\n";
die "Backup \"$backup_name\" does not exist at $backup_path"
@ -498,8 +514,9 @@ sub init_from_backup
qq(
port = $port
));
$self->set_replication_conf;
$self->set_replication_conf if $params{hba_permit_replication};
$self->enable_streaming($root_node) if $params{has_streaming};
$self->enable_restoring($root_node) if $params{has_restoring};
}
=pod
@ -608,6 +625,59 @@ standby_mode=on
));
}
# Internal routine to enable archive recovery on a standby node.
#
# Writes a recovery.conf that keeps the node in standby mode and makes it
# fetch WAL segments from the archive directory of $root_node via
# restore_command.
sub enable_restoring
{
	my ($self, $root_node) = @_;
	my $path = $root_node->archive_dir;
	my $name = $self->name;

	print "### Enabling WAL restore for node \"$name\"\n";

	# On Windows, the path specified in the restore command needs to use
	# double back-slashes to work properly and to be able to detect properly
	# the file targeted by the copy command, so the directory value used
	# in this routine, using only one back-slash, needs to be properly changed
	# first.  Paths also need to be double-quoted to prevent failures where
	# the path contains spaces, so quote the Unix command's paths as well
	# (the original left them unquoted, breaking on such paths).
	$path =~ s{\\}{\\\\}g if ($TestLib::windows_os);
	my $copy_command = $TestLib::windows_os ?
		qq{copy "$path\\\\%f" "%p"} :
		qq{cp "$path/%f" "%p"};

	$self->append_conf('recovery.conf', qq(
restore_command = '$copy_command'
standby_mode = on
));
}
# Internal routine to enable WAL archiving on a node.
#
# Turns on archive_mode in postgresql.conf with an archive_command that
# copies completed segments into this node's archive directory.
sub enable_archiving
{
	my ($self) = @_;
	my $path = $self->archive_dir;
	my $name = $self->name;

	print "### Enabling WAL archiving for node \"$name\"\n";

	# On Windows, the path specified in the restore command needs to use
	# double back-slashes to work properly and to be able to detect properly
	# the file targeted by the copy command, so the directory value used
	# in this routine, using only one back-slash, needs to be properly changed
	# first.  Paths also need to be double-quoted to prevent failures where
	# the path contains spaces, so quote the Unix command's paths as well
	# (the original left them unquoted, breaking on such paths).
	$path =~ s{\\}{\\\\}g if ($TestLib::windows_os);
	my $copy_command = $TestLib::windows_os ?
		qq{copy "%p" "$path\\\\%f"} :
		qq{cp "%p" "$path/%f"};

	# Enable archive_mode and archive_command on node
	$self->append_conf('postgresql.conf', qq(
archive_mode = on
archive_command = '$copy_command'
));
}
# Internal method
sub _update_pid
{

3
src/test/recovery/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
# Generated by test suite
/regress_log/
/tmp_check/

View File

@ -0,0 +1,17 @@
#-------------------------------------------------------------------------
#
# Makefile for src/test/recovery
#
# Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/test/recovery/Makefile
#
#-------------------------------------------------------------------------
subdir = src/test/recovery
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global

# Run the TAP test suite.  prove_check is presumably defined by the
# included src/Makefile.global and requires --enable-tap-tests — confirm.
check:
	$(prove_check)

21
src/test/recovery/README Normal file
View File

@ -0,0 +1,21 @@
src/test/recovery/README
Regression tests for recovery and replication
=============================================
This directory contains a test suite for recovery and replication,
testing mainly the interactions of recovery.conf with cluster
instances by providing a simple set of routines that can be used
to define a custom cluster for a test, including backup, archiving,
and streaming configuration.
Running the tests
=================
make check
NOTE: This creates a temporary installation, and some tests may
create one or more nodes, either master or standby, for the
purposes of the tests.
NOTE: This requires the --enable-tap-tests argument to configure.

View File

@ -0,0 +1,70 @@
# Minimal test testing streaming replication
#
# Sets up a cascading replication chain (master -> standby_1 -> standby_2),
# verifies that content created on the master streams through both levels,
# and that both standbys reject write queries.
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 4;
# Initialize master node
my $node_master = get_new_node('master');
$node_master->init(allows_streaming => 1);
$node_master->start;
my $backup_name = 'my_backup';
# Take backup
$node_master->backup($backup_name);
# Create streaming standby linking to master
my $node_standby_1 = get_new_node('standby_1');
$node_standby_1->init_from_backup($node_master, $backup_name,
has_streaming => 1);
$node_standby_1->start;
# Take backup of standby 1 (not mandatory, but useful to check if
# pg_basebackup works on a standby).
$node_standby_1->backup($backup_name);
# Create second standby node linking to standby 1
my $node_standby_2 = get_new_node('standby_2');
$node_standby_2->init_from_backup($node_standby_1, $backup_name,
has_streaming => 1);
$node_standby_2->start;
# Create some content on master and check its presence in standby 1
$node_master->psql('postgres',
"CREATE TABLE tab_int AS SELECT generate_series(1,1002) AS a");
# Wait for standbys to catch up.  Node names are used to match the
# application_name in pg_stat_replication (presumably set that way by
# PostgresNode's streaming setup — confirm against enable_streaming).
my $applname_1 = $node_standby_1->name;
my $applname_2 = $node_standby_2->name;
# Poll the master until standby 1 has written all of the master's WAL.
my $caughtup_query =
"SELECT pg_current_xlog_location() = write_location FROM pg_stat_replication WHERE application_name = '$applname_1';";
$node_master->poll_query_until('postgres', $caughtup_query)
or die "Timed out while waiting for standby 1 to catch up";
# Poll standby 1 (standby 2's upstream) until standby 2 has written
# everything standby 1 has replayed.
$caughtup_query =
"SELECT pg_last_xlog_replay_location() = write_location FROM pg_stat_replication WHERE application_name = '$applname_2';";
$node_standby_1->poll_query_until('postgres', $caughtup_query)
or die "Timed out while waiting for standby 2 to catch up";
# Both standbys must now see all 1002 rows.
my $result =
$node_standby_1->psql('postgres', "SELECT count(*) FROM tab_int");
print "standby 1: $result\n";
is($result, qq(1002), 'check streamed content on standby 1');
$result = $node_standby_2->psql('postgres', "SELECT count(*) FROM tab_int");
print "standby 2: $result\n";
is($result, qq(1002), 'check streamed content on standby 2');
# Check that only READ-only queries can run on standbys
$node_standby_1->command_fails(
[ 'psql', '-A',
'-t', '--no-psqlrc',
'-d', $node_standby_1->connstr,
'-c', "INSERT INTO tab_int VALUES (1)" ],
'Read-only queries on standby 1');
$node_standby_2->command_fails(
[ 'psql', '-A',
'-t', '--no-psqlrc',
'-d', $node_standby_2->connstr,
'-c', "INSERT INTO tab_int VALUES (1)" ],
'Read-only queries on standby 2');

View File

@ -0,0 +1,52 @@
# Test for WAL archiving with a warm standby.
#
# A master archives its WAL segments; a standby restores them through
# restore_command only, and must see exactly the rows whose WAL segment
# was forced into the archives.
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 1;
use File::Copy;

# Set up a master that archives its WAL.
my $master = get_new_node('master');
$master->init(
	has_archiving    => 1,
	allows_streaming => 1);
$master->start;

# Base backup used to seed the standby.
my $backup = 'my_backup';
$master->backup($backup);

# Standby recovering WAL from the master's archives, polling quickly.
my $standby = get_new_node('standby');
$standby->init_from_backup($master, $backup, has_restoring => 1);
$standby->append_conf(
	'postgresql.conf', qq(
wal_retrieve_retry_interval = '100ms'
));
$standby->start;

# Generate content on the master and remember the LSN reached.
$master->psql('postgres',
	"CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
my $master_lsn =
	$master->psql('postgres', "SELECT pg_current_xlog_location();");

# Switch segments so the rows above land in the archives.
$master->psql('postgres', "SELECT pg_switch_xlog()");

# These later rows should not reach the standby, as their segment has
# not been archived.
$master->psql('postgres',
	"INSERT INTO tab_int VALUES (generate_series(1001,2000))");

# Wait for the standby to replay up to the recorded LSN.
my $catchup_sql =
	"SELECT '$master_lsn'::pg_lsn <= pg_last_xlog_replay_location()";
$standby->poll_query_until('postgres', $catchup_sql)
	or die "Timed out while waiting for standby to catch up";

# Only the first 1000 rows must be visible.
my $row_count = $standby->psql('postgres', "SELECT count(*) FROM tab_int");
is($row_count, qq(1000), 'check content from archives');

View File

@ -0,0 +1,126 @@
# Test for recovery targets: name, timestamp, XID
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 7;

# Create and test a standby from a given backup, with a certain recovery
# target, checking that replay stops at the expected point.
#
# $recovery_params is an arrayref of recovery.conf lines; when several
# targets are given, their order matters (the last one takes priority).
# $num_rows is the expected row count in tab_int once $until_lsn has been
# replayed on the standby.
sub test_recovery_standby
{
	my $test_name       = shift;
	my $node_name       = shift;
	my $node_master     = shift;
	my $recovery_params = shift;
	my $num_rows        = shift;
	my $until_lsn       = shift;

	my $node_standby = get_new_node($node_name);
	$node_standby->init_from_backup($node_master, 'my_backup',
		has_restoring => 1);

	foreach my $param_item (@$recovery_params)
	{
		$node_standby->append_conf(
			'recovery.conf',
			qq($param_item
));
	}

	$node_standby->start;

	# Wait until standby has replayed enough data
	my $caughtup_query =
	  "SELECT '$until_lsn'::pg_lsn <= pg_last_xlog_replay_location()";
	$node_standby->poll_query_until('postgres', $caughtup_query)
	  or die "Timed out while waiting for standby to catch up";

	# Check that recovery replayed exactly the expected amount of data.
	my $result =
	  $node_standby->psql('postgres', "SELECT count(*) FROM tab_int");
	is($result, qq($num_rows), "check standby content for $test_name");

	# Stop standby node
	$node_standby->teardown_node;
}

# Initialize master node
my $node_master = get_new_node('master');
$node_master->init(has_archiving => 1, allows_streaming => 1);

# Start it
$node_master->start;

# Create data before taking the backup, aimed at testing
# recovery_target = 'immediate'
$node_master->psql('postgres',
	"CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
my $lsn1 =
	$node_master->psql('postgres', "SELECT pg_current_xlog_location();");

# Take backup from which all operations will be run
$node_master->backup('my_backup');

# Insert some data used as a replay reference, with a recovery
# target TXID.
$node_master->psql('postgres',
	"INSERT INTO tab_int VALUES (generate_series(1001,2000))");
my $recovery_txid = $node_master->psql('postgres', "SELECT txid_current()");
my $lsn2 =
	$node_master->psql('postgres', "SELECT pg_current_xlog_location();");

# More data, with recovery target timestamp
$node_master->psql('postgres',
	"INSERT INTO tab_int VALUES (generate_series(2001,3000))");
my $recovery_time = $node_master->psql('postgres', "SELECT now()");
my $lsn3 =
	$node_master->psql('postgres', "SELECT pg_current_xlog_location();");

# Even more data, this time with a recovery target name
$node_master->psql('postgres',
	"INSERT INTO tab_int VALUES (generate_series(3001,4000))");
my $recovery_name = "my_target";
my $lsn4 =
	$node_master->psql('postgres', "SELECT pg_current_xlog_location();");
# Bug fix: the original call was missing the closing parenthesis of
# pg_create_restore_point(), so the SQL failed and the restore point
# used by the name-based targets below was never created.
$node_master->psql('postgres',
	"SELECT pg_create_restore_point('$recovery_name')");

# Force archiving of WAL file
$node_master->psql('postgres', "SELECT pg_switch_xlog()");

# Test recovery targets
my @recovery_params = ("recovery_target = 'immediate'");
test_recovery_standby('immediate target',
	'standby_1', $node_master, \@recovery_params, "1000", $lsn1);
@recovery_params = ("recovery_target_xid = '$recovery_txid'");
test_recovery_standby('XID', 'standby_2', $node_master, \@recovery_params,
	"2000", $lsn2);
@recovery_params = ("recovery_target_time = '$recovery_time'");
test_recovery_standby('Time', 'standby_3', $node_master, \@recovery_params,
	"3000", $lsn3);
@recovery_params = ("recovery_target_name = '$recovery_name'");
test_recovery_standby('Name', 'standby_4', $node_master, \@recovery_params,
	"4000", $lsn4);

# Multiple targets
# Last entry has priority (note that an array respects the order of items
# not hashes).
@recovery_params = (
	"recovery_target_name = '$recovery_name'",
	"recovery_target_xid = '$recovery_txid'",
	"recovery_target_time = '$recovery_time'");
test_recovery_standby('Name + XID + Time',
	'standby_5', $node_master, \@recovery_params, "3000", $lsn3);
@recovery_params = (
	"recovery_target_time = '$recovery_time'",
	"recovery_target_name = '$recovery_name'",
	"recovery_target_xid = '$recovery_txid'");
test_recovery_standby('Time + Name + XID',
	'standby_6', $node_master, \@recovery_params, "2000", $lsn2);
@recovery_params = (
	"recovery_target_xid = '$recovery_txid'",
	"recovery_target_time = '$recovery_time'",
	"recovery_target_name = '$recovery_name'");
test_recovery_standby('XID + Time + Name',
	'standby_7', $node_master, \@recovery_params, "4000", $lsn4);

View File

@ -0,0 +1,75 @@
# Test for timeline switch
# Ensure that a standby is able to follow a newly-promoted standby
# on a new timeline.
use strict;
use warnings;
use File::Path qw(remove_tree);
use PostgresNode;
use TestLib;
use Test::More tests => 1;
# Default database used by psql invocations below.
$ENV{PGDATABASE} = 'postgres';
# Initialize master node
my $node_master = get_new_node('master');
$node_master->init(allows_streaming => 1);
$node_master->start;
# Take backup
my $backup_name = 'my_backup';
$node_master->backup($backup_name);
# Create two standbys linking to it
my $node_standby_1 = get_new_node('standby_1');
$node_standby_1->init_from_backup($node_master, $backup_name,
has_streaming => 1);
$node_standby_1->start;
my $node_standby_2 = get_new_node('standby_2');
$node_standby_2->init_from_backup($node_master, $backup_name,
has_streaming => 1);
$node_standby_2->start;
# Create some content on master
$node_master->psql('postgres',
"CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
my $until_lsn =
$node_master->psql('postgres', "SELECT pg_current_xlog_location();");
# Wait until standby has replayed enough data on standby 1
my $caughtup_query =
"SELECT '$until_lsn'::pg_lsn <= pg_last_xlog_replay_location()";
$node_standby_1->poll_query_until('postgres', $caughtup_query)
or die "Timed out while waiting for standby to catch up";
# Stop and remove master, and promote standby 1, switching it to a new timeline
$node_master->teardown_node;
$node_standby_1->promote;
# Switch standby 2 to replay from standby 1: the old recovery.conf
# (pointing at the now-dead master) is removed and replaced by one
# targeting standby 1; recovery_target_timeline='latest' lets standby 2
# follow standby 1 onto its new timeline.
remove_tree($node_standby_2->data_dir . '/recovery.conf');
my $connstr_1 = $node_standby_1->connstr;
$node_standby_2->append_conf(
'recovery.conf', qq(
primary_conninfo='$connstr_1'
standby_mode=on
recovery_target_timeline='latest'
));
$node_standby_2->restart;
# Insert some data in standby 1 and check its presence in standby 2
# to ensure that the timeline switch has been done. Standby 1 needs
# to exit recovery first before moving on with the test.
$node_standby_1->poll_query_until('postgres',
"SELECT pg_is_in_recovery() <> true");
$node_standby_1->psql('postgres',
"INSERT INTO tab_int VALUES (generate_series(1001,2000))");
$until_lsn =
$node_standby_1->psql('postgres', "SELECT pg_current_xlog_location();");
$caughtup_query =
"SELECT '$until_lsn'::pg_lsn <= pg_last_xlog_replay_location()";
$node_standby_2->poll_query_until('postgres', $caughtup_query)
or die "Timed out while waiting for standby to catch up";
# Standby 2 sees all 2000 rows only if it followed the timeline switch.
my $result =
$node_standby_2->psql('postgres', "SELECT count(*) FROM tab_int");
is($result, qq(2000), 'check content of standby 2');

View File

@ -0,0 +1,49 @@
# Checks for recovery_min_apply_delay
#
# A streaming standby is configured with a 2s apply delay; rows inserted
# on the master must be invisible on the standby 1s later, and visible
# once the standby has replayed past the master's position.
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 2;

# Master node with some initial content.
my $master = get_new_node('master');
$master->init(allows_streaming => 1);
$master->start;
$master->psql('postgres',
	"CREATE TABLE tab_int AS SELECT generate_series(1,10) AS a");

# Base backup used to seed the delayed standby.
my $backup = 'my_backup';
$master->backup($backup);

# Streaming standby applying WAL with a 2s delay.
my $standby = get_new_node('standby');
$standby->init_from_backup($master, $backup, has_streaming => 1);
$standby->append_conf(
	'recovery.conf', qq(
recovery_min_apply_delay = '2s'
));
$standby->start;

# Insert rows on the master, then look at the standby before the apply
# delay has elapsed: only the original 10 rows may be visible.
$master->psql('postgres',
	"INSERT INTO tab_int VALUES (generate_series(11,20))");
sleep 1;
my $row_count = $standby->psql('postgres', "SELECT count(*) FROM tab_int");
is($row_count, qq(10), 'check content with delay of 1s');

# Once replay has passed the master's current position, the delayed rows
# must have arrived.
my $target_lsn =
	$master->psql('postgres', "SELECT pg_current_xlog_location();");
my $catchup_sql =
	"SELECT '$target_lsn'::pg_lsn <= pg_last_xlog_replay_location()";
$standby->poll_query_until('postgres', $catchup_sql)
	or die "Timed out while waiting for standby to catch up";
$row_count = $standby->psql('postgres', "SELECT count(*) FROM tab_int");
is($row_count, qq(20), 'check content with delay of 2s');

View File

@ -34,7 +34,7 @@ if (-e "src/tools/msvc/buildenv.pl")
my $what = shift || "";
if ($what =~
/^(check|installcheck|plcheck|contribcheck|modulescheck|ecpgcheck|isolationcheck|upgradecheck|bincheck)$/i
/^(check|installcheck|plcheck|contribcheck|modulescheck|ecpgcheck|isolationcheck|upgradecheck|bincheck|recoverycheck)$/i
)
{
$what = uc $what;
@ -89,6 +89,7 @@ my %command = (
MODULESCHECK => \&modulescheck,
ISOLATIONCHECK => \&isolationcheck,
BINCHECK => \&bincheck,
RECOVERYCHECK => \&recoverycheck,
UPGRADECHECK => \&upgradecheck,);
my $proc = $command{$what};
@ -360,6 +361,16 @@ sub modulescheck
exit $mstat if $mstat;
}
# Run the recovery TAP test suite (src/test/recovery) against a temporary
# installation; exits with the prove status on failure.  The unused
# $mstat accumulator from the original has been removed — unlike the
# multi-directory check routines, this one runs a single suite.
sub recoverycheck
{
	InstallTemp();

	my $dir    = "$topdir/src/test/recovery";
	my $status = tap_check($dir);
	exit $status if $status;
}
# Run "initdb", then reconfigure authentication.
sub standard_initdb
{