postgresql/contrib/test_decoding/t/001_repl_stats.pl
Amit Kapila 3fa17d3771 Use HTAB for replication slot statistics.
Previously, we used an array of size max_replication_slots to store
stats for replication slots. But that had two problems in the cases
where a message for dropping a slot gets lost: 1) the stats for the new
slot are not recorded if the array is full and 2) we write beyond the end
of the array if the user reduces max_replication_slots.

This commit uses HTAB for replication slot statistics, resolving both
problems. Now, pgstat_vacuum_stat() searches for all the dead replication
slots in the stats hashtable and tells the collector to remove them. To avoid
showing the stats for the already-dropped slots, pg_stat_replication_slots
view searches slot stats by the slot name taken from pg_replication_slots.

Also, we send a message for creating a slot at slot creation, initializing
the stats. This reduces the possibility that the stats are accumulated
into the old slot stats when a message for dropping a slot gets lost.

Reported-by: Andres Freund
Author: Sawada Masahiko, test case by Vignesh C
Reviewed-by: Amit Kapila, Vignesh C, Dilip Kumar
Discussion: https://postgr.es/m/20210319185247.ldebgpdaxsowiflw@alap3.anarazel.de
2021-04-27 09:09:11 +05:30

116 lines
3.7 KiB
Perl

# Test that the replication statistics data in pg_stat_replication_slots is
# sane after dropping a replication slot and restarting the server.
use strict;
use warnings;
use File::Path qw(rmtree);
use PostgresNode;
use TestLib;
use Test::More tests => 2;
# Test set-up: create a node named 'test', initialize it with
# allows_streaming set to 'logical', turn synchronous_commit on and start it.
my $node = get_new_node('test');
my %init_options = (allows_streaming => 'logical');
$node->init(%init_options);
$node->append_conf('postgresql.conf', q{synchronous_commit = on});
$node->start();
# Check that replication slot stats are expected.
#
# Queries pg_stat_replication_slots on the 'postgres' database of $node and
# compares the output with $expected using is().  For every slot, ordered by
# slot name, the query reports whether total_txns and total_bytes are greater
# than zero.
#
# Arguments:
#   $node     - node object providing safe_psql()
#   $expected - expected query output as a single string
#   $msg      - test description passed to is()
#
# Returns the result of is().
sub test_slot_stats
{
	# Make a failure point at the caller's line instead of at the is()
	# call inside this helper.
	local $Test::Builder::Level = $Test::Builder::Level + 1;

	my ($node, $expected, $msg) = @_;

	my $result = $node->safe_psql(
		'postgres', qq[
SELECT slot_name, total_txns > 0 AS total_txn,
total_bytes > 0 AS total_bytes
FROM pg_stat_replication_slots
ORDER BY slot_name]);

	return is($result, $expected, $msg);
}
# Create the table whose changes will be decoded.
$node->safe_psql('postgres', q{CREATE TABLE test_repl_stat(col1 int)});

# Create four logical replication slots that use the test_decoding plugin.
my $create_slots_sql = q[
SELECT pg_create_logical_replication_slot('regression_slot1', 'test_decoding');
SELECT pg_create_logical_replication_slot('regression_slot2', 'test_decoding');
SELECT pg_create_logical_replication_slot('regression_slot3', 'test_decoding');
SELECT pg_create_logical_replication_slot('regression_slot4', 'test_decoding');
];
$node->safe_psql('postgres', $create_slots_sql);

# Insert some rows so that there is something to decode.
$node->safe_psql('postgres',
	q{INSERT INTO test_repl_stat values(generate_series(1, 5));});

# Fetch the changes through each of the four slots.
my $get_changes_sql = q[
SELECT data FROM pg_logical_slot_get_changes('regression_slot1', NULL,
NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
SELECT data FROM pg_logical_slot_get_changes('regression_slot2', NULL,
NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
SELECT data FROM pg_logical_slot_get_changes('regression_slot3', NULL,
NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
SELECT data FROM pg_logical_slot_get_changes('regression_slot4', NULL,
NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
];
$node->safe_psql('postgres', $get_changes_sql);

# Wait until all four slots show non-zero total_txns and total_bytes.
my $stats_updated_sql = q[
SELECT count(slot_name) >= 4 FROM pg_stat_replication_slots
WHERE slot_name ~ 'regression_slot'
AND total_txns > 0 AND total_bytes > 0;
];
my $stats_updated = $node->poll_query_until('postgres', $stats_updated_sql);
die "Timed out while waiting for statistics to be updated"
  unless $stats_updated;
# Drop one of the replication slots, restart the node and verify that the
# replication statistics data is still fine afterwards.
$node->safe_psql('postgres',
	q{SELECT pg_drop_replication_slot('regression_slot4')});
$node->stop();
$node->start();

# After the restart only the three remaining slots are expected, one per
# line of output.
my $stats_after_drop = join("\n",
	'regression_slot1|t|t',
	'regression_slot2|t|t',
	'regression_slot3|t|t');
test_slot_stats($node, $stats_after_drop,
	'check replication statistics are updated');
# Remove the on-disk directory of one replication slot while the node is
# stopped, and set max_replication_slots to the number of remaining slots.
# This leads to a mismatch between the number of slots present in the stats
# file and the number of stats present in the shared memory, simulating the
# scenario where the drop slot message is lost by the statistics collector
# process.
$node->stop();
my $datadir = $node->data_dir();
my $slot3_replslotdir =
  join('/', $datadir, 'pg_replslot', 'regression_slot3');
rmtree($slot3_replslotdir);
$node->append_conf('postgresql.conf', q{max_replication_slots = 2});
$node->start();

# After the restart only the two slots that still exist are expected.
my $stats_after_removal = join("\n",
	'regression_slot1|t|t',
	'regression_slot2|t|t');
test_slot_stats($node, $stats_after_removal,
	'check replication statistics after removing the slot file');
# Cleanup: drop the test table, then the two remaining replication slots.
$node->safe_psql('postgres', q{DROP TABLE test_repl_stat});
foreach my $slot_name ('regression_slot1', 'regression_slot2')
{
	$node->safe_psql('postgres',
		"SELECT pg_drop_replication_slot('$slot_name')");
}

# Shut the node down.
$node->stop();