Fix checkpoint signalling

Checkpointer uses its MyLatch to wake up when a checkpoint request is
received.  But before commit c655077639 the latch was not used for
anything else, so the code could just go to sleep after each loop
without rechecking the sleeping condition.  That commit added a separate
ResetLatch in its code path[1], which can cause a checkpoint to go
unnoticed for potentially a long time.

Fix by skipping sleep if any checkpoint flags are set.  Also add a test
to verify this; authored by Kyotaro Horiguchi.

[1] CreateCheckPoint -> InvalidateObsoleteReplicationSlots ->
ConditionVariableTimeSleep

Report and diagnosis by Kyotaro Horiguchi.
Co-authored-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Co-authored-by: Álvaro Herrera <alvherre@alvh.no-ip.org>
Discussion: https://postgr.es/m/20200408.141956.891237856186513376.horikyota.ntt@gmail.com
This commit is contained in:
Alvaro Herrera 2020-04-29 18:46:42 -04:00
parent fef819ac53
commit 1816a1c6ff
No known key found for this signature in database
GPG Key ID: 1C20ACB9D5C564AE
2 changed files with 49 additions and 2 deletions

View File

@ -494,6 +494,13 @@ CheckpointerMain(void)
*/
pgstat_send_bgwriter();
/*
* If any checkpoint flags have been set, redo the loop to handle the
* checkpoint without sleeping.
*/
if (((volatile CheckpointerShmemStruct *) CheckpointerShmem)->ckpt_flags)
continue;
/*
* Sleep until we are signaled or it's time for another checkpoint or
* xlog file switch.

View File

@ -8,7 +8,7 @@ use TestLib;
use PostgresNode;
use File::Path qw(rmtree);
use Test::More tests => 13;
use Test::More tests => 14;
use Time::HiRes qw(usleep);
$ENV{PGDATABASE} = 'postgres';
@ -179,7 +179,47 @@ for (my $i = 0; $i < 10000; $i++)
}
ok($failed, 'check that replication has been broken');
$node_standby->stop;
$node_master->stop('immediate');
$node_standby->stop('immediate');
my $node_master2 = get_new_node('master2');
$node_master2->init(allows_streaming => 1);
$node_master2->append_conf(
'postgresql.conf', qq(
min_wal_size = 32MB
max_wal_size = 32MB
log_checkpoints = yes
));
$node_master2->start;
$node_master2->safe_psql('postgres',
"SELECT pg_create_physical_replication_slot('rep1')");
$backup_name = 'my_backup2';
$node_master2->backup($backup_name);
$node_master2->stop;
$node_master2->append_conf(
'postgresql.conf', qq(
max_slot_wal_keep_size = 0
));
$node_master2->start;
$node_standby = get_new_node('standby_2');
$node_standby->init_from_backup($node_master2, $backup_name,
has_streaming => 1);
$node_standby->append_conf('postgresql.conf', "primary_slot_name = 'rep1'");
$node_standby->start;
my @result =
split(
'\n',
$node_master2->safe_psql(
'postgres',
"CREATE TABLE tt();
DROP TABLE tt;
SELECT pg_switch_wal();
CHECKPOINT;
SELECT 'finished';",
timeout => '60'));
is($result[1], 'finished', 'check if checkpoint command is not blocked');
#####################################
# Advance WAL of $node by $n segments