pgstat: add tests for handling of restarts, including crashes.

Test that stats are restored during normal restarts, discarded after a crash /
immediate restart, and that a corrupted stats file leads to stats being reset.

Author: Melanie Plageman <melanieplageman@gmail.com>
Author: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/20220303021600.hs34ghqcw6zcokdh@alap3.anarazel.de
This commit is contained in:
Andres Freund 2022-04-07 12:07:50 -07:00
parent 99392cdd78
commit 16acf7f1aa
1 changed files with 307 additions and 0 deletions

View File

@ -0,0 +1,307 @@
# Copyright (c) 2021-2022, PostgreSQL Global Development Group
# Tests statistics handling around restarts, including handling of crashes and
# invalid stats files, as well as restorting stats after "normal" restarts.
use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
use File::Copy;
my $node = PostgreSQL::Test::Cluster->new('primary');
$node->init(allows_streaming => 1);
$node->append_conf('postgresql.conf', "track_functions = 'all'");
$node->start;
my $connect_db = 'postgres';
my $db_under_test = 'test';
# create test objects
$node->safe_psql($connect_db, "CREATE DATABASE $db_under_test");
$node->safe_psql($db_under_test,
"CREATE TABLE tab_stats_crash_discard_test1 AS SELECT generate_series(1,100) AS a"
);
$node->safe_psql($db_under_test,
"CREATE FUNCTION func_stats_crash_discard1() RETURNS VOID AS 'select 2;' LANGUAGE SQL IMMUTABLE"
);
# collect object oids
my $dboid = $node->safe_psql($db_under_test,
"SELECT oid FROM pg_database WHERE datname = '$db_under_test'");
my $funcoid = $node->safe_psql($db_under_test,
"SELECT 'func_stats_crash_discard1()'::regprocedure::oid");
my $tableoid = $node->safe_psql($db_under_test,
"SELECT 'tab_stats_crash_discard_test1'::regclass::oid");
# generate stats and flush them
trigger_funcrel_stat();
# verify stats objects exist
my $sect = "initial";
is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
is(have_stats('function', $dboid, $funcoid),
't', "$sect: function stats do exist");
is(have_stats('relation', $dboid, $tableoid),
't', "$sect: relation stats do exist");
# regular shutdown
$node->stop();
# backup stats files
my $statsfile = $PostgreSQL::Test::Utils::tmp_check . '/' . "discard_stats1";
ok(!-f "$statsfile", "backup statsfile cannot already exist");
my $datadir = $node->data_dir();
my $og_stats = "$datadir/pg_stat/pgstat.stat";
ok(-f "$og_stats", "origin stats file must exist");
copy($og_stats, $statsfile) or die "Copy failed: $!";
## test discarding of stats file after crash etc
$node->start;
$sect = "copy";
is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
is(have_stats('function', $dboid, $funcoid),
't', "$sect: function stats do exist");
is(have_stats('relation', $dboid, $tableoid),
't', "$sect: relation stats do exist");
$node->stop('immediate');
ok(!-f "$og_stats", "no stats file should exist after immediate shutdown");
# copy the old stats back to test we discard stats after crash restart
copy($statsfile, $og_stats) or die "Copy failed: $!";
$node->start;
# stats should have been discarded
$sect = "post immediate";
is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist");
is(have_stats('function', $dboid, $funcoid),
'f', "$sect: function stats do exist");
is(have_stats('relation', $dboid, $tableoid),
'f', "$sect: relation stats do not exist");
# get rid of backup statsfile
unlink $statsfile or die "cannot unlink $statsfile $!";
# generate new stats and flush them
trigger_funcrel_stat();
$sect = "post immediate, new";
is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
is(have_stats('function', $dboid, $funcoid),
't', "$sect: function stats do exist");
is(have_stats('relation', $dboid, $tableoid),
't', "$sect: relation stats do exist");
# regular shutdown
$node->stop();
## check an invalid stats file is handled
overwrite_file($og_stats, "ZZZZZZZZZZZZZ");
# normal startup and no issues despite invalid stats file
$node->start;
# no stats present due to invalid stats file
$sect = "invalid";
is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist");
is(have_stats('function', $dboid, $funcoid),
'f', "$sect: function stats do not exist");
is(have_stats('relation', $dboid, $tableoid),
'f', "$sect: relation stats do not exist");
## checks related to stats persistency around restarts and resets
# Ensure enough checkpoints to protect against races for test after reset,
# even on very slow machines.
$node->safe_psql($connect_db, "CHECKPOINT; CHECKPOINT;");
## check checkpoint and wal stats are incremented due to restart
my $ckpt_start = checkpoint_stats();
my $wal_start = wal_stats();
$node->restart;
$sect = "post restart";
my $ckpt_restart = checkpoint_stats();
my $wal_restart = wal_stats();
cmp_ok(
$ckpt_start->{count}, '<',
$ckpt_restart->{count},
"$sect: increased checkpoint count");
cmp_ok(
$wal_start->{records}, '<',
$wal_restart->{records},
"$sect: increased wal record count");
cmp_ok($wal_start->{bytes}, '<', $wal_restart->{bytes},
"$sect: increased wal bytes");
is( $ckpt_start->{reset},
$ckpt_restart->{reset},
"$sect: checkpoint stats_reset equal");
is($wal_start->{reset}, $wal_restart->{reset},
"$sect: wal stats_reset equal");
## Check that checkpoint stats are reset, WAL stats aren't affected
$node->safe_psql($connect_db, "SELECT pg_stat_reset_shared('bgwriter')");
$sect = "post ckpt reset";
my $ckpt_reset = checkpoint_stats();
my $wal_ckpt_reset = wal_stats();
cmp_ok($ckpt_restart->{count},
'>', $ckpt_reset->{count}, "$sect: checkpoint count smaller");
cmp_ok($ckpt_start->{reset}, 'lt', $ckpt_reset->{reset},
"$sect: stats_reset newer");
cmp_ok(
$wal_restart->{records},
'<=',
$wal_ckpt_reset->{records},
"$sect: wal record count not affected by reset");
is( $wal_start->{reset},
$wal_ckpt_reset->{reset},
"$sect: wal stats_reset equal");
## check that checkpoint stats stay reset after restart
$node->restart;
$sect = "post ckpt reset & restart";
my $ckpt_restart_reset = checkpoint_stats();
my $wal_restart2 = wal_stats();
# made sure above there's enough checkpoints that this will be stable even on slow machines
cmp_ok(
$ckpt_restart_reset->{count},
'<',
$ckpt_restart->{count},
"$sect: checkpoint still reset");
is($ckpt_restart_reset->{reset},
$ckpt_reset->{reset}, "$sect: stats_reset same");
cmp_ok(
$wal_ckpt_reset->{records},
'<',
$wal_restart2->{records},
"$sect: increased wal record count");
cmp_ok(
$wal_ckpt_reset->{bytes},
'<',
$wal_restart2->{bytes},
"$sect: increased wal bytes");
is( $wal_start->{reset},
$wal_restart2->{reset},
"$sect: wal stats_reset equal");
## check WAL stats stay reset
$node->safe_psql($connect_db, "SELECT pg_stat_reset_shared('wal')");
$sect = "post wal reset";
my $wal_reset = wal_stats();
cmp_ok(
$wal_reset->{records}, '<',
$wal_restart2->{records},
"$sect: smaller record count");
cmp_ok(
$wal_reset->{bytes}, '<',
$wal_restart2->{bytes},
"$sect: smaller bytes");
cmp_ok(
$wal_reset->{reset}, 'gt',
$wal_restart2->{reset},
"$sect: newer stats_reset");
$node->restart;
$sect = "post wal reset & restart";
my $wal_reset_restart = wal_stats();
# enough WAL generated during prior tests and initdb to make this not racy
cmp_ok(
$wal_reset_restart->{records},
'<',
$wal_restart2->{records},
"$sect: smaller record count");
cmp_ok(
$wal_reset->{bytes}, '<',
$wal_restart2->{bytes},
"$sect: smaller bytes");
cmp_ok(
$wal_reset->{reset}, 'gt',
$wal_restart2->{reset},
"$sect: newer stats_reset");
$node->stop;
done_testing();
sub trigger_funcrel_stat
{
$node->safe_psql(
$db_under_test, q[
SELECT * FROM tab_stats_crash_discard_test1;
SELECT func_stats_crash_discard1();
SELECT pg_stat_force_next_flush();]);
}
sub have_stats
{
my ($kind, $dboid, $objoid) = @_;
return $node->safe_psql($connect_db,
"SELECT pg_stat_have_stats('$kind', $dboid, $objoid)");
}
sub overwrite_file
{
my ($filename, $str) = @_;
open my $fh, ">", $filename
or die "could not write \"$filename\": $!";
print $fh $str;
close $fh;
return;
}
sub checkpoint_stats
{
my %results;
$results{count} = $node->safe_psql($connect_db,
"SELECT checkpoints_timed + checkpoints_req FROM pg_stat_bgwriter");
$results{reset} = $node->safe_psql($connect_db,
"SELECT stats_reset FROM pg_stat_bgwriter");
return \%results;
}
sub wal_stats
{
my %results;
$results{records} =
$node->safe_psql($connect_db, "SELECT wal_records FROM pg_stat_wal");
$results{bytes} =
$node->safe_psql($connect_db, "SELECT wal_bytes FROM pg_stat_wal");
$results{reset} =
$node->safe_psql($connect_db, "SELECT stats_reset FROM pg_stat_wal");
return \%results;
}