Add tests for XID wraparound.

The test module includes helper functions to quickly burn through lots
of XIDs. They are used in the tests, and are also handy for manually
testing XID wraparound.

Since these tests are very expensive, the entire suite is disabled by
default. PG_TEST_EXTRA must include xid_wraparound for it to run.

Reviewed-by: Daniel Gustafsson, John Naylor, Michael Paquier
Reviewed-by: vignesh C
Author: Heikki Linnakangas, Masahiko Sawada, Andres Freund
Discussion: https://www.postgresql.org/message-id/CAD21AoDVhkXp8HjpFO-gp3TgL6tCKcZQNxn04m01VAtcSi-5sA%40mail.gmail.com
This commit is contained in:
Masahiko Sawada 2023-11-30 14:29:48 +09:00
parent a243569bf6
commit e255b646a1
13 changed files with 644 additions and 1 deletions

View File

@ -314,6 +314,16 @@ make check-world PG_TEST_EXTRA='kerberos ldap ssl load_balance'
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>xid_wraparound</literal></term>
<listitem>
<para>
Runs the test suite under <filename>src/test/modules/xid_wraparound</filename>.
Not enabled by default because it is resource intensive.
</para>
</listitem>
</varlistentry>
</variablelist>
Tests for features that are not supported by the current build

View File

@ -34,7 +34,8 @@ SUBDIRS = \
test_shm_mq \
test_slru \
unsafe_tests \
worker_spi
worker_spi \
xid_wraparound
ifeq ($(with_ssl),openssl)
SUBDIRS += ssl_passphrase_callback

View File

@ -32,3 +32,4 @@ subdir('test_shm_mq')
subdir('test_slru')
subdir('unsafe_tests')
subdir('worker_spi')
subdir('xid_wraparound')

View File

@ -0,0 +1,4 @@
# Generated subdirectories
/log/
/results/
/tmp_check/

View File

@ -0,0 +1,23 @@
# src/test/modules/xid_wraparound/Makefile
#
# Builds the xid_wraparound test extension (one shared library plus its
# SQL install script) and turns on the TAP tests for this directory.

# Shared library name and the object files linked into it.
MODULE_big = xid_wraparound
OBJS = \
$(WIN32RES) \
xid_wraparound.o
PGFILEDESC = "xid_wraparound - tests for XID wraparound"

# Extension name and the SQL script installed for it.
EXTENSION = xid_wraparound
DATA = xid_wraparound--1.0.sql

# Enable the TAP test suite for this module.
TAP_TESTS = 1

# With USE_PGXS set, build against the installation reported by pg_config;
# otherwise build inside the source tree at src/test/modules/xid_wraparound.
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = src/test/modules/xid_wraparound
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

View File

@ -0,0 +1,3 @@
This module contains tests for XID wraparound. The tests use two
helper functions to quickly consume lots of XIDs, to reach XID
wraparound faster.

View File

@ -0,0 +1,36 @@
# Copyright (c) 2023, PostgreSQL Global Development Group

# Meson build description for the xid_wraparound test module: builds the
# shared module, installs the extension files and registers the TAP tests.

# C sources compiled into the module.
xid_wraparound_sources = files(
'xid_wraparound.c',
)

# On Windows, additionally generate a version resource for the library.
if host_system == 'windows'
xid_wraparound_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
'--NAME', 'xid_wraparound',
'--FILEDESC', 'xid_wraparound - tests for XID wraparound',])
endif

# The loadable module itself.
xid_wraparound = shared_module('xid_wraparound',
xid_wraparound_sources,
kwargs: pg_mod_args,
)
# NOTE(review): presumably makes the test setup depend on the module being
# built first -- confirm against the top-level meson.build.
testprep_targets += xid_wraparound

# Extension control file and SQL install script.
install_data(
'xid_wraparound.control',
'xid_wraparound--1.0.sql',
kwargs: contrib_data_args,
)

# Register the three TAP test scripts of this module.
tests += {
'name': 'xid_wraparound',
'sd': meson.current_source_dir(),
'bd': meson.current_build_dir(),
'tap': {
'tests': [
't/001_emergency_vacuum.pl',
't/002_limits.pl',
't/003_wraparounds.pl',
],
},
}

View File

@ -0,0 +1,132 @@
# Copyright (c) 2023, PostgreSQL Global Development Group
# Test wraparound emergency autovacuum.
use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
# This suite is very expensive, so it only runs when the word
# "xid_wraparound" is listed in PG_TEST_EXTRA.  Test for an unset or empty
# variable first: applying the regex to undef would emit a "Use of
# uninitialized value" warning under "use warnings".
if (  !$ENV{PG_TEST_EXTRA}
	|| $ENV{PG_TEST_EXTRA} !~ /\bxid_wraparound\b/)
{
	plan skip_all => "test xid_wraparound not enabled in PG_TEST_EXTRA";
}
# Tables whose vacuum state and failsafe log message are verified at the end
# of the test.
my @test_tables =
  ('large', 'large_trunc', 'small', 'small_trunc', 'autovacuum_disabled');

# Initialize node.  Autovacuum is switched off in the configuration, so any
# autovacuum activity seen below is the anti-wraparound kind.
my $node = PostgreSQL::Test::Cluster->new('main');
$node->init;
$node->append_conf(
	'postgresql.conf', qq[
autovacuum = off # run autovacuum only when to anti wraparound
autovacuum_naptime = 1s
# so it's easier to verify the order of operations
autovacuum_max_workers = 1
log_autovacuum_min_duration = 0
]);
$node->start;
$node->safe_psql('postgres', 'CREATE EXTENSION xid_wraparound');

# Create tables for a few different test scenarios
$node->safe_psql(
	'postgres', qq[
CREATE TABLE large(id serial primary key, data text, filler text default repeat(random()::text, 10));
INSERT INTO large(data) SELECT generate_series(1,30000);
CREATE TABLE large_trunc(id serial primary key, data text, filler text default repeat(random()::text, 10));
INSERT INTO large_trunc(data) SELECT generate_series(1,30000);
CREATE TABLE small(id serial primary key, data text, filler text default repeat(random()::text, 10));
INSERT INTO small(data) SELECT generate_series(1,15000);
CREATE TABLE small_trunc(id serial primary key, data text, filler text default repeat(random()::text, 10));
INSERT INTO small_trunc(data) SELECT generate_series(1,15000);
CREATE TABLE autovacuum_disabled(id serial primary key, data text) WITH (autovacuum_enabled=false);
INSERT INTO autovacuum_disabled(data) SELECT generate_series(1,1000);
]);

# Bump the query timeout to avoid false negatives on slow test systems.
my $psql_timeout_secs = 4 * $PostgreSQL::Test::Utils::timeout_default;

# Start a background session, which holds a transaction open, preventing
# autovacuum from advancing relfrozenxid and datfrozenxid.
my $background_psql = $node->background_psql(
	'postgres',
	on_error_stop => 0,
	timeout => $psql_timeout_secs);
$background_psql->set_query_timer_restart();
$background_psql->query_safe(
	qq[
BEGIN;
DELETE FROM large WHERE id % 2 = 0;
DELETE FROM large_trunc WHERE id > 10000;
DELETE FROM small WHERE id % 2 = 0;
DELETE FROM small_trunc WHERE id > 1000;
DELETE FROM autovacuum_disabled WHERE id % 2 = 0;
]);

# Consume 2 billion XIDs, to get us very close to wraparound
$node->safe_psql('postgres',
	qq[SELECT consume_xids_until('2000000000'::xid8)]);

# Make sure the latest completed XID is advanced
$node->safe_psql('postgres', qq[INSERT INTO small(data) SELECT 1]);

# Check that all databases became old enough to trigger failsafe.
my $ret = $node->safe_psql(
	'postgres',
	qq[
SELECT datname,
age(datfrozenxid) > current_setting('vacuum_failsafe_age')::int as old
FROM pg_database ORDER BY 1
]);
is( $ret, "postgres|t
template0|t
template1|t", "all tables became old");

# Remember the current end of the server log, so that only messages written
# from here on are inspected below.
my $log_offset = -s $node->logfile;

# Finish the old transaction, to allow vacuum freezing to advance
# relfrozenxid and datfrozenxid again.
$background_psql->query_safe(qq[COMMIT]);
$background_psql->quit;

# Wait until autovacuum processed all tables and advanced the
# system-wide oldest-XID.
$node->poll_query_until(
	'postgres', qq[
SELECT NOT EXISTS (
SELECT *
FROM pg_database
WHERE age(datfrozenxid) > current_setting('autovacuum_freeze_max_age')::int)
]) or die "timeout waiting for all databases to be vacuumed";

# Check if these tables are vacuumed.
$ret = $node->safe_psql(
	'postgres', qq[
SELECT relname, age(relfrozenxid) > current_setting('autovacuum_freeze_max_age')::int
FROM pg_class
WHERE relname IN ('large', 'large_trunc', 'small', 'small_trunc', 'autovacuum_disabled')
ORDER BY 1
]);
is( $ret, "autovacuum_disabled|f
large|f
large_trunc|f
small|f
small_trunc|f", "all tables are vacuumed");

# Check if vacuum failsafe was triggered for each table.  The dots of the
# qualified table name are escaped and the name is quoted with \Q..\E, so
# that the pattern matches the logged name literally.
my $log_contents = slurp_file($node->logfile, $log_offset);
foreach my $tablename (@test_tables)
{
	like(
		$log_contents,
		qr/bypassing nonessential maintenance of table "postgres\.public\.\Q$tablename\E" as a failsafe after \d+ index scans/,
		"failsafe vacuum triggered for $tablename");
}

$node->stop;
done_testing();

View File

@ -0,0 +1,138 @@
# Copyright (c) 2023, PostgreSQL Global Development Group
#
# Test XID wraparound limits.
#
# When you get close to XID wraparound, you start to get warnings, and
# when you get even closer, the system refuses to assign any more XIDs
# until the oldest databases have been vacuumed and datfrozenxid has
# been advanced.
use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
use Time::HiRes qw(usleep);
# This suite is very expensive, so it only runs when the word
# "xid_wraparound" is listed in PG_TEST_EXTRA.  Test for an unset or empty
# variable first: applying the regex to undef would emit a "Use of
# uninitialized value" warning under "use warnings".
if (  !$ENV{PG_TEST_EXTRA}
	|| $ENV{PG_TEST_EXTRA} !~ /\bxid_wraparound\b/)
{
	plan skip_all => "test xid_wraparound not enabled in PG_TEST_EXTRA";
}
my $ret;

# Initialize node
my $node = PostgreSQL::Test::Cluster->new('wraparound');
$node->init;
$node->append_conf(
	'postgresql.conf', qq[
autovacuum = off # run autovacuum only to prevent wraparound
autovacuum_naptime = 1s
log_autovacuum_min_duration = 0
]);
$node->start;
$node->safe_psql('postgres', 'CREATE EXTENSION xid_wraparound');

# Create a test table
$node->safe_psql(
	'postgres', qq[
CREATE TABLE wraparoundtest(t text);
INSERT INTO wraparoundtest VALUES ('start');
]);

# Bump the query timeout to avoid false negatives on slow test systems.
my $psql_timeout_secs = 4 * $PostgreSQL::Test::Utils::timeout_default;

# Start a background session, which holds a transaction open, preventing
# autovacuum from advancing relfrozenxid and datfrozenxid.
my $background_psql = $node->background_psql(
	'postgres',
	on_error_stop => 0,
	timeout => $psql_timeout_secs);
$background_psql->query_safe(
	qq[
BEGIN;
INSERT INTO wraparoundtest VALUES ('oldxact');
]);

# Consume 2 billion transactions, to get close to wraparound
$node->safe_psql('postgres', qq[SELECT consume_xids(1000000000)]);
$node->safe_psql('postgres',
	qq[INSERT INTO wraparoundtest VALUES ('after 1 billion')]);
$node->safe_psql('postgres', qq[SELECT consume_xids(1000000000)]);
$node->safe_psql('postgres',
	qq[INSERT INTO wraparoundtest VALUES ('after 2 billion')]);

# We are now just under 150 million XIDs away from wraparound.
# Continue consuming XIDs, in batches of 10 million, until we get
# the warning:
#
#  WARNING:  database "postgres" must be vacuumed within 3000024 transactions
#  HINT:  To avoid a database shutdown, execute a database-wide VACUUM in that database.
#  You might also need to commit or roll back old prepared transactions, or drop stale replication slots.
my $stderr;
my $warn_limit = 0;
for my $i (1 .. 15)
{
	$node->psql(
		'postgres', qq[SELECT consume_xids(10000000)],
		stderr => \$stderr,
		on_error_die => 1);
	if ($stderr =~
		/WARNING:  database "postgres" must be vacuumed within [0-9]+ transactions/
	  )
	{
		# Reached the warn-limit
		$warn_limit = 1;
		last;
	}
}
ok($warn_limit == 1, "warn-limit reached");

# We can still INSERT, despite the warnings.
$node->safe_psql('postgres',
	qq[INSERT INTO wraparoundtest VALUES ('reached warn-limit')]);

# Keep going. We'll hit the hard "stop" limit.  The statement is expected to
# fail, so plain psql() is used and only its stderr is examined.
$ret = $node->psql(
	'postgres',
	qq[SELECT consume_xids(100000000)],
	stderr => \$stderr);
like(
	$stderr,
	qr/ERROR:  database is not accepting commands that assign new XIDs to avoid wraparound data loss in database "postgres"/,
	"stop-limit");

# Finish the old transaction, to allow vacuum freezing to advance
# relfrozenxid and datfrozenxid again.
$background_psql->query_safe(qq[COMMIT]);
$background_psql->quit;

# VACUUM, to freeze the tables and advance datfrozenxid.
#
# Autovacuum does this for the other databases, and would do it for
# 'postgres' too, but let's test manual VACUUM.
#
$node->safe_psql('postgres', 'VACUUM');

# Wait until autovacuum has processed the other databases and advanced
# the system-wide oldest-XID.  Fail loudly on timeout instead of carrying
# on to a confusing mismatch in the table-contents check below.
$node->poll_query_until('postgres',
	qq[INSERT INTO wraparoundtest VALUES ('after VACUUM')],
	'INSERT 0 1')
  or die "timeout waiting for INSERT to succeed after VACUUM";

# Check the table contents
$ret = $node->safe_psql('postgres', qq[SELECT * from wraparoundtest]);
is( $ret, "start
oldxact
after 1 billion
after 2 billion
reached warn-limit
after VACUUM", "table contents after hitting the XID limits");

$node->stop;
done_testing();

View File

@ -0,0 +1,60 @@
# Copyright (c) 2023, PostgreSQL Global Development Group
#
# Consume a lot of XIDs, wrapping around a few times.
#
use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
use Time::HiRes qw(usleep);
# This suite is very expensive, so it only runs when the word
# "xid_wraparound" is listed in PG_TEST_EXTRA.  Test for an unset or empty
# variable first: applying the regex to undef would emit a "Use of
# uninitialized value" warning under "use warnings".
if (  !$ENV{PG_TEST_EXTRA}
	|| $ENV{PG_TEST_EXTRA} !~ /\bxid_wraparound\b/)
{
	plan skip_all => "test xid_wraparound not enabled in PG_TEST_EXTRA";
}
# Initialize node
my $node = PostgreSQL::Test::Cluster->new('wraparound');
$node->init;
$node->append_conf(
	'postgresql.conf', qq[
autovacuum = off # run autovacuum only when to anti wraparound
autovacuum_naptime = 1s
# so it's easier to verify the order of operations
autovacuum_max_workers = 1
log_autovacuum_min_duration = 0
]);
$node->start;
$node->safe_psql('postgres', 'CREATE EXTENSION xid_wraparound');

# Create a test table
$node->safe_psql(
	'postgres', qq[
CREATE TABLE wraparoundtest(t text);
INSERT INTO wraparoundtest VALUES ('beginning');
]);

# Bump the query timeout to avoid false negatives on slow test systems.
my $psql_timeout_secs = 4 * $PostgreSQL::Test::Utils::timeout_default;

# Burn through 10 billion transactions in total, in batches of 100 million.
# One marker row is inserted after every batch.
for my $i (1 .. 100)
{
	$node->safe_psql(
		'postgres',
		qq[SELECT consume_xids(100000000)],
		timeout => $psql_timeout_secs);
	$node->safe_psql('postgres',
		qq[INSERT INTO wraparoundtest VALUES ('after $i batches')]);
}

# The initial row plus one row per batch must all still be visible.
my $ret =
  $node->safe_psql('postgres', qq[SELECT COUNT(*) FROM wraparoundtest]);
is($ret, "101", "all rows are visible after several XID wraparounds");

$node->stop;
done_testing();

View File

@ -0,0 +1,12 @@
/* src/test/modules/xid_wraparound/xid_wraparound--1.0.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION xid_wraparound" to load this file. \quit
-- Both functions allocate transaction IDs and advance the cluster-wide XID
-- counter, so every call has side effects and returns a different value.
-- They must therefore be VOLATILE (never constant-folded or cached by the
-- planner) and PARALLEL UNSAFE (never run in a parallel query).

-- Consume nxids transaction IDs.
CREATE FUNCTION consume_xids(nxids bigint)
RETURNS xid8 VOLATILE PARALLEL UNSAFE STRICT
AS 'MODULE_PATHNAME' LANGUAGE C;

-- Consume transaction IDs up to targetxid.
CREATE FUNCTION consume_xids_until(targetxid xid8)
RETURNS xid8 VOLATILE PARALLEL UNSAFE STRICT
AS 'MODULE_PATHNAME' LANGUAGE C;

View File

@ -0,0 +1,219 @@
/*--------------------------------------------------------------------------
*
* xid_wraparound.c
* Utilities for testing XID wraparound
*
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/test/modules/xid_wraparound/xid_wraparound.c
*
* -------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/xact.h"
#include "miscadmin.h"
#include "storage/proc.h"
#include "utils/xid8.h"
PG_MODULE_MAGIC;
static int64 consume_xids_shortcut(void);
static FullTransactionId consume_xids_common(FullTransactionId untilxid, uint64 nxids);
/*
* Consume the specified number of XIDs.
*/
PG_FUNCTION_INFO_V1(consume_xids);
Datum
consume_xids(PG_FUNCTION_ARGS)
{
int64 nxids = PG_GETARG_INT64(0);
FullTransactionId lastxid;
if (nxids < 0)
elog(ERROR, "invalid nxids argument: %lld", (long long) nxids);
if (nxids == 0)
lastxid = ReadNextFullTransactionId();
else
lastxid = consume_xids_common(InvalidFullTransactionId, (uint64) nxids);
PG_RETURN_FULLTRANSACTIONID(lastxid);
}
/*
 * SQL-callable: consume XIDs until the XID given as the first argument is
 * reached.
 *
 * The target must be a normal full transaction ID, otherwise an error is
 * raised.  Returns the result of consume_xids_common().
 */
PG_FUNCTION_INFO_V1(consume_xids_until);
Datum
consume_xids_until(PG_FUNCTION_ARGS)
{
	FullTransactionId goal = PG_GETARG_FULLTRANSACTIONID(0);

	if (!FullTransactionIdIsNormal(goal))
		elog(ERROR, "targetxid %llu is not normal",
			 (unsigned long long) U64FromFullTransactionId(goal));

	PG_RETURN_FULLTRANSACTIONID(consume_xids_common(goal, 0));
}
/*
 * Common functionality between the two public functions.
 *
 * The mode is selected by 'nxids':
 *   - nxids > 0: consume that many XIDs; 'untilxid' is not consulted.
 *   - nxids == 0: consume XIDs until 'lastxid' follows or equals 'untilxid'.
 *
 * Returns 'lastxid': the XID obtained from the most recent
 * GetNewTransactionId() call, or the value of ReadNextFullTransactionId()
 * taken at entry if the loop ends before any such call.
 *
 * A NOTICE with the progress so far is emitted every REPORT_INTERVAL
 * consumed XIDs.
 */
static FullTransactionId
consume_xids_common(FullTransactionId untilxid, uint64 nxids)
{
FullTransactionId lastxid;
uint64 last_reported_at = 0;
uint64 consumed = 0;
/* Print a NOTICE every REPORT_INTERVAL xids */
#define REPORT_INTERVAL (10 * 1000000)
/* initialize 'lastxid' with the system's current next XID */
lastxid = ReadNextFullTransactionId();
/*
 * We consume XIDs by calling GetNewTransactionId(true), which marks the
 * consumed XIDs as subtransactions of the current top-level transaction.
 * For that to work, this transaction must have a top-level XID.
 *
 * GetNewTransactionId registers them in the subxid cache in PGPROC, until
 * the cache overflows, but beyond that, we don't keep track of the
 * consumed XIDs.
 */
(void) GetTopTransactionId();
for (;;)
{
uint64 xids_left;
/* Allow the (potentially very long) loop to be cancelled. */
CHECK_FOR_INTERRUPTS();
/* How many XIDs do we have left to consume? */
if (nxids > 0)
{
/* Count mode: stop once the requested number has been consumed. */
if (consumed >= nxids)
break;
xids_left = nxids - consumed;
}
else
{
/* Target mode: stop once the last allocated XID reached the target. */
if (FullTransactionIdFollowsOrEquals(lastxid, untilxid))
break;
xids_left = U64FromFullTransactionId(untilxid) - U64FromFullTransactionId(lastxid);
}
/*
 * If we still have plenty of XIDs to consume, try to take a shortcut
 * and bump up the nextXid counter directly.
 *
 * The shortcut is not attempted once REPORT_INTERVAL XIDs have been
 * consumed since the last report, so that the slow path below runs and
 * emits the progress NOTICE.  It is also only attempted after this
 * backend's subxid cache has overflowed.
 *
 * Note that the shortcut adds to 'consumed' but leaves 'lastxid'
 * untouched; 'lastxid' only changes on the slow path.
 */
if (xids_left > 2000 &&
consumed - last_reported_at < REPORT_INTERVAL &&
MyProc->subxidStatus.overflowed)
{
int64 consumed_by_shortcut = consume_xids_shortcut();
if (consumed_by_shortcut > 0)
{
consumed += consumed_by_shortcut;
continue;
}
}
/* Slow path: Call GetNewTransactionId to allocate a new XID. */
lastxid = GetNewTransactionId(true);
consumed++;
/* Report progress */
if (consumed - last_reported_at >= REPORT_INTERVAL)
{
if (nxids > 0)
elog(NOTICE, "consumed %llu / %llu XIDs, latest %u:%u",
(unsigned long long) consumed, (unsigned long long) nxids,
EpochFromFullTransactionId(lastxid),
XidFromFullTransactionId(lastxid));
else
elog(NOTICE, "consumed up to %u:%u / %u:%u",
EpochFromFullTransactionId(lastxid),
XidFromFullTransactionId(lastxid),
EpochFromFullTransactionId(untilxid),
XidFromFullTransactionId(untilxid));
last_reported_at = consumed;
}
}
return lastxid;
}
/*
 * Number of XIDs per page for the XID-indexed structures considered below.
 * These constants are copied from the .c files that own them, because they
 * are private there.
 */
#define COMMIT_TS_XACTS_PER_PAGE (BLCKSZ / 10)
#define SUBTRANS_XACTS_PER_PAGE (BLCKSZ / sizeof(TransactionId))
#define CLOG_XACTS_PER_BYTE 4
#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)

/*
 * Compute how many XIDs can be skipped from 'fullxid' without stepping over
 * an "interesting" value.
 *
 * All the interesting action in GetNewTransactionId happens when the SLRUs
 * are extended, or at the uint32 wraparound.  Only the low 32 bits of
 * 'fullxid' are looked at.  The result is 0 (no skipping) when that value is
 * below 5, at or above UINT32_MAX - 5, or an exact multiple of one of the
 * page sizes above.  Otherwise the result is the distance to the closest of:
 * UINT32_MAX - 5, or the next multiple of any of the page sizes.
 */
static inline uint32
XidSkip(FullTransactionId fullxid)
{
	const uint32 xid = XidFromFullTransactionId(fullxid);
	const uint32 page_sizes[] = {
		(uint32) COMMIT_TS_XACTS_PER_PAGE,
		(uint32) SUBTRANS_XACTS_PER_PAGE,
		(uint32) CLOG_XACTS_PER_PAGE
	};
	uint32		skip;
	int			i;

	/* Never skip right next to the 32-bit wraparound point. */
	if (xid < 5 || xid >= UINT32_MAX - 5)
		return 0;

	skip = UINT32_MAX - 5 - xid;

	for (i = 0; i < (int) (sizeof(page_sizes) / sizeof(page_sizes[0])); i++)
	{
		uint32		offset = xid % page_sizes[i];

		/* Sitting exactly on a page boundary: take the slow path. */
		if (offset == 0)
			return 0;

		/* Otherwise go no further than the next boundary of this kind. */
		skip = Min(skip, page_sizes[i] - offset);
	}

	return skip;
}
static int64
consume_xids_shortcut(void)
{
FullTransactionId nextXid;
uint32 consumed;
LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
nextXid = ShmemVariableCache->nextXid;
/*
* Go slow near the "interesting values". The interesting zones include 5
* transactions before and after SLRU page switches.
*/
consumed = XidSkip(nextXid);
if (consumed > 0)
ShmemVariableCache->nextXid.value += (uint64) consumed;
LWLockRelease(XidGenLock);
return consumed;
}

View File

@ -0,0 +1,4 @@
# Extension control file for the xid_wraparound test module.
comment = 'Tests for XID wraparound'
# Version installed by a plain CREATE EXTENSION; matches the
# xid_wraparound--1.0.sql script.
default_version = '1.0'
# Value substituted for MODULE_PATHNAME in the SQL script.
module_pathname = '$libdir/xid_wraparound'
relocatable = true