# Copyright (c) 2021-2023, PostgreSQL Global Development Group
use strict;
use warnings;

use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;

use Test::More;
# This regression test demonstrates that the pg_amcheck binary correctly
# identifies specific kinds of corruption within pages.  To test this, we need
# a mechanism to create corrupt pages with predictable, repeatable corruption.
# The postgres backend cannot be expected to help us with this, as its design
# is not consistent with the goal of intentionally corrupting pages.
#
# Instead, we create a table to corrupt, and with careful consideration of how
# postgresql lays out heap pages, we seek to offsets within the page and
# overwrite deliberately chosen bytes with specific values calculated to
# corrupt the page in expected ways.  We then verify that pg_amcheck reports
# the corruption, and that it runs without crashing.  Note that the backend
# cannot simply be started to run queries against the corrupt table, as the
# backend will crash, at least for some of the corruption types we generate.
#
# Autovacuum potentially touching the table in the background makes the exact
# behavior of this test harder to reason about.  We turn it off to keep things
# simpler.  We use a "belt and suspenders" approach, turning it off for the
# system generally in postgresql.conf, and turning it off specifically for the
# test table.
#
# This test depends on the table being written to the heap file exactly as we
# expect it to be, so we take care to arrange the columns of the table, and
# insert rows of the table, that give predictable sizes and locations within
# the table page.
#
# The HeapTupleHeaderData has 23 bytes of fixed size fields before the variable
# length t_bits[] array.  We have exactly 3 columns in the table, so natts = 3,
# t_bits is 1 byte long, and t_hoff = MAXALIGN(23 + 1) = 24.
#
# We're not too fussy about which datatypes we use for the test, but we do care
# about some specific properties.  We'd like to test both fixed size and
# varlena types.  We'd like some varlena data inline and some toasted.  And
# we'd like the layout of the table such that the datums land at predictable
# offsets within the tuple.  We choose a structure without padding on all
# supported architectures:
#
# 	a BIGINT
# 	b TEXT
# 	c TEXT
#
# We always insert a 7-ascii character string into field 'b', which with a
# 1-byte varlena header gives an 8 byte inline value.  We always insert a long
# text string in field 'c', long enough to force toast storage.
#
# We choose to read and write binary copies of our table's tuples, using perl's
# pack() and unpack() functions.  Perl uses a packing code system in which:
#
# 	l = "signed 32-bit Long",
# 	L = "Unsigned 32-bit Long",
# 	S = "Unsigned 16-bit Short",
# 	C = "Unsigned 8-bit Octet",
#
# Each tuple in our table has a layout as follows:
#
# 	xx xx xx xx             t_xmin: xxxx        offset = 0   L
# 	xx xx xx xx             t_xmax: xxxx        offset = 4   L
# 	xx xx xx xx             t_field3: xxxx      offset = 8   L
# 	xx xx                   bi_hi: xx           offset = 12  S
# 	xx xx                   bi_lo: xx           offset = 14  S
# 	xx xx                   ip_posid: xx        offset = 16  S
# 	xx xx                   t_infomask2: xx     offset = 18  S
# 	xx xx                   t_infomask: xx      offset = 20  S
# 	xx                      t_hoff: x           offset = 22  C
# 	xx                      t_bits: x           offset = 23  C
# 	xx xx xx xx xx xx xx xx 'a': xxxxxxxx       offset = 24  LL
# 	xx xx xx xx xx xx xx xx 'b': xxxxxxxx       offset = 32  CCCCCCCC
# 	xx xx xx xx xx xx xx xx 'c': xxxxxxxx       offset = 40  CCllLL
# 	xx xx xx xx xx xx xx xx  : xxxxxxxx         ...continued
# 	xx xx                    : xx               ...continued
#
# We could choose to read and write columns 'b' and 'c' in other ways, but
# it is convenient enough to do it this way.  We define packing code
# constants here, where they can be compared easily against the layout.
# Packing template for one heap tuple of public.test, matching the layout
# diagram above, and the total packed size in bytes.
use constant HEAPTUPLE_PACK_CODE => 'LLLSSSSSCCLLCCCCCCCCCCllLL';
use constant HEAPTUPLE_PACK_LENGTH => 58;    # Total size
# Read a tuple of our table from a heap page.
#
# Takes an open filehandle to the heap file, and the offset of the tuple.
#
# Rather than returning the binary data from the file, unpacks the data into a
# perl hash with named fields.  These fields exactly match the ones understood
# by write_tuple(), below.  Returns a reference to this hash.
#
sub read_tuple
{
	my ($fh, $offset) = @_;
	my ($buffer, %tup);
	sysseek($fh, $offset, 0)
	  or BAIL_OUT("sysseek failed: $!");
	defined(sysread($fh, $buffer, HEAPTUPLE_PACK_LENGTH))
	  or BAIL_OUT("sysread failed: $!");

	# Unpack the tuple directly into named hash fields via a hash slice.
	# The field names here are in exactly the order the packing template
	# HEAPTUPLE_PACK_CODE unpacks them; see the layout diagram above.
	# (The original assigned the unpack() result to @_ and shifted values
	# off one at a time; the slice avoids clobbering @_.)
	@tup{
		qw(t_xmin t_xmax t_field3 bi_hi bi_lo ip_posid t_infomask2
		  t_infomask t_hoff t_bits a_1 a_2 b_header b_body1 b_body2
		  b_body3 b_body4 b_body5 b_body6 b_body7 c_va_header
		  c_va_vartag c_va_rawsize c_va_extinfo c_va_valueid
		  c_va_toastrelid)
	} = unpack(HEAPTUPLE_PACK_CODE, $buffer);

	# Stitch together the text for column 'b'
	$tup{b} = join('', map { chr($tup{"b_body$_"}) } (1 .. 7));
	return \%tup;
}
# Write a tuple of our table to a heap page.
#
# Takes an open filehandle to the heap file, the offset of the tuple, and a
# reference to a hash with the tuple values, as returned by read_tuple().
# Writes the tuple fields from the hash into the heap file.
#
# The purpose of this function is to write a tuple back to disk with some
# subset of fields modified.  The function does no error checking.  Use
# cautiously.
#
sub write_tuple
{
	my ($fh, $offset, $tup) = @_;

	# Pack the named fields back into binary form.  The slice below lists
	# the fields in exactly the order HEAPTUPLE_PACK_CODE expects; it is
	# the mirror image of the unpack in read_tuple().
	my $buffer = pack(
		HEAPTUPLE_PACK_CODE,
		@{$tup}{
			qw(t_xmin t_xmax t_field3 bi_hi bi_lo ip_posid t_infomask2
			  t_infomask t_hoff t_bits a_1 a_2 b_header b_body1 b_body2
			  b_body3 b_body4 b_body5 b_body6 b_body7 c_va_header
			  c_va_vartag c_va_rawsize c_va_extinfo c_va_valueid
			  c_va_toastrelid)
		});
	sysseek($fh, $offset, 0)
	  or BAIL_OUT("sysseek failed: $!");
	defined(syswrite($fh, $buffer, HEAPTUPLE_PACK_LENGTH))
	  or BAIL_OUT("syswrite failed: $!");
	return;
}
# Set umask so test directories and files are created with default permissions
umask(0077);

# State shared between iterations of the corruption loop below: values read
# from one tuple and copied into a later one.
my $pred_xmax;
my $pred_posid;
my $aborted_xid;

# Set up the node.  Once we create and corrupt the table,
# autovacuum workers visiting the table could crash the backend.
# Disable autovacuum so that won't happen.
my $node = PostgreSQL::Test::Cluster->new('test');
$node->init;
$node->append_conf('postgresql.conf', 'autovacuum=off');
$node->append_conf('postgresql.conf', 'max_prepared_transactions=10');

# Start the node and load the extensions.  We depend on both
# amcheck and pageinspect for this test.
$node->start;
my $port = $node->port;
my $pgdata = $node->data_dir;
$node->safe_psql('postgres', "CREATE EXTENSION amcheck");
$node->safe_psql('postgres', "CREATE EXTENSION pageinspect");

# Get a non-zero datfrozenxid
$node->safe_psql('postgres', qq(VACUUM FREEZE));

# Create the test table with precisely the schema that our corruption function
# expects.
$node->safe_psql(
	'postgres', qq(
		CREATE TABLE public.test (a BIGINT, b TEXT, c TEXT);
		ALTER TABLE public.test SET (autovacuum_enabled=false);
		ALTER TABLE public.test ALTER COLUMN c SET STORAGE EXTERNAL;
		CREATE INDEX test_idx ON public.test(a, b);
	));

# We want (0 < datfrozenxid < test.relfrozenxid).  To achieve this, we freeze
# an otherwise unused table, public.junk, prior to inserting data and freezing
# public.test
$node->safe_psql(
	'postgres', qq(
		CREATE TABLE public.junk AS SELECT 'junk'::TEXT AS junk_column;
		ALTER TABLE public.junk SET (autovacuum_enabled=false);
		VACUUM FREEZE public.junk
	));

my $rel = $node->safe_psql('postgres',
	qq(SELECT pg_relation_filepath('public.test')));
my $relpath = "$pgdata/$rel";
# Initial setup for the public.test table.
# $ROWCOUNT is the total number of rows that we expect to insert into the page.
# $ROWCOUNT_BASIC is the number of those rows that are related to basic
# tuple validation, rather than update chain validation.
my $ROWCOUNT = 44;
my $ROWCOUNT_BASIC = 16;

# First insert data needed for tests unrelated to update chain validation.
# Then freeze the page.  These tuples are at offset numbers 1 to 16.
$node->safe_psql(
	'postgres', qq(
	INSERT INTO public.test (a, b, c)
		SELECT
			x'DEADF9F9DEADF9F9'::bigint,
			'abcdefg',
			repeat('w', 10000)
	FROM generate_series(1, $ROWCOUNT_BASIC);
	VACUUM FREEZE public.test;)
);

# Create some simple HOT update chains for line pointer validation.  After
# the page is HOT pruned, we'll have two redirects line pointers each pointing
# to a tuple.  We'll then change the second redirect to point to the same
# tuple as the first one and verify that we can detect corruption.
$node->safe_psql(
	'postgres', qq(
	INSERT INTO public.test (a, b, c)
		VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg',
		generate_series(1,2)); -- offset numbers 17 and 18
	UPDATE public.test SET c = 'a' WHERE c = '1'; -- offset number 19
	UPDATE public.test SET c = 'a' WHERE c = '2'; -- offset number 20
	));

# Create some more HOT update chains.
$node->safe_psql(
	'postgres', qq(
	INSERT INTO public.test (a, b, c)
		VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg',
		generate_series(3,6)); -- offset numbers 21 through 24
	UPDATE public.test SET c = 'a' WHERE c = '3'; -- offset number 25
	UPDATE public.test SET c = 'a' WHERE c = '4'; -- offset number 26
	));

# Negative test case of HOT-pruning with aborted tuple.
$node->safe_psql(
	'postgres', qq(
	BEGIN;
	UPDATE public.test SET c = 'a' WHERE c = '5'; -- offset number 27
	ABORT;
	VACUUM FREEZE public.test;
	));

# Next update on any tuple will be stored at the same place of tuple inserted
# by aborted transaction.  This should not cause the table to appear corrupt.
$node->safe_psql(
	'postgres', qq(
	UPDATE public.test SET c = 'a' WHERE c = '6'; -- offset number 27 again
	VACUUM FREEZE public.test;
	));

# Data for HOT chain validation, so not calling VACUUM FREEZE.
$node->safe_psql(
	'postgres', qq(
	INSERT INTO public.test (a, b, c)
		VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg',
		generate_series(7,15)); -- offset numbers 28 to 36
	UPDATE public.test SET c = 'a' WHERE c = '7'; -- offset number 37
	UPDATE public.test SET c = 'a' WHERE c = '10'; -- offset number 38
	UPDATE public.test SET c = 'a' WHERE c = '11'; -- offset number 39
	UPDATE public.test SET c = 'a' WHERE c = '12'; -- offset number 40
	UPDATE public.test SET c = 'a' WHERE c = '13'; -- offset number 41
	UPDATE public.test SET c = 'a' WHERE c = '14'; -- offset number 42
	UPDATE public.test SET c = 'a' WHERE c = '15'; -- offset number 43
	));

# Need one aborted transaction to test corruption in HOT chains.
$node->safe_psql(
	'postgres', qq(
	BEGIN;
	UPDATE public.test SET c = 'a' WHERE c = '9'; -- offset number 44
	ABORT;
	));

# Need one in-progress transaction to test few corruption in HOT chains.
# We are creating PREPARE TRANSACTION here as these will not be aborted
# even if we stop the node.
$node->safe_psql(
	'postgres', qq(
	BEGIN;
	PREPARE TRANSACTION 'in_progress_tx';
	));
my $in_progress_xid = $node->safe_psql(
	'postgres', qq(
	SELECT transaction FROM pg_prepared_xacts;
	));
my $relfrozenxid = $node->safe_psql('postgres',
	q(select relfrozenxid from pg_class where relname = 'test'));
my $datfrozenxid = $node->safe_psql('postgres',
	q(select datfrozenxid from pg_database where datname = 'postgres'));

# Sanity check that our 'test' table has a relfrozenxid newer than the
# datfrozenxid for the database, and that the datfrozenxid is greater than the
# first normal xid.  We rely on these invariants in some of our tests.
if ($datfrozenxid <= 3 || $datfrozenxid >= $relfrozenxid)
{
	$node->clean_node;
	plan skip_all =>
	  "Xid thresholds not as expected: got datfrozenxid = $datfrozenxid, relfrozenxid = $relfrozenxid";
	exit;
}

# Find where each of the tuples is located on the page.  If a particular
# line pointer is a redirect rather than a tuple, we record the offset as -1.
my @lp_off = split '\n', $node->safe_psql(
	'postgres', qq(
	SELECT CASE WHEN lp_flags = 2 THEN -1 ELSE lp_off END
	FROM heap_page_items(get_raw_page('test', 'main', 0))
	)
);
is(scalar @lp_off, $ROWCOUNT, "acquired row offsets");
# Sanity check that our 'test' table on disk layout matches expectations.  If
# this is not so, we will have to skip the test until somebody updates the test
# to work on this platform.
$node->stop;
my $file;
open($file, '+<', $relpath)
  or BAIL_OUT("open failed: $!");
binmode $file;

my $ENDIANNESS;
for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
{
	my $offnum = $tupidx + 1;    # offnum is 1-based, not zero-based
	my $offset = $lp_off[$tupidx];
	next if $offset == -1;    # ignore redirect line pointers
	my $tup = read_tuple($file, $offset);

	# Sanity-check that the data appears on the page where we expect.
	my $a_1 = $tup->{a_1};
	my $a_2 = $tup->{a_2};
	my $b = $tup->{b};
	if ($a_1 != 0xDEADF9F9 || $a_2 != 0xDEADF9F9 || $b ne 'abcdefg')
	{
		close($file);    # ignore errors on close; we're exiting anyway
		$node->clean_node;
		plan skip_all =>
		  sprintf(
			"Page layout of index %d differs from our expectations: expected (%x, %x, \"%s\"), got (%x, %x, \"%s\")",
			$tupidx, 0xDEADF9F9, 0xDEADF9F9, "abcdefg", $a_1, $a_2, $b);
		exit;
	}

	# Determine endianness of current platform from the 1-byte varlena header
	$ENDIANNESS = $tup->{b_header} == 0x11 ? "little" : "big";
}
close($file)
  or BAIL_OUT("close failed: $!");
$node->start;

# Ok, Xids and page layout look ok.  We can run corruption tests.

# Check that pg_amcheck runs against the uncorrupted table without error.
$node->command_ok(
	[ 'pg_amcheck', '-p', $port, 'postgres' ],
	'pg_amcheck test table, prior to corruption');

# Check that pg_amcheck runs against the uncorrupted table and index without error.
$node->command_ok([ 'pg_amcheck', '-p', $port, 'postgres' ],
	'pg_amcheck test table and index, prior to corruption');

$node->stop;
# Some #define constants from access/htup_details.h for use while corrupting.
# Bits applied to t_infomask below:
use constant HEAP_HASNULL => 0x0001;
use constant HEAP_XMAX_LOCK_ONLY => 0x0080;
use constant HEAP_XMIN_COMMITTED => 0x0100;
use constant HEAP_XMIN_INVALID => 0x0200;
use constant HEAP_XMAX_COMMITTED => 0x0400;
use constant HEAP_XMAX_INVALID => 0x0800;
use constant HEAP_NATTS_MASK => 0x07FF;
use constant HEAP_XMAX_IS_MULTI => 0x1000;
# Bits applied to t_infomask2 below (same numeric values as some t_infomask
# bits, but they live in a different field):
use constant HEAP_KEYS_UPDATED => 0x2000;
use constant HEAP_HOT_UPDATED => 0x4000;
use constant HEAP_ONLY_TUPLE => 0x8000;
use constant HEAP_UPDATED => 0x2000;
# Helper function to generate a regular expression matching the header we
# expect verify_heapam() to return given which fields we expect to be non-null.
#
# Takes a block number, offset number and attribute number; each may be undef,
# and the most specific defined combination selects the header form.
sub header
{
	my ($blkno, $offnum, $attnum) = @_;
	return
	  qr/heap table "postgres\.public\.test", block $blkno, offset $offnum, attribute $attnum:\s+/ms
	  if (defined $attnum);
	return
	  qr/heap table "postgres\.public\.test", block $blkno, offset $offnum:\s+/ms
	  if (defined $offnum);
	return qr/heap table "postgres\.public\.test", block $blkno:\s+/ms
	  if (defined $blkno);
	return qr/heap table "postgres\.public\.test":\s+/ms;
}
# Corrupt the tuples, one type of corruption per tuple.  Some types of
# corruption cause verify_heapam to skip to the next tuple without
# performing any remaining checks, so we can't exercise the system properly if
# we focus all our corruption on a single tuple.
#
my @expected;
open($file, '+<', $relpath)
  or BAIL_OUT("open failed: $!");
binmode $file;
for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
{
	my $offnum = $tupidx + 1;    # offnum is 1-based, not zero-based
	my $offset = $lp_off[$tupidx];
	my $header = header(0, $offnum, undef);

	# Read tuple, if there is one.
	my $tup = $offset == -1 ? undef : read_tuple($file, $offset);

	if ($offnum == 1)
	{
		# Corruptly set xmin < relfrozenxid
		my $xmin = $relfrozenxid - 1;
		$tup->{t_xmin} = $xmin;
		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
		$tup->{t_infomask} &= ~HEAP_XMIN_INVALID;

		# Expected corruption report
		push @expected,
		  qr/${header}xmin $xmin precedes relation freeze threshold 0:\d+/;
	}
	elsif ($offnum == 2)
	{
		# Corruptly set xmin < datfrozenxid
		my $xmin = 3;
		$tup->{t_xmin} = $xmin;
		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
		$tup->{t_infomask} &= ~HEAP_XMIN_INVALID;

		push @expected,
		  qr/${header}xmin $xmin precedes oldest valid transaction ID 0:\d+/;
	}
	elsif ($offnum == 3)
	{
		# Corruptly set xmin < datfrozenxid, further back, noting circularity
		# of xid comparison.
		my $xmin = 4026531839;
		$tup->{t_xmin} = $xmin;
		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
		$tup->{t_infomask} &= ~HEAP_XMIN_INVALID;

		push @expected,
		  qr/${header}xmin ${xmin} precedes oldest valid transaction ID 0:\d+/;
	}
	elsif ($offnum == 4)
	{
		# Corruptly set xmax < relminmxid;
		my $xmax = 4026531839;
		$tup->{t_xmax} = $xmax;
		$tup->{t_infomask} &= ~HEAP_XMAX_INVALID;

		push @expected,
		  qr/${header}xmax ${xmax} precedes oldest valid transaction ID 0:\d+/;
	}
	elsif ($offnum == 5)
	{
		# Corrupt the tuple t_hoff, but keep it aligned properly
		$tup->{t_hoff} += 128;

		push @expected,
		  qr/${header}data begins at offset 152 beyond the tuple length 58/,
		  qr/${header}tuple data should begin at byte 24, but actually begins at byte 152 \(3 attributes, no nulls\)/;
	}
	elsif ($offnum == 6)
	{
		# Corrupt the tuple t_hoff, wrong alignment
		$tup->{t_hoff} += 3;

		push @expected,
		  qr/${header}tuple data should begin at byte 24, but actually begins at byte 27 \(3 attributes, no nulls\)/;
	}
	elsif ($offnum == 7)
	{
		# Corrupt the tuple t_hoff, underflow but correct alignment
		$tup->{t_hoff} -= 8;

		push @expected,
		  qr/${header}tuple data should begin at byte 24, but actually begins at byte 16 \(3 attributes, no nulls\)/;
	}
	elsif ($offnum == 8)
	{
		# Corrupt the tuple t_hoff, underflow and wrong alignment
		$tup->{t_hoff} -= 3;

		push @expected,
		  qr/${header}tuple data should begin at byte 24, but actually begins at byte 21 \(3 attributes, no nulls\)/;
	}
	elsif ($offnum == 9)
	{
		# Corrupt the tuple to look like it has lots of attributes, not just 3
		$tup->{t_infomask2} |= HEAP_NATTS_MASK;

		push @expected,
		  qr/${header}number of attributes 2047 exceeds maximum expected for table 3/;
	}
	elsif ($offnum == 10)
	{
		# Corrupt the tuple to look like it has lots of attributes, some of
		# them null.  This falsely creates the impression that the t_bits
		# array is longer than just one byte, but t_hoff still says otherwise.
		$tup->{t_infomask} |= HEAP_HASNULL;
		$tup->{t_infomask2} |= HEAP_NATTS_MASK;
		$tup->{t_bits} = 0xAA;

		push @expected,
		  qr/${header}tuple data should begin at byte 280, but actually begins at byte 24 \(2047 attributes, has nulls\)/;
	}
	elsif ($offnum == 11)
	{
		# Same as above, but this time t_hoff plays along
		$tup->{t_infomask} |= HEAP_HASNULL;
		$tup->{t_infomask2} |= (HEAP_NATTS_MASK & 0x40);
		$tup->{t_bits} = 0xAA;
		$tup->{t_hoff} = 32;

		push @expected,
		  qr/${header}number of attributes 67 exceeds maximum expected for table 3/;
	}
	elsif ($offnum == 12)
	{
		# Overwrite column 'b' 1-byte varlena header and initial characters to
		# look like a long 4-byte varlena
		#
		# On little endian machines, bytes ending in two zero bits (xxxxxx00 bytes)
		# are 4-byte length word, aligned, uncompressed data (up to 1G).  We set the
		# high six bits to 111111 and the lower two bits to 00, then the next three
		# bytes with 0xFF using 0xFCFFFFFF.
		#
		# On big endian machines, bytes starting in two zero bits (00xxxxxx bytes)
		# are 4-byte length word, aligned, uncompressed data (up to 1G).  We set the
		# low six bits to 111111 and the high two bits to 00, then the next three
		# bytes with 0xFF using 0x3FFFFFFF.
		#
		$tup->{b_header} = $ENDIANNESS eq 'little' ? 0xFC : 0x3F;
		$tup->{b_body1} = 0xFF;
		$tup->{b_body2} = 0xFF;
		$tup->{b_body3} = 0xFF;

		$header = header(0, $offnum, 1);
		push @expected,
		  qr/${header}attribute with length \d+ ends at offset \d+ beyond total tuple length \d+/;
	}
	elsif ($offnum == 13)
	{
		# Corrupt the bits in column 'c' toast pointer
		$tup->{c_va_valueid} = 0xFFFFFFFF;

		$header = header(0, $offnum, 2);
		push @expected, qr/${header}toast value \d+ not found in toast table/;
	}
	elsif ($offnum == 14)
	{
		# Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI
		$tup->{t_infomask} |= HEAP_XMAX_COMMITTED;
		$tup->{t_infomask} |= HEAP_XMAX_IS_MULTI;
		$tup->{t_xmax} = 4;

		push @expected,
		  qr/${header}multitransaction ID 4 equals or exceeds next valid multitransaction ID 1/;
	}
	elsif ($offnum == 15)
	{
		# Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI
		$tup->{t_infomask} |= HEAP_XMAX_COMMITTED;
		$tup->{t_infomask} |= HEAP_XMAX_IS_MULTI;
		$tup->{t_xmax} = 4000000000;

		push @expected,
		  qr/${header}multitransaction ID 4000000000 precedes relation minimum multitransaction ID threshold 1/;
	}
	elsif ($offnum == 16)    # Last offnum must equal ROWCOUNT_BASIC
	{
		# Corruptly set xmin > next_xid to be in the future.
		my $xmin = 123456;
		$tup->{t_xmin} = $xmin;
		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
		$tup->{t_infomask} &= ~HEAP_XMIN_INVALID;

		push @expected,
		  qr/${header}xmin ${xmin} equals or exceeds next valid transaction ID 0:\d+/;
	}
	elsif ($offnum == 17)
	{
		# at offnum 19 we will unset HEAP_ONLY_TUPLE and HEAP_UPDATED flags.
		die "offnum $offnum should be a redirect" if defined $tup;
		push @expected,
		  qr/${header}redirected line pointer points to a non-heap-only tuple at offset \d+/;
		push @expected,
		  qr/${header}redirected line pointer points to a non-heap-updated tuple at offset \d+/;
	}
	elsif ($offnum == 18)
	{
		# rewrite line pointer with lp_off = 17, lp_flags = 2, lp_len = 0.
		die "offnum $offnum should be a redirect" if defined $tup;
		sysseek($file, 92, 0) or BAIL_OUT("sysseek failed: $!");
		syswrite($file,
			pack("L", $ENDIANNESS eq 'little' ? 0x00010011 : 0x11000100))
		  or BAIL_OUT("syswrite failed: $!");
		push @expected,
		  qr/${header}redirected line pointer points to another redirected line pointer at offset \d+/;
	}
	elsif ($offnum == 19)
	{
		# unset HEAP_ONLY_TUPLE and HEAP_UPDATED flag, so that update chain
		# validation will complain about offset 17
		$tup->{t_infomask2} &= ~HEAP_ONLY_TUPLE;
		$tup->{t_infomask} &= ~HEAP_UPDATED;
	}
	elsif ($offnum == 22)
	{
		# rewrite line pointer with lp.off = 25, lp_flags = 2, lp_len = 0
		sysseek($file, 108, 0) or BAIL_OUT("sysseek failed: $!");
		syswrite($file,
			pack("L", $ENDIANNESS eq 'little' ? 0x00010019 : 0x19000100))
		  or BAIL_OUT("syswrite failed: $!");
		push @expected,
		  qr/${header}redirect line pointer points to offset \d+, but offset \d+ also points there/;
	}
	elsif ($offnum == 28)
	{
		$tup->{t_infomask2} &= ~HEAP_HOT_UPDATED;
		push @expected,
		  qr/${header}non-heap-only update produced a heap-only tuple at offset \d+/;

		# Save these values so we can insert them into the tuple at offnum 29.
		$pred_xmax = $tup->{t_xmax};
		$pred_posid = $tup->{ip_posid};
	}
	elsif ($offnum == 29)
	{
		# Copy these values from the tuple at offset 28.
		$tup->{t_xmax} = $pred_xmax;
		$tup->{ip_posid} = $pred_posid;
		push @expected,
		  qr/${header}tuple points to new version at offset \d+, but offset \d+ also points there/;
	}
	elsif ($offnum == 30)
	{
		# Save xid, so we can insert it into tuple at offset 31.
		$aborted_xid = $tup->{t_xmax};
	}
	elsif ($offnum == 31)
	{
		# Set xmin to xmax of tuple at offset 30.
		$tup->{t_xmin} = $aborted_xid;
		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
		push @expected,
		  qr/${header}tuple with aborted xmin \d+ was updated to produce a tuple at offset \d+ with committed xmin \d+/;
	}
	elsif ($offnum == 32)
	{
		$tup->{t_infomask2} |= HEAP_ONLY_TUPLE;
		push @expected,
		  qr/${header}tuple is root of chain but is marked as heap-only tuple/;
	}
	elsif ($offnum == 33)
	{
		# Tuple at offset 40 is the successor of this one; we'll corrupt it to
		# be non-heap-only.
		push @expected,
		  qr/${header}heap-only update produced a non-heap only tuple at offset \d+/;
	}
	elsif ($offnum == 34)
	{
		$tup->{t_xmax} = 0;
		push @expected,
		  qr/${header}tuple has been HOT updated, but xmax is 0/;
	}
	elsif ($offnum == 35)
	{
		$tup->{t_xmin} = $in_progress_xid;
		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
		push @expected,
		  qr/${header}tuple with in-progress xmin \d+ was updated to produce a tuple at offset \d+ with committed xmin \d+/;
	}
	elsif ($offnum == 36)
	{
		# Tuple at offset 43 is the successor of this one; we'll corrupt it to
		# have xmin = $in_progress_xid.  By setting the xmax of this tuple to
		# the same value, we make it look like an update chain with an
		# in-progress XID following a committed one.
		$tup->{t_xmin} = $aborted_xid;
		$tup->{t_xmax} = $in_progress_xid;
		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
		push @expected,
		  qr/${header}tuple with aborted xmin \d+ was updated to produce a tuple at offset \d+ with in-progress xmin \d+/;
	}
	elsif ($offnum == 40)
	{
		# Tuple at offset 33 is the predecessor of this one; the error will
		# be reported there.
		$tup->{t_infomask2} &= ~HEAP_ONLY_TUPLE;
	}
	elsif ($offnum == 43)
	{
		# Tuple at offset 36 is the predecessor of this one; the error will
		# be reported there.
		$tup->{t_xmin} = $in_progress_xid;
		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
	}
	else
	{
		# The tests for update chain validation end up creating a bunch of
		# tuples that aren't corrupted in any way e.g. because only one of
		# the two tuples in the update chain needs to be corrupted for the
		# test, or because one update chain is being made to erroneously
		# point into the middle of another that has nothing wrong with it.
		# In all such cases we need not write the tuple back to the file.
		next;
	}

	write_tuple($file, $offset, $tup) if defined $tup;
}
close($file)
  or BAIL_OUT("close failed: $!");
$node->start;

# Run pg_amcheck against the corrupt table with epoch=0, comparing actual
# corruption messages against the expected messages
$node->command_checks_all(
	[ 'pg_amcheck', '--no-dependent-indexes', '-p', $port, 'postgres' ],
	2, [@expected], [], 'Expected corruption message output');

# Resolve the prepared transaction left open above so shutdown is clean.
$node->safe_psql(
	'postgres', qq(
	COMMIT PREPARED 'in_progress_tx';
	));

$node->teardown_node;
$node->clean_node;

done_testing();