amcheck: fix multiple problems with TOAST pointer validation

First, don't perform database access while holding a buffer lock.
When checking a heap, we can validate that TOAST pointers are sane by
performing a scan on the TOAST index and looking up the chunks that
correspond to each value ID that appears in a TOAST pointer in the main
table. But, to do that while holding a buffer lock at least risks
causing other backends to wait uninterruptibly, and probably can cause
undetected and uninterruptible deadlocks.  So, instead, make a list of
checks to perform while holding the lock, and then perform the checks
after releasing it.

Second, adjust things so that we don't try to follow TOAST pointers
for tuples that are already eligible to be pruned. The TOAST tuples
become eligible for pruning at the same time that the main tuple does,
so trying to check them may lead to spurious reports of corruption,
as observed in the buildfarm. The necessary infrastructure to decide
whether or not the tuple being checked is prunable was added by
commit 3b6c1259f9, but it wasn't
actually used for its intended purpose prior to this patch.

Mark Dilger, adjusted by me to avoid a memory leak.

Discussion: http://postgr.es/m/AC5479E4-6321-473D-AC92-5EC36299FBC2@enterprisedb.com
This commit is contained in:
Robert Haas 2021-04-07 13:28:35 -04:00
parent 5fd9dfa5f5
commit ec7ffb8096
2 changed files with 165 additions and 72 deletions

View File

@ -58,6 +58,19 @@ typedef enum SkipPages
SKIP_PAGES_NONE SKIP_PAGES_NONE
} SkipPages; } SkipPages;
/*
* Struct holding information about a toasted attribute sufficient to both
* check the toasted attribute and, if found to be corrupt, to report where it
* was encountered in the main table.
*/
typedef struct ToastedAttribute
{
struct varatt_external toast_pointer;
BlockNumber blkno; /* block in main table */
OffsetNumber offnum; /* offset in main table */
AttrNumber attnum; /* attribute in main table */
} ToastedAttribute;
/* /*
* Struct holding the running context information during * Struct holding the running context information during
* a lifetime of a verify_heapam execution. * a lifetime of a verify_heapam execution.
@ -119,11 +132,11 @@ typedef struct HeapCheckContext
/* True if tuple's xmax makes it eligible for pruning */ /* True if tuple's xmax makes it eligible for pruning */
bool tuple_could_be_pruned; bool tuple_could_be_pruned;
/* Values for iterating over toast for the attribute */ /*
int32 chunkno; * List of ToastedAttribute structs for toasted attributes which are not
int32 attrsize; * eligible for pruning and should be checked
int32 endchunk; */
int32 totalchunks; List *toasted_attributes;
/* Whether verify_heapam has yet encountered any corrupt tuples */ /* Whether verify_heapam has yet encountered any corrupt tuples */
bool is_corrupt; bool is_corrupt;
@ -136,13 +149,20 @@ typedef struct HeapCheckContext
/* Internal implementation */ /* Internal implementation */
static void sanity_check_relation(Relation rel); static void sanity_check_relation(Relation rel);
static void check_tuple(HeapCheckContext *ctx); static void check_tuple(HeapCheckContext *ctx);
static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx); static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
ToastedAttribute *ta, int32 chunkno,
int32 endchunk);
static bool check_tuple_attribute(HeapCheckContext *ctx); static bool check_tuple_attribute(HeapCheckContext *ctx);
static void check_toasted_attribute(HeapCheckContext *ctx,
ToastedAttribute *ta);
static bool check_tuple_header(HeapCheckContext *ctx); static bool check_tuple_header(HeapCheckContext *ctx);
static bool check_tuple_visibility(HeapCheckContext *ctx); static bool check_tuple_visibility(HeapCheckContext *ctx);
static void report_corruption(HeapCheckContext *ctx, char *msg); static void report_corruption(HeapCheckContext *ctx, char *msg);
static void report_toast_corruption(HeapCheckContext *ctx,
ToastedAttribute *ta, char *msg);
static TupleDesc verify_heapam_tupdesc(void); static TupleDesc verify_heapam_tupdesc(void);
static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid, static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid,
const HeapCheckContext *ctx); const HeapCheckContext *ctx);
@ -253,6 +273,7 @@ verify_heapam(PG_FUNCTION_ARGS)
memset(&ctx, 0, sizeof(HeapCheckContext)); memset(&ctx, 0, sizeof(HeapCheckContext));
ctx.cached_xid = InvalidTransactionId; ctx.cached_xid = InvalidTransactionId;
ctx.toasted_attributes = NIL;
/* /*
* Any xmin newer than the xmin of our snapshot can't become all-visible * Any xmin newer than the xmin of our snapshot can't become all-visible
@ -469,6 +490,19 @@ verify_heapam(PG_FUNCTION_ARGS)
/* clean up */ /* clean up */
UnlockReleaseBuffer(ctx.buffer); UnlockReleaseBuffer(ctx.buffer);
/*
* Check any toast pointers from the page whose lock we just released
*/
if (ctx.toasted_attributes != NIL)
{
ListCell *cell;
foreach(cell, ctx.toasted_attributes)
check_toasted_attribute(&ctx, lfirst(cell));
list_free_deep(ctx.toasted_attributes);
ctx.toasted_attributes = NIL;
}
if (on_error_stop && ctx.is_corrupt) if (on_error_stop && ctx.is_corrupt)
break; break;
} }
@ -510,14 +544,13 @@ sanity_check_relation(Relation rel)
} }
/* /*
* Record a single corruption found in the table. The values in ctx should * Shared internal implementation for report_corruption and
* reflect the location of the corruption, and the msg argument should contain * report_toast_corruption.
* a human-readable description of the corruption.
*
* The msg argument is pfree'd by this function.
*/ */
static void static void
report_corruption(HeapCheckContext *ctx, char *msg) report_corruption_internal(Tuplestorestate *tupstore, TupleDesc tupdesc,
BlockNumber blkno, OffsetNumber offnum,
AttrNumber attnum, char *msg)
{ {
Datum values[HEAPCHECK_RELATION_COLS]; Datum values[HEAPCHECK_RELATION_COLS];
bool nulls[HEAPCHECK_RELATION_COLS]; bool nulls[HEAPCHECK_RELATION_COLS];
@ -525,10 +558,10 @@ report_corruption(HeapCheckContext *ctx, char *msg)
MemSet(values, 0, sizeof(values)); MemSet(values, 0, sizeof(values));
MemSet(nulls, 0, sizeof(nulls)); MemSet(nulls, 0, sizeof(nulls));
values[0] = Int64GetDatum(ctx->blkno); values[0] = Int64GetDatum(blkno);
values[1] = Int32GetDatum(ctx->offnum); values[1] = Int32GetDatum(offnum);
values[2] = Int32GetDatum(ctx->attnum); values[2] = Int32GetDatum(attnum);
nulls[2] = (ctx->attnum < 0); nulls[2] = (attnum < 0);
values[3] = CStringGetTextDatum(msg); values[3] = CStringGetTextDatum(msg);
/* /*
@ -541,8 +574,39 @@ report_corruption(HeapCheckContext *ctx, char *msg)
*/ */
pfree(msg); pfree(msg);
tuple = heap_form_tuple(ctx->tupdesc, values, nulls); tuple = heap_form_tuple(tupdesc, values, nulls);
tuplestore_puttuple(ctx->tupstore, tuple); tuplestore_puttuple(tupstore, tuple);
}
/*
* Record a single corruption found in the main table. The values in ctx should
* indicate the location of the corruption, and the msg argument should contain
* a human-readable description of the corruption.
*
* The msg argument is pfree'd by this function.
*/
static void
report_corruption(HeapCheckContext *ctx, char *msg)
{
report_corruption_internal(ctx->tupstore, ctx->tupdesc, ctx->blkno,
ctx->offnum, ctx->attnum, msg);
ctx->is_corrupt = true;
}
/*
* Record corruption found in the toast table. The values in ta should
* indicate the location in the main table where the toast pointer was
* encountered, and the msg argument should contain a human-readable
* description of the toast table corruption.
*
* As above, the msg argument is pfree'd by this function.
*/
static void
report_toast_corruption(HeapCheckContext *ctx, ToastedAttribute *ta,
char *msg)
{
report_corruption_internal(ctx->tupstore, ctx->tupdesc, ta->blkno,
ta->offnum, ta->attnum, msg);
ctx->is_corrupt = true; ctx->is_corrupt = true;
} }
@ -1094,9 +1158,12 @@ check_tuple_visibility(HeapCheckContext *ctx)
* tuples that store the toasted value are retrieved and checked in order, with * tuples that store the toasted value are retrieved and checked in order, with
* each toast tuple being checked against where we are in the sequence, as well * each toast tuple being checked against where we are in the sequence, as well
* as each toast tuple having its varlena structure sanity checked. * as each toast tuple having its varlena structure sanity checked.
*
* Returns whether the toast tuple passed the corruption checks.
*/ */
static void static void
check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx) check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
ToastedAttribute *ta, int32 chunkno, int32 endchunk)
{ {
int32 curchunk; int32 curchunk;
Pointer chunk; Pointer chunk;
@ -1111,7 +1178,7 @@ check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx)
ctx->toast_rel->rd_att, &isnull)); ctx->toast_rel->rd_att, &isnull));
if (isnull) if (isnull)
{ {
report_corruption(ctx, report_toast_corruption(ctx, ta,
pstrdup("toast chunk sequence number is null")); pstrdup("toast chunk sequence number is null"));
return; return;
} }
@ -1119,7 +1186,7 @@ check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx)
ctx->toast_rel->rd_att, &isnull)); ctx->toast_rel->rd_att, &isnull));
if (isnull) if (isnull)
{ {
report_corruption(ctx, report_toast_corruption(ctx, ta,
pstrdup("toast chunk data is null")); pstrdup("toast chunk data is null"));
return; return;
} }
@ -1137,7 +1204,7 @@ check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx)
/* should never happen */ /* should never happen */
uint32 header = ((varattrib_4b *) chunk)->va_4byte.va_header; uint32 header = ((varattrib_4b *) chunk)->va_4byte.va_header;
report_corruption(ctx, report_toast_corruption(ctx, ta,
psprintf("corrupt extended toast chunk has invalid varlena header: %0x (sequence number %d)", psprintf("corrupt extended toast chunk has invalid varlena header: %0x (sequence number %d)",
header, curchunk)); header, curchunk));
return; return;
@ -1146,30 +1213,28 @@ check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx)
/* /*
* Some checks on the data we've found * Some checks on the data we've found
*/ */
if (curchunk != ctx->chunkno) if (curchunk != chunkno)
{ {
report_corruption(ctx, report_toast_corruption(ctx, ta,
psprintf("toast chunk sequence number %u does not match the expected sequence number %u", psprintf("toast chunk sequence number %u does not match the expected sequence number %u",
curchunk, ctx->chunkno)); curchunk, chunkno));
return; return;
} }
if (curchunk > ctx->endchunk) if (curchunk > endchunk)
{ {
report_corruption(ctx, report_toast_corruption(ctx, ta,
psprintf("toast chunk sequence number %u exceeds the end chunk sequence number %u", psprintf("toast chunk sequence number %u exceeds the end chunk sequence number %u",
curchunk, ctx->endchunk)); curchunk, endchunk));
return; return;
} }
expected_size = curchunk < ctx->totalchunks - 1 ? TOAST_MAX_CHUNK_SIZE expected_size = curchunk < endchunk ? TOAST_MAX_CHUNK_SIZE
: ctx->attrsize - ((ctx->totalchunks - 1) * TOAST_MAX_CHUNK_SIZE); : VARATT_EXTERNAL_GET_EXTSIZE(ta->toast_pointer) - (endchunk * TOAST_MAX_CHUNK_SIZE);
if (chunksize != expected_size) if (chunksize != expected_size)
{ report_toast_corruption(ctx, ta,
report_corruption(ctx,
psprintf("toast chunk size %u differs from the expected size %u", psprintf("toast chunk size %u differs from the expected size %u",
chunksize, expected_size)); chunksize, expected_size));
return;
}
} }
/* /*
@ -1177,17 +1242,17 @@ check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx)
* found in ctx->tupstore. * found in ctx->tupstore.
* *
* This function follows the logic performed by heap_deform_tuple(), and in the * This function follows the logic performed by heap_deform_tuple(), and in the
* case of a toasted value, optionally continues along the logic of * case of a toasted value, optionally stores the toast pointer so later it can
* detoast_external_attr(), checking for any conditions that would result in * be checked following the logic of detoast_external_attr(), checking for any
* either of those functions Asserting or crashing the backend. The checks * conditions that would result in either of those functions Asserting or
* performed by Asserts present in those two functions are also performed here. * crashing the backend. The checks performed by Asserts present in those two
* In cases where those two functions are a bit cavalier in their assumptions * functions are also performed here and in check_toasted_attribute. In cases
* about data being correct, we perform additional checks not present in either * where those two functions are a bit cavalier in their assumptions about data
* of those two functions. Where some condition is checked in both of those * being correct, we perform additional checks not present in either of those
* functions, we perform it here twice, as we parallel the logical flow of * two functions. Where some condition is checked in both of those functions,
* those two functions. The presence of duplicate checks seems a reasonable * we perform it here twice, as we parallel the logical flow of those two
* price to pay for keeping this code tightly coupled with the code it * functions. The presence of duplicate checks seems a reasonable price to pay
* protects. * for keeping this code tightly coupled with the code it protects.
* *
* Returns true if the tuple attribute is sane enough for processing to * Returns true if the tuple attribute is sane enough for processing to
* continue on to the next attribute, false otherwise. * continue on to the next attribute, false otherwise.
@ -1195,12 +1260,6 @@ check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx)
static bool static bool
check_tuple_attribute(HeapCheckContext *ctx) check_tuple_attribute(HeapCheckContext *ctx)
{ {
struct varatt_external toast_pointer;
ScanKeyData toastkey;
SysScanDesc toastscan;
SnapshotData SnapshotToast;
HeapTuple toasttup;
bool found_toasttup;
Datum attdatum; Datum attdatum;
struct varlena *attr; struct varlena *attr;
char *tp; /* pointer to the tuple data */ char *tp; /* pointer to the tuple data */
@ -1335,13 +1394,44 @@ check_tuple_attribute(HeapCheckContext *ctx)
return true; return true;
/* /*
* Must copy attr into toast_pointer for alignment considerations * If this tuple is eligible to be pruned, we cannot check the toast.
* Otherwise, we push a copy of the toast tuple so we can check it after
* releasing the main table buffer lock.
*/ */
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); if (!ctx->tuple_could_be_pruned)
{
ToastedAttribute *ta;
ctx->attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); ta = (ToastedAttribute *) palloc0(sizeof(ToastedAttribute));
ctx->endchunk = (ctx->attrsize - 1) / TOAST_MAX_CHUNK_SIZE;
ctx->totalchunks = ctx->endchunk + 1; VARATT_EXTERNAL_GET_POINTER(ta->toast_pointer, attr);
ta->blkno = ctx->blkno;
ta->offnum = ctx->offnum;
ta->attnum = ctx->attnum;
ctx->toasted_attributes = lappend(ctx->toasted_attributes, ta);
}
return true;
}
/*
* For each attribute collected in ctx->toasted_attributes, look up the value
* in the toast table and perform checks on it. This function should only be
* called on toast pointers which cannot be vacuumed away during our
* processing.
*/
static void
check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta)
{
SnapshotData SnapshotToast;
ScanKeyData toastkey;
SysScanDesc toastscan;
bool found_toasttup;
HeapTuple toasttup;
int32 chunkno;
int32 endchunk;
endchunk = (VARATT_EXTERNAL_GET_EXTSIZE(ta->toast_pointer) - 1) / TOAST_MAX_CHUNK_SIZE;
/* /*
* Setup a scan key to find chunks in toast table with matching va_valueid * Setup a scan key to find chunks in toast table with matching va_valueid
@ -1349,7 +1439,7 @@ check_tuple_attribute(HeapCheckContext *ctx)
ScanKeyInit(&toastkey, ScanKeyInit(&toastkey,
(AttrNumber) 1, (AttrNumber) 1,
BTEqualStrategyNumber, F_OIDEQ, BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(toast_pointer.va_valueid)); ObjectIdGetDatum(ta->toast_pointer.va_valueid));
/* /*
* Check if any chunks for this toasted object exist in the toast table, * Check if any chunks for this toasted object exist in the toast table,
@ -1360,27 +1450,26 @@ check_tuple_attribute(HeapCheckContext *ctx)
ctx->valid_toast_index, ctx->valid_toast_index,
&SnapshotToast, 1, &SnapshotToast, 1,
&toastkey); &toastkey);
ctx->chunkno = 0; chunkno = 0;
found_toasttup = false; found_toasttup = false;
while ((toasttup = while ((toasttup =
systable_getnext_ordered(toastscan, systable_getnext_ordered(toastscan,
ForwardScanDirection)) != NULL) ForwardScanDirection)) != NULL)
{ {
found_toasttup = true; found_toasttup = true;
check_toast_tuple(toasttup, ctx); check_toast_tuple(toasttup, ctx, ta, chunkno, endchunk);
ctx->chunkno++; chunkno++;
} }
if (!found_toasttup)
report_corruption(ctx,
psprintf("toasted value for attribute %u missing from toast table",
ctx->attnum));
else if (ctx->chunkno != (ctx->endchunk + 1))
report_corruption(ctx,
psprintf("final toast chunk number %u differs from expected value %u",
ctx->chunkno, (ctx->endchunk + 1)));
systable_endscan_ordered(toastscan); systable_endscan_ordered(toastscan);
return true; if (!found_toasttup)
report_toast_corruption(ctx, ta,
psprintf("toasted value for attribute %u missing from toast table",
ta->attnum));
else if (chunkno != (endchunk + 1))
report_toast_corruption(ctx, ta,
psprintf("final toast chunk number %u differs from expected value %u",
chunkno, (endchunk + 1)));
} }
/* /*
@ -1391,8 +1480,8 @@ static void
check_tuple(HeapCheckContext *ctx) check_tuple(HeapCheckContext *ctx)
{ {
/* /*
* Check various forms of tuple header corruption, and if the header is too * Check various forms of tuple header corruption, and if the header is
* corrupt, do not continue with other checks. * too corrupt, do not continue with other checks.
*/ */
if (!check_tuple_header(ctx)) if (!check_tuple_header(ctx))
return; return;
@ -1423,7 +1512,10 @@ check_tuple(HeapCheckContext *ctx)
* Check each attribute unless we hit corruption that confuses what to do * Check each attribute unless we hit corruption that confuses what to do
* next, at which point we abort further attribute checks for this tuple. * next, at which point we abort further attribute checks for this tuple.
* Note that we don't abort for all types of corruption, only for those * Note that we don't abort for all types of corruption, only for those
* types where we don't know how to continue. * types where we don't know how to continue. We also don't abort the
* checking of toasted attributes collected from the tuple prior to
* aborting. Those will still be checked later along with other toasted
* attributes collected from the page.
*/ */
ctx->offset = 0; ctx->offset = 0;
for (ctx->attnum = 0; ctx->attnum < ctx->natts; ctx->attnum++) for (ctx->attnum = 0; ctx->attnum < ctx->natts; ctx->attnum++)

View File

@ -2558,6 +2558,7 @@ TmFromChar
TmToChar TmToChar
ToastAttrInfo ToastAttrInfo
ToastTupleContext ToastTupleContext
ToastedAttribute
TocEntry TocEntry
TokenAuxData TokenAuxData
TokenizedLine TokenizedLine