Prevent "snapshot too old" from trying to return pruned TOAST tuples.

Previously, we tested for MVCC snapshots to see whether they were too
old, but not TOAST snapshots, which can lead to complaints about missing
TOAST chunks if those chunks are subject to early pruning.  Ideally,
the threshold lsn and timestamp for a TOAST snapshot would be that of
the corresponding MVCC snapshot, but since we have no way of deciding
which MVCC snapshot was used to fetch the TOAST pointer, use the oldest
active or registered snapshot instead.

Reported by Andres Freund, who also sketched out what the fix should
look like.  Patch by me, reviewed by Amit Kapila.
This commit is contained in:
Robert Haas 2016-08-03 16:41:43 -04:00
parent a3c7a993d5
commit 3e2f3c2e42
6 changed files with 96 additions and 16 deletions

View File

@ -40,6 +40,7 @@
#include "utils/expandeddatum.h"
#include "utils/fmgroids.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/typcache.h"
#include "utils/tqual.h"
@ -81,6 +82,7 @@ static int toast_open_indexes(Relation toastrel,
int *num_indexes);
static void toast_close_indexes(Relation *toastidxs, int num_indexes,
LOCKMODE lock);
static void init_toast_snapshot(Snapshot toast_snapshot);
/* ----------
@ -1665,6 +1667,7 @@ toast_delete_datum(Relation rel, Datum value)
HeapTuple toasttup;
int num_indexes;
int validIndex;
SnapshotData SnapshotToast;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
return;
@ -1696,8 +1699,9 @@ toast_delete_datum(Relation rel, Datum value)
* sequence or not, but since we've already locked the index we might as
* well use systable_beginscan_ordered.)
*/
init_toast_snapshot(&SnapshotToast);
toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
SnapshotToast, 1, &toastkey);
&SnapshotToast, 1, &toastkey);
while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
/*
@ -1730,6 +1734,7 @@ toastrel_valueid_exists(Relation toastrel, Oid valueid)
int num_indexes;
int validIndex;
Relation *toastidxs;
SnapshotData SnapshotToast;
/* Fetch a valid index relation */
validIndex = toast_open_indexes(toastrel,
@ -1748,9 +1753,10 @@ toastrel_valueid_exists(Relation toastrel, Oid valueid)
/*
* Is there any such chunk?
*/
init_toast_snapshot(&SnapshotToast);
toastscan = systable_beginscan(toastrel,
RelationGetRelid(toastidxs[validIndex]),
true, SnapshotToast, 1, &toastkey);
true, &SnapshotToast, 1, &toastkey);
if (systable_getnext(toastscan) != NULL)
result = true;
@ -1813,6 +1819,7 @@ toast_fetch_datum(struct varlena * attr)
int32 chunksize;
int num_indexes;
int validIndex;
SnapshotData SnapshotToast;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
@ -1859,8 +1866,9 @@ toast_fetch_datum(struct varlena * attr)
*/
nextidx = 0;
init_toast_snapshot(&SnapshotToast);
toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
SnapshotToast, 1, &toastkey);
&SnapshotToast, 1, &toastkey);
while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
/*
@ -1990,6 +1998,7 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
int32 chcpyend;
int num_indexes;
int validIndex;
SnapshotData SnapshotToast;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
@ -2082,9 +2091,10 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
*
* The index is on (valueid, chunkidx) so they will come in order
*/
init_toast_snapshot(&SnapshotToast);
nextidx = startchunk;
toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
SnapshotToast, nscankeys, toastkey);
&SnapshotToast, nscankeys, toastkey);
while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
/*
@ -2289,3 +2299,22 @@ toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
index_close(toastidxs[i], lock);
pfree(toastidxs);
}
/* ----------
* init_toast_snapshot
*
* Initialize an appropriate TOAST snapshot. We must use an MVCC snapshot
* to initialize the TOAST snapshot; since we don't know which one to use,
* just use the oldest one. This is safe: at worst, we will get a "snapshot
* too old" error that might have been avoided otherwise.
*/
static void
init_toast_snapshot(Snapshot toast_snapshot)
{
Snapshot snapshot = GetOldestSnapshot();
if (snapshot == NULL)
elog(ERROR, "no known snapshots");
InitToastSnapshot(toast_snapshot, snapshot->lsn, snapshot->whenTaken);
}

View File

@ -188,6 +188,9 @@ typedef struct ActiveSnapshotElt
/* Top of the stack of active snapshots */
static ActiveSnapshotElt *ActiveSnapshot = NULL;
/* Bottom of the stack of active snapshots */
static ActiveSnapshotElt *OldestActiveSnapshot = NULL;
/*
* Currently registered Snapshots. Ordered in a heap by xmin, so that we can
* quickly find the one with lowest xmin, to advance our MyPgXat->xmin.
@ -393,6 +396,34 @@ GetLatestSnapshot(void)
return SecondarySnapshot;
}
/*
* GetOldestSnapshot
*
* Get the oldest known snapshot, as judged by the LSN.
*/
Snapshot
GetOldestSnapshot(void)
{
Snapshot OldestRegisteredSnapshot = NULL;
XLogRecPtr RegisteredLSN = InvalidXLogRecPtr;
XLogRecPtr ActiveLSN = InvalidXLogRecPtr;
if (!pairingheap_is_empty(&RegisteredSnapshots))
{
OldestRegisteredSnapshot = pairingheap_container(SnapshotData, ph_node,
pairingheap_first(&RegisteredSnapshots));
RegisteredLSN = OldestRegisteredSnapshot->lsn;
}
if (OldestActiveSnapshot != NULL)
ActiveLSN = OldestActiveSnapshot->as_snap->lsn;
if (XLogRecPtrIsInvalid(RegisteredLSN) || RegisteredLSN > ActiveLSN)
return OldestActiveSnapshot->as_snap;
return OldestRegisteredSnapshot;
}
/*
* GetCatalogSnapshot
* Get a snapshot that is sufficiently up-to-date for scan of the
@ -674,6 +705,8 @@ PushActiveSnapshot(Snapshot snap)
newactive->as_snap->active_count++;
ActiveSnapshot = newactive;
if (OldestActiveSnapshot == NULL)
OldestActiveSnapshot = ActiveSnapshot;
}
/*
@ -744,6 +777,8 @@ PopActiveSnapshot(void)
pfree(ActiveSnapshot);
ActiveSnapshot = newstack;
if (ActiveSnapshot == NULL)
OldestActiveSnapshot = NULL;
SnapshotResetXmin();
}
@ -953,6 +988,8 @@ AtSubAbort_Snapshot(int level)
pfree(ActiveSnapshot);
ActiveSnapshot = next;
if (ActiveSnapshot == NULL)
OldestActiveSnapshot = NULL;
}
SnapshotResetXmin();
@ -1037,6 +1074,7 @@ AtEOXact_Snapshot(bool isCommit)
* it'll go away with TopTransactionContext.
*/
ActiveSnapshot = NULL;
OldestActiveSnapshot = NULL;
pairingheap_reset(&RegisteredSnapshots);
CurrentSnapshot = NULL;

View File

@ -78,7 +78,6 @@
/* Static variables representing various special snapshot semantics */
SnapshotData SnapshotSelfData = {HeapTupleSatisfiesSelf};
SnapshotData SnapshotAnyData = {HeapTupleSatisfiesAny};
SnapshotData SnapshotToastData = {HeapTupleSatisfiesToast};
/* local functions */
static bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot);

View File

@ -279,7 +279,8 @@ TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
if (old_snapshot_threshold >= 0
&& (snapshot) != NULL
&& (snapshot)->satisfies == HeapTupleSatisfiesMVCC
&& ((snapshot)->satisfies == HeapTupleSatisfiesMVCC
|| (snapshot)->satisfies == HeapTupleSatisfiesToast)
&& !XLogRecPtrIsInvalid((snapshot)->lsn)
&& PageGetLSN(page) > (snapshot)->lsn)
TestForOldSnapshot_impl(snapshot, relation);

View File

@ -64,6 +64,7 @@ extern TransactionId RecentGlobalDataXmin;
extern Snapshot GetTransactionSnapshot(void);
extern Snapshot GetLatestSnapshot(void);
extern void SnapshotSetCommandId(CommandId curcid);
extern Snapshot GetOldestSnapshot(void);
extern Snapshot GetCatalogSnapshot(Oid relid);
extern Snapshot GetNonHistoricCatalogSnapshot(Oid relid);

View File

@ -16,25 +16,16 @@
#define TQUAL_H
#include "utils/snapshot.h"
#include "access/xlogdefs.h"
/* Static variables representing various special snapshot semantics */
extern PGDLLIMPORT SnapshotData SnapshotSelfData;
extern PGDLLIMPORT SnapshotData SnapshotAnyData;
extern PGDLLIMPORT SnapshotData SnapshotToastData;
extern PGDLLIMPORT SnapshotData CatalogSnapshotData;
#define SnapshotSelf (&SnapshotSelfData)
#define SnapshotAny (&SnapshotAnyData)
#define SnapshotToast (&SnapshotToastData)
/*
* We don't provide a static SnapshotDirty variable because it would be
* non-reentrant. Instead, users of that snapshot type should declare a
* local variable of type SnapshotData, and initialize it with this macro.
*/
#define InitDirtySnapshot(snapshotdata) \
((snapshotdata).satisfies = HeapTupleSatisfiesDirty)
/* This macro encodes the knowledge of which snapshots are MVCC-safe */
#define IsMVCCSnapshot(snapshot) \
@ -100,4 +91,25 @@ extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data,
HeapTuple htup,
Buffer buffer,
CommandId *cmin, CommandId *cmax);
/*
* We don't provide a static SnapshotDirty variable because it would be
* non-reentrant. Instead, users of that snapshot type should declare a
* local variable of type SnapshotData, and initialize it with this macro.
*/
#define InitDirtySnapshot(snapshotdata) \
((snapshotdata).satisfies = HeapTupleSatisfiesDirty)
/*
* Similarly, some initialization is required for SnapshotToast. We need
* to set lsn and whenTaken correctly to support snapshot_too_old.
*/
static inline void
InitToastSnapshot(Snapshot snapshot, XLogRecPtr lsn, int64 whenTaken)
{
snapshot->satisfies = HeapTupleSatisfiesToast;
snapshot->lsn = lsn;
snapshot->whenTaken = whenTaken;
}
#endif /* TQUAL_H */