TOAST needs to do at least minimal time-qual checking in order not to
mess up after an aborted VACUUM FULL, per today's pghackers discussion.
Add a suitable HeapTupleSatisfiesToast routine.  Remove useless special-
case test in HeapTupleSatisfiesVisibility macro for xmax =
BootstrapTransactionId; perhaps that was needed at one time, but it's
a waste of cycles now, not to mention actively wrong for SnapshotAny.
Along the way, add some much-needed comments to tqual.c, and simplify
toast_fetch_datum, which no longer needs to assume it may see chunks
out-of-order.
This commit is contained in:
Tom Lane 2002-01-16 20:29:02 +00:00
parent 0f2d949c1e
commit cf97080fa4
3 changed files with 158 additions and 59 deletions

View File

@ -4,11 +4,11 @@
* Support routines for external and compressed storage of
* variable size attributes.
*
* Copyright (c) 2000, PostgreSQL Global Development Group
* Copyright (c) 2000-2002, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.26 2001/11/05 17:46:23 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.27 2002/01/16 20:29:01 tgl Exp $
*
*
* INTERFACE ROUTINES
@ -921,7 +921,7 @@ toast_delete_datum(Relation rel, Datum value)
while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL)
{
toasttup.t_self = indexRes->heap_iptr;
heap_fetch(toastrel, SnapshotAny, &toasttup, &buffer, toastscan);
heap_fetch(toastrel, SnapshotToast, &toasttup, &buffer, toastscan);
pfree(indexRes);
if (!toasttup.t_data)
@ -963,26 +963,18 @@ toast_fetch_datum(varattrib *attr)
TupleDesc toasttupDesc;
RetrieveIndexResult indexRes;
Buffer buffer;
varattrib *result;
int32 ressize;
int32 residx;
int numchunks;
int32 residx,
nextidx;
int32 numchunks;
Pointer chunk;
bool isnull;
int32 chunksize;
char *chunks_found;
char *chunks_expected;
ressize = attr->va_content.va_external.va_extsize;
numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
chunks_found = palloc(numchunks);
chunks_expected = palloc(numchunks);
memset(chunks_found, 0, numchunks);
memset(chunks_expected, 1, numchunks);
result = (varattrib *) palloc(ressize + VARHDRSZ);
VARATT_SIZEP(result) = ressize + VARHDRSZ;
if (VARATT_IS_COMPRESSED(attr))
@ -1008,13 +1000,17 @@ toast_fetch_datum(varattrib *attr)
/*
* Read the chunks by index
*
* Note we will not necessarily see the chunks in sequence-number order.
* Note that because the index is actually on (valueid, chunkidx)
* we will see the chunks in chunkidx order, even though we didn't
* explicitly ask for it.
*/
nextidx = 0;
toastscan = index_beginscan(toastidx, false, 1, &toastkey);
while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL)
{
toasttup.t_self = indexRes->heap_iptr;
heap_fetch(toastrel, SnapshotAny, &toasttup, &buffer, toastscan);
heap_fetch(toastrel, SnapshotToast, &toasttup, &buffer, toastscan);
pfree(indexRes);
if (toasttup.t_data == NULL)
@ -1033,9 +1029,9 @@ toast_fetch_datum(varattrib *attr)
/*
* Some checks on the data we've found
*/
if (residx < 0 || residx >= numchunks)
elog(ERROR, "unexpected chunk number %d for toast value %u",
residx,
if (residx != nextidx)
elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
residx, nextidx,
attr->va_content.va_external.va_valueid);
if (residx < numchunks - 1)
{
@ -1044,15 +1040,15 @@ toast_fetch_datum(varattrib *attr)
chunksize, residx,
attr->va_content.va_external.va_valueid);
}
else
else if (residx < numchunks)
{
if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
chunksize, residx,
attr->va_content.va_external.va_valueid);
}
if (chunks_found[residx]++ > 0)
elog(ERROR, "chunk %d for toast value %u appears multiple times",
else
elog(ERROR, "unexpected chunk number %d for toast value %u",
residx,
attr->va_content.va_external.va_valueid);
@ -1064,16 +1060,16 @@ toast_fetch_datum(varattrib *attr)
chunksize);
ReleaseBuffer(buffer);
nextidx++;
}
/*
* Final checks that we successfully fetched the datum
*/
if (memcmp(chunks_found, chunks_expected, numchunks) != 0)
elog(ERROR, "not all toast chunks found for value %u",
if (nextidx != numchunks)
elog(ERROR, "missing chunk number %d for toast value %u",
nextidx,
attr->va_content.va_external.va_valueid);
pfree(chunks_expected);
pfree(chunks_found);
/*
* End scan and close relations

View File

@ -1,14 +1,22 @@
/*-------------------------------------------------------------------------
*
* tqual.c
* POSTGRES "time" qualification code.
* POSTGRES "time" qualification code, ie, tuple visibility rules.
*
* NOTE: all the HeapTupleSatisfies routines will update the tuple's
* "hint" status bits if we see that the inserting or deleting transaction
* has now committed or aborted. The caller is responsible for noticing any
* change in t_infomask and scheduling a disk write if so. Note that the
* caller must hold at least a shared buffer context lock on the buffer
* containing the tuple. (VACUUM FULL assumes it's sufficient to have
* exclusive lock on the containing relation, instead.)
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.46 2002/01/11 20:07:03 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.47 2002/01/16 20:29:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -36,8 +44,7 @@ bool ReferentialIntegritySnapshotOverride = false;
*
* Note:
* Assumes heap tuple is valid.
*/
/*
*
* The satisfaction of "itself" requires the following:
*
* ((Xmin == my-transaction && the row was updated by the current transaction, and
@ -153,8 +160,7 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple)
*
* Note:
* Assumes heap tuple is valid.
*/
/*
*
* The satisfaction of "now" requires the following:
*
* ((Xmin == my-transaction && changed by the current transaction
@ -288,6 +294,71 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple)
return false;
}
/*
* HeapTupleSatisfiesToast
* True iff heap tuple is valid for TOAST usage.
*
* This is a simplified version that only checks for VACUUM moving conditions.
* It's appropriate for TOAST usage because TOAST really doesn't want to do
* its own time qual checks; if you can see the main-table row that contains
* a TOAST reference, you should be able to see the TOASTed value. However,
* vacuuming a TOAST table is independent of the main table, and in case such
* a vacuum fails partway through, we'd better do this much checking.
*
* Among other things, this means you can't do UPDATEs of rows in a TOAST
* table.
*
* Only t_infomask and t_cmin are examined; the tuple's xmin and xmax values
* are never consulted.  A tuple that carries neither HEAP_MOVED_OFF nor
* HEAP_MOVED_IN is accepted unless HEAP_XMIN_INVALID is already set.
*
* Side effect: may set HEAP_XMIN_COMMITTED or HEAP_XMIN_INVALID in
* tuple->t_infomask once the transaction recorded in t_cmin is seen to be
* finished.
*/
bool
HeapTupleSatisfiesToast(HeapTupleHeader tuple)
{
/* If HEAP_XMIN_COMMITTED is already set, no further checking is done. */
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
{
/* Previously marked invalid: reject immediately. */
if (tuple->t_infomask & HEAP_XMIN_INVALID)
return false;
/*
* Moved-off tuple.  t_cmin is interpreted as a transaction id here --
* presumably that of the VACUUM that moved the tuple; the code that
* stores it is not visible in this file section (TODO confirm).
*/
if (tuple->t_infomask & HEAP_MOVED_OFF)
{
/* Moved off by the current transaction: treat as not valid. */
if (TransactionIdIsCurrentTransactionId((TransactionId) tuple->t_cmin))
return false;
/*
* If the moving transaction is still in progress we fall through and
* accept the tuple without touching the hint bits.
*/
if (!TransactionIdIsInProgress((TransactionId) tuple->t_cmin))
{
/* The move committed, so this copy is dead: remember that. */
if (TransactionIdDidCommit((TransactionId) tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
/* The move did not commit, so this copy is still good. */
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
}
}
/* Moved-in tuple: t_cmin is again read as a transaction id. */
else if (tuple->t_infomask & HEAP_MOVED_IN)
{
/*
* A tuple moved in by the current transaction is accepted as-is (no
* hint bit is set); otherwise check the mover's status.
*/
if (!TransactionIdIsCurrentTransactionId((TransactionId) tuple->t_cmin))
{
/* Move by another transaction still in progress: not valid yet. */
if (TransactionIdIsInProgress((TransactionId) tuple->t_cmin))
return false;
/* The move committed: this copy is good, remember that. */
if (TransactionIdDidCommit((TransactionId) tuple->t_cmin))
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
else
{
/* The move did not commit: this copy is dead, remember that. */
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
}
}
/* otherwise assume the tuple is valid for TOAST. */
return true;
}
/*
* HeapTupleSatisfiesUpdate
* Check whether a tuple can be updated.
*
* This applies exactly the same checks as HeapTupleSatisfiesNow,
* but returns a more-detailed result code, since UPDATE needs to know
* more than "is it visible?"
*/
int
HeapTupleSatisfiesUpdate(HeapTuple htuple)
{
@ -404,6 +475,18 @@ HeapTupleSatisfiesUpdate(HeapTuple htuple)
return HeapTupleUpdated; /* updated by other */
}
/*
* HeapTupleSatisfiesDirty
* True iff heap tuple is valid, including effects of concurrent xacts.
*
* This is essentially like HeapTupleSatisfiesItself as far as effects of
* the current transaction and committed/aborted xacts are concerned.
* However, we also include the effects of other xacts still in progress.
*
* Returns extra information in the global variable SnapshotDirty, namely
* xids of concurrent xacts that affected the tuple. Also, the tuple's
* t_ctid (forward link) is returned if it's being updated.
*/
bool
HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
{
@ -516,6 +599,18 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
return false; /* updated by other */
}
/*
* HeapTupleSatisfiesSnapshot
* True iff heap tuple is valid for the given snapshot.
*
* This is the same as HeapTupleSatisfiesNow, except that transactions that
* were in progress or as yet unstarted when the snapshot was taken will
* be treated as uncommitted, even if they really have committed by now.
*
* (Notice, however, that the tuple status hint bits will be updated on the
* basis of the true state of the transaction, even if we then pretend we
* can't see it.)
*/
bool
HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
{
@ -658,11 +753,6 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
* deleted by XIDs >= OldestXmin are deemed "recently dead"; they might
* still be visible to some open transaction, so we can't remove them,
* even if we see that the deleting transaction has committed.
*
* As with the other HeapTupleSatisfies routines, we may update the tuple's
* "hint" status bits if we see that the inserting or deleting transaction
* has now committed or aborted. The caller is responsible for noticing any
* change in t_infomask and scheduling a disk write if so.
*/
HTSV_Result
HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin)
@ -808,13 +898,21 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin)
}
/*
* SetQuerySnapshot
* Initialize query snapshot for a new query
*
* The SerializableSnapshot is the first one taken in a transaction.
* In serializable mode we just use that one throughout the transaction.
* In read-committed mode, we take a new snapshot at the start of each query.
*/
void
SetQuerySnapshot(void)
{
/* Initialize snapshot overriding to false */
ReferentialIntegritySnapshotOverride = false;
/* 1st call in xaction */
/* 1st call in xaction? */
if (SerializableSnapshot == NULL)
{
SerializableSnapshot = GetSnapshotData(true);
@ -837,6 +935,10 @@ SetQuerySnapshot(void)
Assert(QuerySnapshot != NULL);
}
/*
* FreeXactSnapshot
* Free snapshot(s) at end of transaction.
*/
void
FreeXactSnapshot(void)
{

View File

@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* tqual.h
* POSTGRES "time" qualification definitions.
* POSTGRES "time" qualification definitions, ie, tuple visibility rules.
*
* Should be moved/renamed... - vadim 07/28/98
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: tqual.h,v 1.37 2001/11/05 17:46:36 momjian Exp $
* $Id: tqual.h,v 1.38 2002/01/16 20:29:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -34,6 +34,7 @@ typedef SnapshotData *Snapshot;
#define SnapshotNow ((Snapshot) 0x0)
#define SnapshotSelf ((Snapshot) 0x1)
#define SnapshotAny ((Snapshot) 0x2)
#define SnapshotToast ((Snapshot) 0x3)
extern DLLIMPORT Snapshot SnapshotDirty;
extern DLLIMPORT Snapshot QuerySnapshot;
@ -44,37 +45,36 @@ extern bool ReferentialIntegritySnapshotOverride;
#define IsSnapshotNow(snapshot) ((Snapshot) (snapshot) == SnapshotNow)
#define IsSnapshotSelf(snapshot) ((Snapshot) (snapshot) == SnapshotSelf)
#define IsSnapshotAny(snapshot) ((Snapshot) (snapshot) == SnapshotAny)
#define IsSnapshotToast(snapshot) ((Snapshot) (snapshot) == SnapshotToast)
#define IsSnapshotDirty(snapshot) ((Snapshot) (snapshot) == SnapshotDirty)
/*
* HeapTupleSatisfiesVisibility
* True iff heap tuple satsifies a time qual.
* True iff heap tuple satisfies a time qual.
*
* Notes:
* Assumes heap tuple is valid.
* Beware of multiple evaluations of arguments.
* Beware of multiple evaluations of snapshot argument.
*/
#define HeapTupleSatisfiesVisibility(tuple, snapshot) \
( \
TransactionIdEquals((tuple)->t_data->t_xmax, BootstrapTransactionId) ? \
false \
(IsSnapshotNow(snapshot) ? \
HeapTupleSatisfiesNow((tuple)->t_data) \
: \
(IsSnapshotSelf(snapshot) ? \
HeapTupleSatisfiesItself((tuple)->t_data) \
: \
( \
IsSnapshotAny(snapshot) ? \
(IsSnapshotAny(snapshot) ? \
true \
: \
(IsSnapshotSelf(snapshot) ? \
HeapTupleSatisfiesItself((tuple)->t_data) \
(IsSnapshotToast(snapshot) ? \
HeapTupleSatisfiesToast((tuple)->t_data) \
: \
(IsSnapshotNow(snapshot) ? \
HeapTupleSatisfiesNow((tuple)->t_data) \
(IsSnapshotDirty(snapshot) ? \
HeapTupleSatisfiesDirty((tuple)->t_data) \
: \
(IsSnapshotDirty(snapshot) ? \
HeapTupleSatisfiesDirty((tuple)->t_data) \
: \
HeapTupleSatisfiesSnapshot((tuple)->t_data, snapshot) \
) \
HeapTupleSatisfiesSnapshot((tuple)->t_data, snapshot) \
) \
) \
) \
) \
@ -93,14 +93,15 @@ typedef enum
HEAPTUPLE_DEAD, /* tuple is dead and deletable */
HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in
* progress */
HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in
* progress */
HEAPTUPLE_DELETE_IN_PROGRESS /* deleting xact is still in progress */
} HTSV_Result;
extern bool HeapTupleSatisfiesItself(HeapTupleHeader tuple);
extern bool HeapTupleSatisfiesNow(HeapTupleHeader tuple);
extern bool HeapTupleSatisfiesDirty(HeapTupleHeader tuple);
extern bool HeapTupleSatisfiesToast(HeapTupleHeader tuple);
extern bool HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple,
Snapshot snapshot);
extern int HeapTupleSatisfiesUpdate(HeapTuple tuple);