TOAST needs to do at least minimal time-qual checking in order not to
mess up after an aborted VACUUM FULL, per today's pghackers discussion.
Add a suitable HeapTupleSatisfiesToast routine.  Remove useless special-
case test in HeapTupleSatisfiesVisibility macro for xmax =
BootstrapTransactionId; perhaps that was needed at one time, but it's
a waste of cycles now, not to mention actively wrong for SnapshotAny.
Along the way, add some much-needed comments to tqual.c, and simplify
toast_fetch_datum, which no longer needs to assume it may see chunks
out-of-order.
This commit is contained in:
Tom Lane 2002-01-16 20:29:02 +00:00
parent 0f2d949c1e
commit cf97080fa4
3 changed files with 158 additions and 59 deletions

View File

@ -4,11 +4,11 @@
* Support routines for external and compressed storage of * Support routines for external and compressed storage of
* variable size attributes. * variable size attributes.
* *
* Copyright (c) 2000, PostgreSQL Global Development Group * Copyright (c) 2000-2002, PostgreSQL Global Development Group
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.26 2001/11/05 17:46:23 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.27 2002/01/16 20:29:01 tgl Exp $
* *
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
@ -921,7 +921,7 @@ toast_delete_datum(Relation rel, Datum value)
while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL) while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL)
{ {
toasttup.t_self = indexRes->heap_iptr; toasttup.t_self = indexRes->heap_iptr;
heap_fetch(toastrel, SnapshotAny, &toasttup, &buffer, toastscan); heap_fetch(toastrel, SnapshotToast, &toasttup, &buffer, toastscan);
pfree(indexRes); pfree(indexRes);
if (!toasttup.t_data) if (!toasttup.t_data)
@ -963,26 +963,18 @@ toast_fetch_datum(varattrib *attr)
TupleDesc toasttupDesc; TupleDesc toasttupDesc;
RetrieveIndexResult indexRes; RetrieveIndexResult indexRes;
Buffer buffer; Buffer buffer;
varattrib *result; varattrib *result;
int32 ressize; int32 ressize;
int32 residx; int32 residx,
int numchunks; nextidx;
int32 numchunks;
Pointer chunk; Pointer chunk;
bool isnull; bool isnull;
int32 chunksize; int32 chunksize;
char *chunks_found;
char *chunks_expected;
ressize = attr->va_content.va_external.va_extsize; ressize = attr->va_content.va_external.va_extsize;
numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
chunks_found = palloc(numchunks);
chunks_expected = palloc(numchunks);
memset(chunks_found, 0, numchunks);
memset(chunks_expected, 1, numchunks);
result = (varattrib *) palloc(ressize + VARHDRSZ); result = (varattrib *) palloc(ressize + VARHDRSZ);
VARATT_SIZEP(result) = ressize + VARHDRSZ; VARATT_SIZEP(result) = ressize + VARHDRSZ;
if (VARATT_IS_COMPRESSED(attr)) if (VARATT_IS_COMPRESSED(attr))
@ -1008,13 +1000,17 @@ toast_fetch_datum(varattrib *attr)
/* /*
* Read the chunks by index * Read the chunks by index
* *
* Note we will not necessarily see the chunks in sequence-number order. * Note that because the index is actually on (valueid, chunkidx)
* we will see the chunks in chunkidx order, even though we didn't
* explicitly ask for it.
*/ */
nextidx = 0;
toastscan = index_beginscan(toastidx, false, 1, &toastkey); toastscan = index_beginscan(toastidx, false, 1, &toastkey);
while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL) while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL)
{ {
toasttup.t_self = indexRes->heap_iptr; toasttup.t_self = indexRes->heap_iptr;
heap_fetch(toastrel, SnapshotAny, &toasttup, &buffer, toastscan); heap_fetch(toastrel, SnapshotToast, &toasttup, &buffer, toastscan);
pfree(indexRes); pfree(indexRes);
if (toasttup.t_data == NULL) if (toasttup.t_data == NULL)
@ -1033,9 +1029,9 @@ toast_fetch_datum(varattrib *attr)
/* /*
* Some checks on the data we've found * Some checks on the data we've found
*/ */
if (residx < 0 || residx >= numchunks) if (residx != nextidx)
elog(ERROR, "unexpected chunk number %d for toast value %u", elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
residx, residx, nextidx,
attr->va_content.va_external.va_valueid); attr->va_content.va_external.va_valueid);
if (residx < numchunks - 1) if (residx < numchunks - 1)
{ {
@ -1044,15 +1040,15 @@ toast_fetch_datum(varattrib *attr)
chunksize, residx, chunksize, residx,
attr->va_content.va_external.va_valueid); attr->va_content.va_external.va_valueid);
} }
else else if (residx < numchunks)
{ {
if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize) if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u", elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
chunksize, residx, chunksize, residx,
attr->va_content.va_external.va_valueid); attr->va_content.va_external.va_valueid);
} }
if (chunks_found[residx]++ > 0) else
elog(ERROR, "chunk %d for toast value %u appears multiple times", elog(ERROR, "unexpected chunk number %d for toast value %u",
residx, residx,
attr->va_content.va_external.va_valueid); attr->va_content.va_external.va_valueid);
@ -1064,16 +1060,16 @@ toast_fetch_datum(varattrib *attr)
chunksize); chunksize);
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
nextidx++;
} }
/* /*
* Final checks that we successfully fetched the datum * Final checks that we successfully fetched the datum
*/ */
if (memcmp(chunks_found, chunks_expected, numchunks) != 0) if (nextidx != numchunks)
elog(ERROR, "not all toast chunks found for value %u", elog(ERROR, "missing chunk number %d for toast value %u",
nextidx,
attr->va_content.va_external.va_valueid); attr->va_content.va_external.va_valueid);
pfree(chunks_expected);
pfree(chunks_found);
/* /*
* End scan and close relations * End scan and close relations

View File

@ -1,14 +1,22 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* tqual.c * tqual.c
* POSTGRES "time" qualification code. * POSTGRES "time" qualification code, ie, tuple visibility rules.
*
* NOTE: all the HeapTupleSatisfies routines will update the tuple's
* "hint" status bits if we see that the inserting or deleting transaction
* has now committed or aborted. The caller is responsible for noticing any
* change in t_infomask and scheduling a disk write if so. Note that the
* caller must hold at least a shared buffer context lock on the buffer
* containing the tuple. (VACUUM FULL assumes it's sufficient to have
* exclusive lock on the containing relation, instead.)
*
* *
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
*
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.46 2002/01/11 20:07:03 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.47 2002/01/16 20:29:02 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -36,8 +44,7 @@ bool ReferentialIntegritySnapshotOverride = false;
* *
* Note: * Note:
* Assumes heap tuple is valid. * Assumes heap tuple is valid.
*/ *
/*
* The satisfaction of "itself" requires the following: * The satisfaction of "itself" requires the following:
* *
* ((Xmin == my-transaction && the row was updated by the current transaction, and * ((Xmin == my-transaction && the row was updated by the current transaction, and
@ -153,8 +160,7 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple)
* *
* Note: * Note:
* Assumes heap tuple is valid. * Assumes heap tuple is valid.
*/ *
/*
* The satisfaction of "now" requires the following: * The satisfaction of "now" requires the following:
* *
* ((Xmin == my-transaction && changed by the current transaction * ((Xmin == my-transaction && changed by the current transaction
@ -288,6 +294,71 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple)
return false; return false;
} }
/*
 * HeapTupleSatisfiesToast
 *		True iff heap tuple is valid for TOAST usage.
 *
 * TOAST does not want to perform its own time qualification: anyone who
 * can see the main-table row holding a TOAST reference should be able to
 * see the TOASTed value too.  The one thing that must still be checked is
 * the "VACUUM moved this tuple" state, because a TOAST table is vacuumed
 * independently of its main table and such a vacuum may fail partway
 * through.  Hence this reduced test looks only at the xmin hint bits and
 * the HEAP_MOVED_OFF / HEAP_MOVED_IN flags.
 *
 * One consequence (among others) is that rows in a TOAST table cannot be
 * UPDATEd.
 *
 * Side effect: when the moving transaction is found to have finished,
 * HEAP_XMIN_COMMITTED or HEAP_XMIN_INVALID is set in tuple->t_infomask.
 */
bool
HeapTupleSatisfiesToast(HeapTupleHeader tuple)
{
	/*
	 * NOTE(review): for MOVED_OFF/MOVED_IN tuples t_cmin is reinterpreted
	 * as a transaction id -- presumably that of the VACUUM which moved the
	 * tuple; confirm against the VACUUM code.
	 */
	TransactionId moverXid;

	/* Hint bits already settle the question. */
	if (tuple->t_infomask & HEAP_XMIN_COMMITTED)
		return true;
	if (tuple->t_infomask & HEAP_XMIN_INVALID)
		return false;

	if (tuple->t_infomask & HEAP_MOVED_OFF)
	{
		moverXid = (TransactionId) tuple->t_cmin;

		/* Moved off by our own transaction: treat as gone. */
		if (TransactionIdIsCurrentTransactionId(moverXid))
			return false;

		/* Mover still running: the old copy remains valid, no hint yet. */
		if (TransactionIdIsInProgress(moverXid))
			return true;

		/* Mover finished; record the outcome in the hint bits. */
		if (TransactionIdDidCommit(moverXid))
		{
			tuple->t_infomask |= HEAP_XMIN_INVALID;
			return false;
		}
		tuple->t_infomask |= HEAP_XMIN_COMMITTED;
		return true;
	}

	if (tuple->t_infomask & HEAP_MOVED_IN)
	{
		moverXid = (TransactionId) tuple->t_cmin;

		/* Moved in by our own transaction: valid, hint bits untouched. */
		if (TransactionIdIsCurrentTransactionId(moverXid))
			return true;

		/* Some other mover is still running: new copy not yet valid. */
		if (TransactionIdIsInProgress(moverXid))
			return false;

		/* Mover finished; record the outcome in the hint bits. */
		if (!TransactionIdDidCommit(moverXid))
		{
			tuple->t_infomask |= HEAP_XMIN_INVALID;
			return false;
		}
		tuple->t_infomask |= HEAP_XMIN_COMMITTED;
	}

	/* otherwise assume the tuple is valid for TOAST. */
	return true;
}
/*
* HeapTupleSatisfiesUpdate
* Check whether a tuple can be updated.
*
* This applies exactly the same checks as HeapTupleSatisfiesNow,
* but returns a more-detailed result code, since UPDATE needs to know
* more than "is it visible?"
*/
int int
HeapTupleSatisfiesUpdate(HeapTuple htuple) HeapTupleSatisfiesUpdate(HeapTuple htuple)
{ {
@ -404,6 +475,18 @@ HeapTupleSatisfiesUpdate(HeapTuple htuple)
return HeapTupleUpdated; /* updated by other */ return HeapTupleUpdated; /* updated by other */
} }
/*
* HeapTupleSatisfiesDirty
* True iff heap tuple is valid, including effects of concurrent xacts.
*
* This is essentially like HeapTupleSatisfiesItself as far as effects of
* the current transaction and committed/aborted xacts are concerned.
* However, we also include the effects of other xacts still in progress.
*
* Returns extra information in the global variable SnapshotDirty, namely
* xids of concurrent xacts that affected the tuple. Also, the tuple's
* t_ctid (forward link) is returned if it's being updated.
*/
bool bool
HeapTupleSatisfiesDirty(HeapTupleHeader tuple) HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
{ {
@ -516,6 +599,18 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
return false; /* updated by other */ return false; /* updated by other */
} }
/*
* HeapTupleSatisfiesSnapshot
* True iff heap tuple is valid for the given snapshot.
*
* This is the same as HeapTupleSatisfiesNow, except that transactions that
* were in progress or as yet unstarted when the snapshot was taken will
* be treated as uncommitted, even if they really have committed by now.
*
* (Notice, however, that the tuple status hint bits will be updated on the
* basis of the true state of the transaction, even if we then pretend we
* can't see it.)
*/
bool bool
HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot) HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
{ {
@ -658,11 +753,6 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
* deleted by XIDs >= OldestXmin are deemed "recently dead"; they might * deleted by XIDs >= OldestXmin are deemed "recently dead"; they might
* still be visible to some open transaction, so we can't remove them, * still be visible to some open transaction, so we can't remove them,
* even if we see that the deleting transaction has committed. * even if we see that the deleting transaction has committed.
*
* As with the other HeapTupleSatisfies routines, we may update the tuple's
* "hint" status bits if we see that the inserting or deleting transaction
* has now committed or aborted. The caller is responsible for noticing any
* change in t_infomask and scheduling a disk write if so.
*/ */
HTSV_Result HTSV_Result
HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin) HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin)
@ -808,13 +898,21 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin)
} }
/*
* SetQuerySnapshot
* Initialize query snapshot for a new query
*
* The SerializableSnapshot is the first one taken in a transaction.
* In serializable mode we just use that one throughout the transaction.
* In read-committed mode, we take a new snapshot at the start of each query.
*/
void void
SetQuerySnapshot(void) SetQuerySnapshot(void)
{ {
/* Initialize snapshot overriding to false */ /* Initialize snapshot overriding to false */
ReferentialIntegritySnapshotOverride = false; ReferentialIntegritySnapshotOverride = false;
/* 1st call in xaction */ /* 1st call in xaction? */
if (SerializableSnapshot == NULL) if (SerializableSnapshot == NULL)
{ {
SerializableSnapshot = GetSnapshotData(true); SerializableSnapshot = GetSnapshotData(true);
@ -837,6 +935,10 @@ SetQuerySnapshot(void)
Assert(QuerySnapshot != NULL); Assert(QuerySnapshot != NULL);
} }
/*
* FreeXactSnapshot
* Free snapshot(s) at end of transaction.
*/
void void
FreeXactSnapshot(void) FreeXactSnapshot(void)
{ {

View File

@ -1,14 +1,14 @@
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* *
* tqual.h * tqual.h
* POSTGRES "time" qualification definitions. * POSTGRES "time" qualification definitions, ie, tuple visibility rules.
* *
* Should be moved/renamed... - vadim 07/28/98 * Should be moved/renamed... - vadim 07/28/98
* *
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: tqual.h,v 1.37 2001/11/05 17:46:36 momjian Exp $ * $Id: tqual.h,v 1.38 2002/01/16 20:29:02 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -34,6 +34,7 @@ typedef SnapshotData *Snapshot;
#define SnapshotNow ((Snapshot) 0x0) #define SnapshotNow ((Snapshot) 0x0)
#define SnapshotSelf ((Snapshot) 0x1) #define SnapshotSelf ((Snapshot) 0x1)
#define SnapshotAny ((Snapshot) 0x2) #define SnapshotAny ((Snapshot) 0x2)
#define SnapshotToast ((Snapshot) 0x3)
extern DLLIMPORT Snapshot SnapshotDirty; extern DLLIMPORT Snapshot SnapshotDirty;
extern DLLIMPORT Snapshot QuerySnapshot; extern DLLIMPORT Snapshot QuerySnapshot;
@ -44,37 +45,36 @@ extern bool ReferentialIntegritySnapshotOverride;
#define IsSnapshotNow(snapshot) ((Snapshot) (snapshot) == SnapshotNow) #define IsSnapshotNow(snapshot) ((Snapshot) (snapshot) == SnapshotNow)
#define IsSnapshotSelf(snapshot) ((Snapshot) (snapshot) == SnapshotSelf) #define IsSnapshotSelf(snapshot) ((Snapshot) (snapshot) == SnapshotSelf)
#define IsSnapshotAny(snapshot) ((Snapshot) (snapshot) == SnapshotAny) #define IsSnapshotAny(snapshot) ((Snapshot) (snapshot) == SnapshotAny)
#define IsSnapshotToast(snapshot) ((Snapshot) (snapshot) == SnapshotToast)
#define IsSnapshotDirty(snapshot) ((Snapshot) (snapshot) == SnapshotDirty) #define IsSnapshotDirty(snapshot) ((Snapshot) (snapshot) == SnapshotDirty)
/* /*
* HeapTupleSatisfiesVisibility * HeapTupleSatisfiesVisibility
* True iff heap tuple satsifies a time qual. * True iff heap tuple satisfies a time qual.
* *
* Notes: * Notes:
* Assumes heap tuple is valid. * Assumes heap tuple is valid.
* Beware of multiple evaluations of arguments. * Beware of multiple evaluations of snapshot argument.
*/ */
#define HeapTupleSatisfiesVisibility(tuple, snapshot) \ #define HeapTupleSatisfiesVisibility(tuple, snapshot) \
( \ (IsSnapshotNow(snapshot) ? \
TransactionIdEquals((tuple)->t_data->t_xmax, BootstrapTransactionId) ? \ HeapTupleSatisfiesNow((tuple)->t_data) \
false \ : \
(IsSnapshotSelf(snapshot) ? \
HeapTupleSatisfiesItself((tuple)->t_data) \
: \ : \
( \ (IsSnapshotAny(snapshot) ? \
IsSnapshotAny(snapshot) ? \
true \ true \
: \ : \
(IsSnapshotSelf(snapshot) ? \ (IsSnapshotToast(snapshot) ? \
HeapTupleSatisfiesItself((tuple)->t_data) \ HeapTupleSatisfiesToast((tuple)->t_data) \
: \ : \
(IsSnapshotNow(snapshot) ? \ (IsSnapshotDirty(snapshot) ? \
HeapTupleSatisfiesNow((tuple)->t_data) \ HeapTupleSatisfiesDirty((tuple)->t_data) \
: \ : \
(IsSnapshotDirty(snapshot) ? \ HeapTupleSatisfiesSnapshot((tuple)->t_data, snapshot) \
HeapTupleSatisfiesDirty((tuple)->t_data) \ ) \
: \
HeapTupleSatisfiesSnapshot((tuple)->t_data, snapshot) \
) \
) \ ) \
) \ ) \
) \ ) \
@ -93,14 +93,15 @@ typedef enum
HEAPTUPLE_DEAD, /* tuple is dead and deletable */ HEAPTUPLE_DEAD, /* tuple is dead and deletable */
HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */ HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */ HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in
* progress */ * progress */
HEAPTUPLE_DELETE_IN_PROGRESS /* deleting xact is still in progress */ HEAPTUPLE_DELETE_IN_PROGRESS /* deleting xact is still in progress */
} HTSV_Result; } HTSV_Result;
extern bool HeapTupleSatisfiesItself(HeapTupleHeader tuple); extern bool HeapTupleSatisfiesItself(HeapTupleHeader tuple);
extern bool HeapTupleSatisfiesNow(HeapTupleHeader tuple); extern bool HeapTupleSatisfiesNow(HeapTupleHeader tuple);
extern bool HeapTupleSatisfiesDirty(HeapTupleHeader tuple); extern bool HeapTupleSatisfiesDirty(HeapTupleHeader tuple);
extern bool HeapTupleSatisfiesToast(HeapTupleHeader tuple);
extern bool HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, extern bool HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple,
Snapshot snapshot); Snapshot snapshot);
extern int HeapTupleSatisfiesUpdate(HeapTuple tuple); extern int HeapTupleSatisfiesUpdate(HeapTuple tuple);