diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README index 3c70394844..067d15c803 100644 --- a/src/backend/access/nbtree/README +++ b/src/backend/access/nbtree/README @@ -525,8 +525,12 @@ MVCC scans is not required on standby nodes. That is because HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesDirty() and HeapTupleSatisfiesVacuum() are only ever used during write transactions, which cannot exist on the standby. -This leaves HeapTupleSatisfiesMVCC() and HeapTupleSatisfiesToast(), so -HeapTupleSatisfiesToast() is the only non-MVCC scan type used on standbys. +This leaves HeapTupleSatisfiesMVCC() and HeapTupleSatisfiesToast(). +HeapTupleSatisfiesToast() doesn't use MVCC semantics, though that's +because it doesn't need to - if the main heap row is visible then the +toast rows will also be visible. So as long as we follow a toast +pointer from a visible (live) tuple the corresponding toast rows +will also be visible, so we do not need to recheck MVCC on them. There is one minor exception, which is that the optimizer sometimes looks at the boundaries of value ranges using SnapshotDirty, which could result in returning a newer value for query statistics; this @@ -536,13 +540,6 @@ in the index, so the scan retrieves a tid then immediately uses it to look in the heap. It is unlikely that the tid could have been deleted, vacuumed and re-inserted in the time taken to look in the heap via direct tid access. So we ignore that scan type as a problem. -This means if we re-check the results of any scan of a toast index we -will be able to completely avoid performing the "pin scan" operation -during replay of VACUUM WAL records. - -XXX FIXME: Toast re-checks are not yet added, so we still perform the -pin scan when replaying vacuum records of toast indexes. 
- Other Things That Are Handy to Know ----------------------------------- diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index f2905cb734..bf8ade375d 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -22,7 +22,6 @@ #include "access/relscan.h" #include "access/xlog.h" #include "catalog/index.h" -#include "catalog/pg_namespace.h" #include "commands/vacuum.h" #include "storage/indexfsm.h" #include "storage/ipc.h" @@ -833,8 +832,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, /* * Check to see if we need to issue one final WAL record for this index, * which may be needed for correctness on a hot standby node when - * non-MVCC index scans could take place. This now only occurs when we - * perform a TOAST scan, so only occurs for TOAST indexes. + * non-MVCC index scans could take place. * * If the WAL is replayed in hot standby, the replay process needs to get * cleanup locks on all index leaf pages, just as we've been doing here. @@ -846,7 +844,6 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, * against the last leaf page in the index, if that one wasn't vacuumed. */ if (XLogStandbyInfoActive() && - rel->rd_rel->relnamespace == PG_TOAST_NAMESPACE && vstate.lastBlockVacuumed < vstate.lastBlockLocked) { Buffer buf; @@ -1045,25 +1042,14 @@ restart: */ if (ndeletable > 0) { - BlockNumber lastBlockVacuumed = InvalidBlockNumber; - /* - * We may need to record the lastBlockVacuumed for use when - * non-MVCC scans might be performed on the index on a - * hot standby. See explanation in btree_xlog_vacuum(). - * - * On a hot standby, a non-MVCC scan can only take place - * when we access a Toast Index, so we need only record - * the lastBlockVacuumed if we are vacuuming a Toast Index. 
- */ - if (rel->rd_rel->relnamespace == PG_TOAST_NAMESPACE) - lastBlockVacuumed = vstate->lastBlockVacuumed; - - /* - * Notice that the issued XLOG_BTREE_VACUUM WAL record includes an - * instruction to the replay code to get cleanup lock on all pages - * between the previous lastBlockVacuumed and this page. This - * ensures that WAL replay locks all leaf pages at some point. + * Notice that the issued XLOG_BTREE_VACUUM WAL record includes all + * the information the replay code needs to get a cleanup lock + * on all pages between the previous lastBlockVacuumed and this page. + * That would let WAL replay lock all leaf pages at some point, + * which is important should non-MVCC scans be requested. + * Replay on a standby currently ignores it, but we record it anyway, + * so that the WAL contains the required information. * * Since we can visit leaf pages out-of-order when recursing, * replay might end up locking such pages an extra time, but it @@ -1071,7 +1057,7 @@ restart: * that. */ _bt_delitems_vacuum(rel, buf, deletable, ndeletable, - lastBlockVacuumed); + vstate->lastBlockVacuumed); /* * Remember highest leaf page number we've issued a diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 0d094ca7fa..f8691bbc44 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -385,17 +385,21 @@ static void btree_xlog_vacuum(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; - xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record); Buffer buffer; Page page; BTPageOpaque opaque; +#ifdef UNUSED + xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record); /* + * This section of code is thought to be no longer needed, after + * analysis of the calling paths. It is retained to allow the code + * to be reinstated if a flaw is revealed in that thinking. 
+ * * If we are running non-MVCC scans using this index we need to do some * additional work to ensure correctness, which is known as a "pin scan" * described in more detail in next paragraphs. We used to do the extra - * work in all cases, whereas we now avoid that work except when the index - * is a toast index, since toast scans aren't fully MVCC compliant. + * work in all cases, whereas we now avoid that work in most cases. * If lastBlockVacuumed is set to InvalidBlockNumber then we skip the * additional work required for the pin scan. * @@ -458,6 +462,7 @@ btree_xlog_vacuum(XLogReaderState *record) } } } +#endif /* * Like in btvacuumpage(), we need to take a cleanup lock on every leaf