tableam: Avoid relying on relation size to determine validity of tids.

Instead, add a tableam callback to do so. To avoid adding per-validation
overhead, pass a scan to tuple_tid_valid. In heap's case we'd otherwise
have incurred a RelationGetNumberOfBlocks() call for each tid, which
would have added noticeable overhead to nodeTidscan.c.

Author: Andres Freund
Reviewed-By: Ashwin Agrawal
Discussion: https://postgr.es/m/20190515185447.gno2jtqxyktylyvs@alap3.anarazel.de
Andres Freund 2019-05-17 18:52:01 -07:00
parent 7f44ede594
commit 147e3722f7
7 changed files with 129 additions and 57 deletions
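
For orientation, here is a minimal sketch of the calling convention this commit establishes (not part of the patch; the helper name fetch_latest_version is invented): both TID validation and latest-version lookup go through a scan descriptor, mirroring what currtid_byreloid() does in the tid.c hunk below.

#include "postgres.h"

#include "access/tableam.h"
#include "storage/itemptr.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"

/*
 * Illustrative only: chase a user-supplied tid to its latest version,
 * silently ignoring tids the AM deems invalid (as nodeTidscan.c now does).
 */
static void
fetch_latest_version(Relation rel, ItemPointer tid)
{
    Snapshot    snapshot = RegisterSnapshot(GetLatestSnapshot());
    TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);

    if (table_tuple_tid_valid(scan, tid))
        table_get_latest_tid(scan, tid);    /* updates *tid in place */

    table_endscan(scan);
    UnregisterSnapshot(snapshot);
}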

src/backend/access/heap/heapam.c

@@ -1654,8 +1654,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
/*
* heap_get_latest_tid - get the latest tid of a specified tuple
*
* Actually, this gets the latest version that is visible according to
* the passed snapshot. You can pass SnapshotDirty to get the very latest,
* Actually, this gets the latest version that is visible according to the
* scan's snapshot. Create a scan using SnapshotDirty to get the very latest,
* possibly uncommitted version.
*
* *tid is both an input and an output parameter: it is updated to
@@ -1663,28 +1663,20 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
* if no version of the row passes the snapshot test.
*/
void
heap_get_latest_tid(Relation relation,
Snapshot snapshot,
heap_get_latest_tid(TableScanDesc sscan,
ItemPointer tid)
{
BlockNumber blk;
Relation relation = sscan->rs_rd;
Snapshot snapshot = sscan->rs_snapshot;
ItemPointerData ctid;
TransactionId priorXmax;
/* this is to avoid Assert failures on bad input */
if (!ItemPointerIsValid(tid))
return;
/*
* Since this can be called with user-supplied TID, don't trust the input
* too much. (RelationGetNumberOfBlocks is an expensive check, so we
* don't check t_ctid links again this way. Note that it would not do to
* call it just once and save the result, either.)
* table_get_latest_tid verified that the passed in tid is valid. Assume
* that t_ctid links are valid however - there shouldn't be invalid ones
* in the table.
*/
blk = ItemPointerGetBlockNumber(tid);
if (blk >= RelationGetNumberOfBlocks(relation))
elog(ERROR, "block number %u is out of range for relation \"%s\"",
blk, RelationGetRelationName(relation));
Assert(ItemPointerIsValid(tid));
/*
* Loop to chase down t_ctid links. At top of loop, ctid is the tuple we

src/backend/access/heap/heapam_handler.c

@@ -204,6 +204,15 @@ heapam_fetch_row_version(Relation relation,
return false;
}
static bool
heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
{
HeapScanDesc hscan = (HeapScanDesc) scan;
return ItemPointerIsValid(tid) &&
ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
}
static bool
heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
Snapshot snapshot)
@@ -2568,6 +2577,7 @@ static const TableAmRoutine heapam_methods = {
.tuple_fetch_row_version = heapam_fetch_row_version,
.tuple_get_latest_tid = heap_get_latest_tid,
.tuple_tid_valid = heapam_tuple_tid_valid,
.tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
.compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,
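
heapam can answer the new callback cheaply because rs_nblocks is captured when the scan is initialized, so no RelationGetNumberOfBlocks() call happens per tid. A hypothetical non-heap AM (the ToyScanDescData names below are invented for illustration, not part of the patch) would follow the same shape: cache whatever bound it needs at scan start and compare against it in the callback.

#include "postgres.h"

#include "access/relscan.h"
#include "access/tableam.h"
#include "storage/itemptr.h"

/* invented scan descriptor for an imaginary table AM */
typedef struct ToyScanDescData
{
    TableScanDescData rs_base;  /* AM-independent part, must be first */
    BlockNumber rs_maxblock;    /* highest block that existed at scan start */
} ToyScanDescData;

static bool
toyam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
{
    ToyScanDescData *tscan = (ToyScanDescData *) scan;

    /* cheap per-tid check against the bound cached at scan start */
    return ItemPointerIsValid(tid) &&
        ItemPointerGetBlockNumber(tid) <= tscan->rs_maxblock;
}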

src/backend/access/table/tableam.c

@@ -213,6 +213,33 @@ table_index_fetch_tuple_check(Relation rel,
}
/* ------------------------------------------------------------------------
* Functions for non-modifying operations on individual tuples
* ------------------------------------------------------------------------
*/
void
table_get_latest_tid(TableScanDesc scan, ItemPointer tid)
{
Relation rel = scan->rs_rd;
const TableAmRoutine *tableam = rel->rd_tableam;
/*
* Since this can be called with user-supplied TID, don't trust the input
* too much.
*/
if (!tableam->tuple_tid_valid(scan, tid))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("tid (%u, %u) is not valid for relation for relation \"%s\"",
ItemPointerGetBlockNumberNoCheck(tid),
ItemPointerGetOffsetNumberNoCheck(tid),
RelationGetRelationName(rel))));
tableam->tuple_get_latest_tid(scan, tid);
}
/* ----------------------------------------------------------------------------
* Functions to make modifications a bit simpler.
* ----------------------------------------------------------------------------
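
As a hedged illustration of the new contract (the helper and relation name below are hypothetical, not from the patch): a caller that skips table_tuple_tid_valid() and hands heap an out-of-range tid now gets a clean invalid-parameter error from table_get_latest_tid() itself, instead of every call site doing its own RelationGetNumberOfBlocks() comparison.

#include "postgres.h"

#include "access/tableam.h"
#include "storage/itemptr.h"

/* illustration only: provoke the new validation error path */
static void
fetch_bogus_tid(TableScanDesc scan)
{
    ItemPointerData tid;

    /* an implausibly large block number, to trip the validity check */
    ItemPointerSet(&tid, MaxBlockNumber, 1);

    /*
     * For heap, a tid past the end of the table now ereports
     * ERRCODE_INVALID_PARAMETER_VALUE, e.g.
     *   tid (4294967294, 1) is not valid for relation "sometable"
     */
    table_get_latest_tid(scan, &tid);
}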

src/backend/executor/nodeTidscan.c

@@ -129,19 +129,23 @@ static void
TidListEval(TidScanState *tidstate)
{
ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
BlockNumber nblocks;
TableScanDesc scan;
ItemPointerData *tidList;
int numAllocTids;
int numTids;
ListCell *l;
/*
* We silently discard any TIDs that are out of range at the time of scan
* start. (Since we hold at least AccessShareLock on the table, it won't
* be possible for someone to truncate away the blocks we intend to
* visit.)
* Start the scan on demand - initializing a scan isn't free (e.g. heap
* needs to determine the size of the table), so it makes sense to delay
* that until needed - the node might never get executed.
*/
nblocks = RelationGetNumberOfBlocks(tidstate->ss.ss_currentRelation);
if (tidstate->ss.ss_currentScanDesc == NULL)
tidstate->ss.ss_currentScanDesc =
table_beginscan(tidstate->ss.ss_currentRelation,
tidstate->ss.ps.state->es_snapshot,
0, NULL);
scan = tidstate->ss.ss_currentScanDesc;
/*
* We initialize the array with enough slots for the case that all quals
@@ -165,19 +169,27 @@ TidListEval(TidScanState *tidstate)
DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
econtext,
&isNull));
if (!isNull &&
ItemPointerIsValid(itemptr) &&
ItemPointerGetBlockNumber(itemptr) < nblocks)
if (isNull)
continue;
/*
* We silently discard any TIDs that the AM considers invalid
* (e.g. for heap, they could be out of range at the time of scan
* start; since we hold at least AccessShareLock on the table, it
* won't be possible for someone to truncate away the blocks we
* intend to visit).
*/
if (!table_tuple_tid_valid(scan, itemptr))
continue;
if (numTids >= numAllocTids)
{
if (numTids >= numAllocTids)
{
numAllocTids *= 2;
tidList = (ItemPointerData *)
repalloc(tidList,
numAllocTids * sizeof(ItemPointerData));
}
tidList[numTids++] = *itemptr;
numAllocTids *= 2;
tidList = (ItemPointerData *)
repalloc(tidList,
numAllocTids * sizeof(ItemPointerData));
}
tidList[numTids++] = *itemptr;
}
else if (tidexpr->exprstate && tidexpr->isarray)
{
@@ -206,13 +218,15 @@ TidListEval(TidScanState *tidstate)
}
for (i = 0; i < ndatums; i++)
{
if (!ipnulls[i])
{
itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
if (ItemPointerIsValid(itemptr) &&
ItemPointerGetBlockNumber(itemptr) < nblocks)
tidList[numTids++] = *itemptr;
}
if (ipnulls[i])
continue;
itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
if (!table_tuple_tid_valid(scan, itemptr))
continue;
tidList[numTids++] = *itemptr;
}
pfree(ipdatums);
pfree(ipnulls);
@@ -306,6 +320,7 @@ TidNext(TidScanState *node)
EState *estate;
ScanDirection direction;
Snapshot snapshot;
TableScanDesc scan;
Relation heapRelation;
TupleTableSlot *slot;
ItemPointerData *tidList;
@@ -327,6 +342,7 @@ TidNext(TidScanState *node)
if (node->tss_TidList == NULL)
TidListEval(node);
scan = node->ss.ss_currentScanDesc;
tidList = node->tss_TidList;
numTids = node->tss_NumTids;
@@ -365,7 +381,7 @@ TidNext(TidScanState *node)
* current according to our snapshot.
*/
if (node->tss_isCurrentOf)
table_get_latest_tid(heapRelation, snapshot, &tid);
table_get_latest_tid(scan, &tid);
if (table_fetch_row_version(heapRelation, &tid, snapshot, slot))
return slot;
@@ -442,6 +458,10 @@ ExecReScanTidScan(TidScanState *node)
node->tss_NumTids = 0;
node->tss_TidPtr = -1;
/* not really necessary, but seems good form */
if (node->ss.ss_currentScanDesc)
table_rescan(node->ss.ss_currentScanDesc, NULL);
ExecScanReScan(&node->ss);
}
@@ -455,6 +475,9 @@ ExecReScanTidScan(TidScanState *node)
void
ExecEndTidScan(TidScanState *node)
{
if (node->ss.ss_currentScanDesc)
table_endscan(node->ss.ss_currentScanDesc);
/*
* Free the exprcontext
*/
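
Taken together, the nodeTidscan.c hunks above give the node a simple scan-descriptor lifecycle. The sketch below (helper names are mine, not the patch's) restates it in one place: create lazily on first use, reset on rescan, release at shutdown.

#include "postgres.h"

#include "access/tableam.h"
#include "nodes/execnodes.h"

/* lazily create the scan: the node might never be executed at all */
static void
tidscan_ensure_scan(TidScanState *node)
{
    if (node->ss.ss_currentScanDesc == NULL)
        node->ss.ss_currentScanDesc =
            table_beginscan(node->ss.ss_currentRelation,
                            node->ss.ps.state->es_snapshot,
                            0, NULL);
}

/* on rescan, reuse the descriptor rather than rebuilding it */
static void
tidscan_reset_scan(TidScanState *node)
{
    if (node->ss.ss_currentScanDesc)
        table_rescan(node->ss.ss_currentScanDesc, NULL);
}

/* at node shutdown, release the descriptor if it was ever created */
static void
tidscan_close_scan(TidScanState *node)
{
    if (node->ss.ss_currentScanDesc)
        table_endscan(node->ss.ss_currentScanDesc);
}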

src/backend/utils/adt/tid.c

@@ -358,6 +358,7 @@ currtid_byreloid(PG_FUNCTION_ARGS)
Relation rel;
AclResult aclresult;
Snapshot snapshot;
TableScanDesc scan;
result = (ItemPointer) palloc(sizeof(ItemPointerData));
if (!reloid)
@@ -380,7 +381,9 @@ currtid_byreloid(PG_FUNCTION_ARGS)
ItemPointerCopy(tid, result);
snapshot = RegisterSnapshot(GetLatestSnapshot());
table_get_latest_tid(rel, snapshot, result);
scan = table_beginscan(rel, snapshot, 0, NULL);
table_get_latest_tid(scan, result);
table_endscan(scan);
UnregisterSnapshot(snapshot);
table_close(rel, AccessShareLock);
@@ -398,6 +401,7 @@ currtid_byrelname(PG_FUNCTION_ARGS)
Relation rel;
AclResult aclresult;
Snapshot snapshot;
TableScanDesc scan;
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
rel = table_openrv(relrv, AccessShareLock);
@@ -415,7 +419,9 @@ currtid_byrelname(PG_FUNCTION_ARGS)
ItemPointerCopy(tid, result);
snapshot = RegisterSnapshot(GetLatestSnapshot());
table_get_latest_tid(rel, snapshot, result);
scan = table_beginscan(rel, snapshot, 0, NULL);
table_get_latest_tid(scan, result);
table_endscan(scan);
UnregisterSnapshot(snapshot);
table_close(rel, AccessShareLock);

src/include/access/heapam.h

@@ -134,8 +134,7 @@ extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
bool *all_dead, bool first_call);
extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
ItemPointer tid);
extern void heap_get_latest_tid(TableScanDesc scan, ItemPointer tid);
extern void setLastTid(const ItemPointer tid);
extern BulkInsertState GetBulkInsertState(void);

src/include/access/tableam.h

@@ -308,12 +308,17 @@ typedef struct TableAmRoutine
Snapshot snapshot,
TupleTableSlot *slot);
/*
* Is the given tid valid for a scan of this relation?
*/
bool (*tuple_tid_valid) (TableScanDesc scan,
ItemPointer tid);
/*
* Return the latest version of the tuple at `tid`, by updating `tid` to
* point at the newest version.
*/
void (*tuple_get_latest_tid) (Relation rel,
Snapshot snapshot,
void (*tuple_get_latest_tid) (TableScanDesc scan,
ItemPointer tid);
/*
@@ -548,10 +553,10 @@ typedef struct TableAmRoutine
/*
* See table_relation_size().
*
* Note that currently a few callers use the MAIN_FORKNUM size to vet the
* validity of tids (e.g. nodeTidscans.c), and others use it to figure out
* the range of potentially interesting blocks (brin, analyze). The
* abstraction around this will need to be improved in the near future.
* Note that currently a few callers use the MAIN_FORKNUM size to figure
* out the range of potentially interesting blocks (brin, analyze). It's
* probable that we'll need to revise the interface for those at some
* point.
*/
uint64 (*relation_size) (Relation rel, ForkNumber forkNumber);
@@ -985,15 +990,25 @@ table_fetch_row_version(Relation rel,
return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
}
/*
* Verify that `tid` is a potentially valid tuple identifier. That doesn't
* mean that the pointed to row needs to exist or be visible, but that
* attempting to fetch the row (e.g. with table_get_latest_tid() or
* table_fetch_row_version()) should not error out if called with that tid.
*
* `scan` needs to have been started via table_beginscan().
*/
static inline bool
table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
{
return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
}
/*
* Return the latest version of the tuple at `tid`, by updating `tid` to
* point at the newest version.
*/
static inline void
table_get_latest_tid(Relation rel, Snapshot snapshot, ItemPointer tid)
{
rel->rd_tableam->tuple_get_latest_tid(rel, snapshot, tid);
}
extern void table_get_latest_tid(TableScanDesc scan, ItemPointer tid);
/*
* Return true iff tuple in slot satisfies the snapshot.