Pass down "logically unchanged index" hint.

Add an executor aminsert() hint mechanism that informs index AMs that
the incoming index tuple (the tuple that accompanies the hint) is not
being inserted by execution of an SQL statement that logically modifies
any of the index's key columns.

The hint is received by indexes when an UPDATE takes place that does not
apply an optimization like heapam's HOT (though only for indexes where
all key columns are logically unchanged).  Any index tuple that receives
the hint on insert is expected to be a duplicate of at least one
existing older version that is needed for the same logical row.  Related
versions will typically be stored on the same index page, at least
within index AMs that apply the hint.

Recognizing the difference between MVCC version churn duplicates and
true logical row duplicates at the index AM level can help with cleanup
of garbage index tuples.  Cleanup can intelligently target tuples that
are likely to be garbage, without wasting too many cycles on less
promising tuples/pages (index pages with little or no version churn).

This is infrastructure for an upcoming commit that will teach nbtree to
perform bottom-up index deletion.  No index AM actually applies the hint
just yet.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Victor Yegorov <vyegorov@gmail.com>
Discussion: https://postgr.es/m/CAH2-Wz=CEKFa74EScx_hFVshCOn6AA5T-ajFASTdzipdkLTNQQ@mail.gmail.com
This commit is contained in:
Peter Geoghegan 2021-01-13 08:11:00 -08:00
parent 39b03690b5
commit 9dc718bdf2
31 changed files with 214 additions and 25 deletions

View File

@ -198,6 +198,7 @@ bool
blinsert(Relation index, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
BloomState blstate;

View File

@ -192,6 +192,7 @@ extern bool blvalidate(Oid opclassoid);
extern bool blinsert(Relation index, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
extern IndexScanDesc blbeginscan(Relation r, int nkeys, int norderbys);
extern int64 blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);

View File

@ -293,6 +293,7 @@ aminsert (Relation indexRelation,
ItemPointer heap_tid,
Relation heapRelation,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo);
</programlisting>
Insert a new tuple into an existing index. The <literal>values</literal> and
@ -308,6 +309,20 @@ aminsert (Relation indexRelation,
look into the heap to verify tuple liveness).
</para>
<para>
The <literal>indexUnchanged</literal> boolean value gives a hint
about the nature of the tuple to be indexed. When it is true,
the tuple is a duplicate of some existing tuple in the index. The
new tuple is a logically unchanged successor MVCC tuple version. This
happens when an <command>UPDATE</command> takes place that does not
modify any columns covered by the index, but nevertheless requires a
new version in the index. The index AM may use this hint to decide
to apply bottom-up index deletion in parts of the index where many
versions of the same logical row accumulate. Note that updating a
non-key column does not affect the value of
<literal>indexUnchanged</literal>.
</para>
<para>
The function's Boolean result value is significant only when
<literal>checkUnique</literal> is <literal>UNIQUE_CHECK_PARTIAL</literal>.

View File

@ -151,6 +151,7 @@ bool
brininsert(Relation idxRel, Datum *values, bool *nulls,
ItemPointer heaptid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
BlockNumber pagesPerRange;

View File

@ -328,7 +328,7 @@ toast_save_datum(Relation rel, Datum value,
toastrel,
toastidxs[i]->rd_index->indisunique ?
UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
NULL);
false, NULL);
}
/*

View File

@ -488,6 +488,7 @@ bool
gininsert(Relation index, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
GinState *ginstate = (GinState *) indexInfo->ii_AmCache;

View File

@ -156,6 +156,7 @@ bool
gistinsert(Relation r, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
GISTSTATE *giststate = (GISTSTATE *) indexInfo->ii_AmCache;

View File

@ -247,6 +247,7 @@ bool
hashinsert(Relation rel, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
Datum index_values[1];

View File

@ -1956,6 +1956,7 @@ heapam_index_validate_scan(Relation heapRelation,
heapRelation,
indexInfo->ii_Unique ?
UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
false,
indexInfo);
state->tups_inserted += 1;

View File

@ -179,6 +179,7 @@ index_insert(Relation indexRelation,
ItemPointer heap_t_ctid,
Relation heapRelation,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
RELATION_CHECKS;
@ -191,7 +192,8 @@ index_insert(Relation indexRelation,
return indexRelation->rd_indam->aminsert(indexRelation, values, isnull,
heap_t_ctid, heapRelation,
checkUnique, indexInfo);
checkUnique, indexUnchanged,
indexInfo);
}
/*

View File

@ -199,6 +199,7 @@ bool
btinsert(Relation rel, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
bool result;

View File

@ -207,6 +207,7 @@ bool
spginsert(Relation index, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
SpGistState spgstate;

View File

@ -162,6 +162,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple)
heapRelation,
index->rd_index->indisunique ?
UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
false,
indexInfo);
}

View File

@ -175,7 +175,7 @@ unique_key_recheck(PG_FUNCTION_ARGS)
*/
index_insert(indexRel, values, isnull, &checktid,
trigdata->tg_relation, UNIQUE_CHECK_EXISTING,
indexInfo);
false, indexInfo);
}
else
{

View File

@ -342,8 +342,8 @@ CopyMultiInsertBufferFlush(CopyMultiInsertInfo *miinfo,
cstate->cur_lineno = buffer->linenos[i];
recheckIndexes =
ExecInsertIndexTuples(resultRelInfo,
buffer->slots[i], estate, false, NULL,
NIL);
buffer->slots[i], estate, false, false,
NULL, NIL);
ExecARInsertTriggers(estate, resultRelInfo,
slots[i], recheckIndexes,
cstate->transition_capture);
@ -1087,6 +1087,7 @@ CopyFrom(CopyFromState cstate)
myslot,
estate,
false,
false,
NULL,
NIL);
}

View File

@ -71,10 +71,8 @@ int SessionReplicationRole = SESSION_REPLICATION_ROLE_ORIGIN;
static int MyTriggerDepth = 0;
/*
* Note that similar macros also exist in executor/execMain.c. There does not
* appear to be any good header to put them into, given the structures that
* they use, so we let them be duplicated. Be sure to update all if one needs
* to be changed, however.
* The authoritative version of this macro is in executor/execMain.c. Be sure
* to keep everything in sync.
*/
#define GetAllUpdatedColumns(relinfo, estate) \
(bms_union(exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->updatedCols, \

View File

@ -124,6 +124,15 @@ typedef enum
CEOUC_LIVELOCK_PREVENTING_WAIT
} CEOUC_WAIT_MODE;
/*
* Accessors for the updatedCols and extraUpdatedCols fields of the entry
* that exec_rt_fetch() returns for a result relation's ri_RangeTableIndex.
*
* The authoritative versions of these macros are in executor/execMain.c. Be
* sure to keep everything in sync.
*/
#define GetUpdatedColumns(relinfo, estate) \
(exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->updatedCols)
#define GetExtraUpdatedColumns(relinfo, estate) \
(exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->extraUpdatedCols)
static bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
IndexInfo *indexInfo,
ItemPointer tupleid,
@ -136,6 +145,11 @@ static bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
static bool index_recheck_constraint(Relation index, Oid *constr_procs,
Datum *existing_values, bool *existing_isnull,
Datum *new_values);
static bool index_unchanged_by_update(ResultRelInfo *resultRelInfo,
EState *estate, IndexInfo *indexInfo,
Relation indexRelation);
static bool index_expression_changed_walker(Node *node,
Bitmapset *allUpdatedCols);
/* ----------------------------------------------------------------
* ExecOpenIndices
@ -254,6 +268,16 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
* into all the relations indexing the result relation
* when a heap tuple is inserted into the result relation.
*
* When 'update' is true, executor is performing an UPDATE
* that could not use an optimization like heapam's HOT (in
* more general terms a call to table_tuple_update() took
* place and set 'update_indexes' to true). Receiving this
* hint makes us consider if we should pass down the
* 'indexUnchanged' hint in turn. That's something that we
* figure out for each index_insert() call iff 'update' is
* true. (When 'update' is false we already know not to pass
* the hint to any index.)
*
* Unique and exclusion constraints are enforced at the same
* time. This returns a list of index OIDs for any unique or
* exclusion constraints that are deferred and that had
@ -263,16 +287,13 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
*
* If 'arbiterIndexes' is nonempty, noDupErr applies only to
* those indexes. NIL means noDupErr applies to all indexes.
*
* CAUTION: this must not be called for a HOT update.
* We can't defend against that here for lack of info.
* Should we change the API to make it safer?
* ----------------------------------------------------------------
*/
List *
ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot,
EState *estate,
bool update,
bool noDupErr,
bool *specConflict,
List *arbiterIndexes)
@ -319,6 +340,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
IndexInfo *indexInfo;
bool applyNoDupErr;
IndexUniqueCheck checkUnique;
bool indexUnchanged;
bool satisfiesConstraint;
if (indexRelation == NULL)
@ -389,6 +411,16 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
else
checkUnique = UNIQUE_CHECK_PARTIAL;
/*
* There's definitely going to be an index_insert() call for this
* index. If we're being called as part of an UPDATE statement,
* consider if the 'indexUnchanged' = true hint should be passed.
*/
indexUnchanged = update && index_unchanged_by_update(resultRelInfo,
estate,
indexInfo,
indexRelation);
satisfiesConstraint =
index_insert(indexRelation, /* index relation */
values, /* array of index Datums */
@ -396,6 +428,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
tupleid, /* tid of heap tuple */
heapRelation, /* heap relation */
checkUnique, /* type of uniqueness check to do */
indexUnchanged, /* UPDATE without logical change? */
indexInfo); /* index AM may need this */
/*
@ -899,3 +932,122 @@ index_recheck_constraint(Relation index, Oid *constr_procs,
return true;
}
/*
 * Decide whether ExecInsertIndexTuples() should pass the 'indexUnchanged'
 * hint to one particular index.
 *
 * This is consulted when the executor performs an UPDATE that requires a new
 * round of index tuples.  Returns true when no key column of the index, and
 * no Var inside any of its indexed expressions, overlaps with the columns
 * recorded as updated (updatedCols plus extraUpdatedCols).  Returns false as
 * soon as any such overlap is found.
 */
static bool
index_unchanged_by_update(ResultRelInfo *resultRelInfo, EState *estate,
						  IndexInfo *indexInfo, Relation indexRelation)
{
	Bitmapset  *updatedCols = GetUpdatedColumns(resultRelInfo, estate);
	Bitmapset  *extraUpdatedCols = GetExtraUpdatedColumns(resultRelInfo, estate);
	Bitmapset  *mergedCols;
	List	   *exprList;
	bool		sawExpression = false;
	bool		exprChanged;

	/*
	 * First pass: plain key columns only.
	 *
	 * Non-key (INCLUDE) columns are deliberately ignored, because the loop
	 * stops at ii_NumIndexKeyAttrs.  Their values are opaque payload as far
	 * as the index AM is concerned, so a change there does not make the
	 * index "logically changed".
	 */
	for (int i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
	{
		int			attno = indexInfo->ii_IndexAttrNumbers[i];

		if (attno > 0)
		{
			int			member = attno - FirstLowInvalidHeapAttributeNumber;

			/* An updated key column rules out the hint immediately */
			if (bms_is_member(member, updatedCols) ||
				bms_is_member(member, extraUpdatedCols))
				return false;
		}
		else
		{
			/* Expression column: defer to the second pass below */
			sawExpression = true;
		}
	}

	/*
	 * No key column overlaps with the updated columns.  If the index has no
	 * expression columns either, it is logically unchanged by this UPDATE,
	 * so the hint can be passed.
	 */
	if (!sawExpression)
		return true;

	/*
	 * The expression walker takes a single bitmapset.  Only build a merged
	 * set when there really are extra updated columns; otherwise reuse
	 * updatedCols directly and skip the allocation.
	 */
	mergedCols = (extraUpdatedCols != NULL)
		? bms_union(updatedCols, extraUpdatedCols)
		: updatedCols;

	/*
	 * Second pass: look for Vars within the indexed expressions that refer
	 * to an updated column.  Same principle as for plain key columns: any
	 * match means no hint.
	 */
	exprList = RelationGetIndexExpressions(indexRelation);
	exprChanged = index_expression_changed_walker((Node *) exprList,
												  mergedCols);
	list_free(exprList);

	/* Release the merged set only if we allocated it above */
	if (extraUpdatedCols != NULL)
		bms_free(mergedCols);

	return !exprChanged;
}
/*
 * Expression-tree helper for index_unchanged_by_update().
 *
 * Reports true when a Var whose attribute number is a member of
 * allUpdatedCols is located, meaning the 'indexUnchanged' hint must not be
 * passed.  A NULL node, or a Var that is not in the set, yields false.
 * Any other node type is handed to expression_tree_walker() with this
 * function as the callback.
 */
static bool
index_expression_changed_walker(Node *node, Bitmapset *allUpdatedCols)
{
	if (node == NULL)
		return false;

	/* Not a Var: let the generic walker recurse with us as callback */
	if (!IsA(node, Var))
		return expression_tree_walker(node, index_expression_changed_walker,
									  (void *) allUpdatedCols);

	/*
	 * For a Var the answer is simply whether its column was updated: true
	 * means "don't hint", false means no reason found yet to withhold it.
	 */
	return bms_is_member(((Var *) node)->varattno -
						 FirstLowInvalidHeapAttributeNumber,
						 allUpdatedCols);
}

View File

@ -101,10 +101,10 @@ static char *ExecBuildSlotValueDescription(Oid reloid,
static void EvalPlanQualStart(EPQState *epqstate, Plan *planTree);
/*
* Note that GetAllUpdatedColumns() also exists in commands/trigger.c. There does
* not appear to be any good header to put it into, given the structures that
* it uses, so we let them be duplicated. Be sure to update both if one needs
* to be changed, however.
* Note that variants of these macros exist in commands/trigger.c and in
* execIndexing.c. There does not appear to be any good header to put them
* into, given the structures that they use, so we let them be duplicated. Be
* sure to keep everything in sync.
*/
#define GetInsertedColumns(relinfo, estate) \
(exec_rt_fetch((relinfo)->ri_RangeTableIndex, estate)->insertedCols)

View File

@ -444,8 +444,8 @@ ExecSimpleRelationInsert(ResultRelInfo *resultRelInfo,
if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
slot, estate, false, NULL,
NIL);
slot, estate, false, false,
NULL, NIL);
/* AFTER ROW INSERT Triggers */
ExecARInsertTriggers(estate, resultRelInfo, slot,
@ -512,8 +512,8 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
slot, estate, false, NULL,
NIL);
slot, estate, true, false,
NULL, NIL);
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(estate, resultRelInfo,

View File

@ -599,7 +599,7 @@ ExecInsert(ModifyTableState *mtstate,
/* insert index entries for tuple */
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
slot, estate, true,
slot, estate, false, true,
&specConflict,
arbiterIndexes);
@ -640,7 +640,7 @@ ExecInsert(ModifyTableState *mtstate,
if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
slot, estate, false,
NULL, NIL);
false, NULL, NIL);
}
}
@ -1511,7 +1511,7 @@ lreplace:;
/* insert index entries for tuple if necessary */
if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
slot, estate, false,
slot, estate, true, false,
NULL, NIL);
}

View File

@ -1309,7 +1309,8 @@ apply_handle_update(StringInfo s)
InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0);
/*
* Populate updatedCols so that per-column triggers can fire. This could
* Populate updatedCols so that per-column triggers can fire, and so
* executor can correctly pass down indexUnchanged hint. This could
* include more columns than were actually changed on the publisher
* because the logical replication protocol doesn't contain that
* information. But it would for example exclude columns that only exist

View File

@ -110,6 +110,7 @@ typedef bool (*aminsert_function) (Relation indexRelation,
ItemPointer heap_tid,
Relation heapRelation,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
/* bulk delete */

View File

@ -91,6 +91,7 @@ extern void brinbuildempty(Relation index);
extern bool brininsert(Relation idxRel, Datum *values, bool *nulls,
ItemPointer heaptid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
extern IndexScanDesc brinbeginscan(Relation r, int nkeys, int norderbys);
extern int64 bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm);

View File

@ -143,6 +143,7 @@ extern bool index_insert(Relation indexRelation,
ItemPointer heap_t_ctid,
Relation heapRelation,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
extern IndexScanDesc index_beginscan(Relation heapRelation,

View File

@ -116,6 +116,7 @@ extern void ginbuildempty(Relation index);
extern bool gininsert(Relation index, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
extern void ginEntryInsert(GinState *ginstate,
OffsetNumber attnum, Datum key, GinNullCategory category,

View File

@ -403,6 +403,7 @@ extern void gistbuildempty(Relation index);
extern bool gistinsert(Relation r, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
extern MemoryContext createTempGistContext(void);
extern GISTSTATE *initGISTstate(Relation index);

View File

@ -364,6 +364,7 @@ extern void hashbuildempty(Relation index);
extern bool hashinsert(Relation rel, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
extern bool hashgettuple(IndexScanDesc scan, ScanDirection dir);
extern int64 hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);

View File

@ -996,6 +996,7 @@ extern void btbuildempty(Relation index);
extern bool btinsert(Relation rel, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
extern IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys);
extern Size btestimateparallelscan(void);

View File

@ -199,6 +199,7 @@ extern void spgbuildempty(Relation index);
extern bool spginsert(Relation index, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
struct IndexInfo *indexInfo);
/* spgscan.c */

View File

@ -581,6 +581,7 @@ extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative);
extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate,
bool update,
bool noDupErr,
bool *specConflict, List *arbiterIndexes);
extern bool ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo,

View File

@ -168,6 +168,7 @@ static bool
diinsert(Relation index, Datum *values, bool *isnull,
ItemPointer ht_ctid, Relation heapRel,
IndexUniqueCheck checkUnique,
bool indexUnchanged,
IndexInfo *indexInfo)
{
/* nothing to do */