postgresql/src/backend/replication/logical/relation.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

912 lines
25 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
* relation.c
* PostgreSQL logical replication relation mapping cache
*
* Copyright (c) 2016-2023, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/replication/logical/relation.c
*
* NOTES
* Routines in this file mainly have to do with mapping the properties
* of local replication target relations to the properties of their
* remote counterpart.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#ifdef USE_ASSERT_CHECKING
#include "access/amapi.h"
#endif
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
#include "access/genam.h"
#include "access/table.h"
#include "catalog/namespace.h"
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
#include "catalog/pg_am_d.h"
#include "catalog/pg_subscription_rel.h"
#include "executor/executor.h"
#include "nodes/makefuncs.h"
#include "replication/logicalrelation.h"
#include "replication/worker_internal.h"
#include "utils/inval.h"
static MemoryContext LogicalRepRelMapContext = NULL;
static HTAB *LogicalRepRelMap = NULL;
/*
* Partition map (LogicalRepPartMap)
*
* When a partitioned table is used as replication target, replicated
* operations are actually performed on its leaf partitions, which requires
* the partitions to also be mapped to the remote relation. Parent's entry
* (LogicalRepRelMapEntry) cannot be used as-is for all partitions, because
* individual partitions may have different attribute numbers, which means
* attribute mappings to remote relation's attributes must be maintained
* separately for each partition.
*/
static MemoryContext LogicalRepPartMapContext = NULL;
static HTAB *LogicalRepPartMap = NULL;
typedef struct LogicalRepPartMapEntry
{
Oid partoid; /* LogicalRepPartMap's key */
LogicalRepRelMapEntry relmapentry;
} LogicalRepPartMapEntry;
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
static Oid FindLogicalRepLocalIndex(Relation localrel, LogicalRepRelation *remoterel,
AttrMap *attrMap);
/*
* Relcache invalidation callback for our relation map cache.
*/
static void
logicalrep_relmap_invalidate_cb(Datum arg, Oid reloid)
{
LogicalRepRelMapEntry *entry;
/* Just to be sure. */
if (LogicalRepRelMap == NULL)
return;
if (reloid != InvalidOid)
{
HASH_SEQ_STATUS status;
hash_seq_init(&status, LogicalRepRelMap);
/* TODO, use inverse lookup hashtable? */
while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
{
if (entry->localreloid == reloid)
{
entry->localrelvalid = false;
hash_seq_term(&status);
break;
}
}
}
else
{
/* invalidate all cache entries */
HASH_SEQ_STATUS status;
hash_seq_init(&status, LogicalRepRelMap);
while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
entry->localrelvalid = false;
}
}
/*
* Initialize the relation map cache.
*/
static void
logicalrep_relmap_init(void)
{
HASHCTL ctl;
if (!LogicalRepRelMapContext)
LogicalRepRelMapContext =
AllocSetContextCreate(CacheMemoryContext,
"LogicalRepRelMapContext",
ALLOCSET_DEFAULT_SIZES);
/* Initialize the relation hash table. */
ctl.keysize = sizeof(LogicalRepRelId);
ctl.entrysize = sizeof(LogicalRepRelMapEntry);
ctl.hcxt = LogicalRepRelMapContext;
LogicalRepRelMap = hash_create("logicalrep relation map cache", 128, &ctl,
HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
/* Watch for invalidation events. */
CacheRegisterRelcacheCallback(logicalrep_relmap_invalidate_cb,
(Datum) 0);
}
/*
* Free the entry of a relation map cache.
*/
static void
logicalrep_relmap_free_entry(LogicalRepRelMapEntry *entry)
{
LogicalRepRelation *remoterel;
remoterel = &entry->remoterel;
pfree(remoterel->nspname);
pfree(remoterel->relname);
if (remoterel->natts > 0)
{
int i;
for (i = 0; i < remoterel->natts; i++)
pfree(remoterel->attnames[i]);
pfree(remoterel->attnames);
pfree(remoterel->atttyps);
}
bms_free(remoterel->attkeys);
if (entry->attrmap)
free_attrmap(entry->attrmap);
}
/*
* Add new entry or update existing entry in the relation map cache.
*
* Called when new relation mapping is sent by the publisher to update
* our expected view of incoming data from said publisher.
*/
void
logicalrep_relmap_update(LogicalRepRelation *remoterel)
{
MemoryContext oldctx;
LogicalRepRelMapEntry *entry;
bool found;
int i;
if (LogicalRepRelMap == NULL)
logicalrep_relmap_init();
/*
* HASH_ENTER returns the existing entry if present or creates a new one.
*/
entry = hash_search(LogicalRepRelMap, &remoterel->remoteid,
HASH_ENTER, &found);
if (found)
logicalrep_relmap_free_entry(entry);
memset(entry, 0, sizeof(LogicalRepRelMapEntry));
/* Make cached copy of the data */
oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
entry->remoterel.remoteid = remoterel->remoteid;
entry->remoterel.nspname = pstrdup(remoterel->nspname);
entry->remoterel.relname = pstrdup(remoterel->relname);
entry->remoterel.natts = remoterel->natts;
entry->remoterel.attnames = palloc(remoterel->natts * sizeof(char *));
entry->remoterel.atttyps = palloc(remoterel->natts * sizeof(Oid));
for (i = 0; i < remoterel->natts; i++)
{
entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
entry->remoterel.atttyps[i] = remoterel->atttyps[i];
}
entry->remoterel.replident = remoterel->replident;
entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
MemoryContextSwitchTo(oldctx);
}
/*
* Find attribute index in TupleDesc struct by attribute name.
*
* Returns -1 if not found.
*/
static int
logicalrep_rel_att_by_name(LogicalRepRelation *remoterel, const char *attname)
{
int i;
for (i = 0; i < remoterel->natts; i++)
{
if (strcmp(remoterel->attnames[i], attname) == 0)
return i;
}
return -1;
}
/*
* Report error with names of the missing local relation column(s), if any.
*/
static void
logicalrep_report_missing_attrs(LogicalRepRelation *remoterel,
Bitmapset *missingatts)
{
if (!bms_is_empty(missingatts))
{
StringInfoData missingattsbuf;
int missingattcnt = 0;
int i;
initStringInfo(&missingattsbuf);
i = -1;
while ((i = bms_next_member(missingatts, i)) >= 0)
{
missingattcnt++;
if (missingattcnt == 1)
appendStringInfo(&missingattsbuf, _("\"%s\""),
remoterel->attnames[i]);
else
appendStringInfo(&missingattsbuf, _(", \"%s\""),
remoterel->attnames[i]);
}
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg_plural("logical replication target relation \"%s.%s\" is missing replicated column: %s",
"logical replication target relation \"%s.%s\" is missing replicated columns: %s",
missingattcnt,
remoterel->nspname,
remoterel->relname,
missingattsbuf.data)));
}
}
/*
* Check if replica identity matches and mark the updatable flag.
*
* We allow for stricter replica identity (fewer columns) on subscriber as
* that will not stop us from finding unique tuple. IE, if publisher has
* identity (id,timestamp) and subscriber just (id) this will not be a
* problem, but in the opposite scenario it will.
*
* We just mark the relation entry as not updatable here if the local
* replica identity is found to be insufficient for applying
* updates/deletes (inserts don't care!) and leave it to
* check_relation_updatable() to throw the actual error if needed.
*/
static void
logicalrep_rel_mark_updatable(LogicalRepRelMapEntry *entry)
{
Bitmapset *idkey;
LogicalRepRelation *remoterel = &entry->remoterel;
int i;
entry->updatable = true;
idkey = RelationGetIndexAttrBitmap(entry->localrel,
INDEX_ATTR_BITMAP_IDENTITY_KEY);
/* fallback to PK if no replica identity */
if (idkey == NULL)
{
idkey = RelationGetIndexAttrBitmap(entry->localrel,
INDEX_ATTR_BITMAP_PRIMARY_KEY);
/*
* If no replica identity index and no PK, the published table must
* have replica identity FULL.
*/
if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
entry->updatable = false;
}
i = -1;
while ((i = bms_next_member(idkey, i)) >= 0)
{
int attnum = i + FirstLowInvalidHeapAttributeNumber;
if (!AttrNumberIsForUserDefinedAttr(attnum))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("logical replication target relation \"%s.%s\" uses "
"system columns in REPLICA IDENTITY index",
remoterel->nspname, remoterel->relname)));
attnum = AttrNumberGetAttrOffset(attnum);
if (entry->attrmap->attnums[attnum] < 0 ||
!bms_is_member(entry->attrmap->attnums[attnum], remoterel->attkeys))
{
entry->updatable = false;
break;
}
}
}
/*
* Open the local relation associated with the remote one.
*
* Rebuilds the Relcache mapping if it was invalidated by local DDL.
*/
LogicalRepRelMapEntry *
logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
{
LogicalRepRelMapEntry *entry;
bool found;
LogicalRepRelation *remoterel;
if (LogicalRepRelMap == NULL)
logicalrep_relmap_init();
/* Search for existing entry. */
entry = hash_search(LogicalRepRelMap, &remoteid,
HASH_FIND, &found);
if (!found)
elog(ERROR, "no relation map entry for remote relation ID %u",
remoteid);
remoterel = &entry->remoterel;
/* Ensure we don't leak a relcache refcount. */
if (entry->localrel)
elog(ERROR, "remote relation ID %u is already open", remoteid);
/*
* When opening and locking a relation, pending invalidation messages are
* processed which can invalidate the relation. Hence, if the entry is
* currently considered valid, try to open the local relation by OID and
* see if invalidation ensues.
*/
if (entry->localrelvalid)
{
entry->localrel = try_table_open(entry->localreloid, lockmode);
if (!entry->localrel)
{
/* Table was renamed or dropped. */
entry->localrelvalid = false;
}
else if (!entry->localrelvalid)
{
/* Note we release the no-longer-useful lock here. */
table_close(entry->localrel, lockmode);
entry->localrel = NULL;
}
}
/*
* If the entry has been marked invalid since we last had lock on it,
* re-open the local relation by name and rebuild all derived data.
*/
if (!entry->localrelvalid)
{
Oid relid;
TupleDesc desc;
MemoryContext oldctx;
int i;
Bitmapset *missingatts;
/* Release the no-longer-useful attrmap, if any. */
if (entry->attrmap)
{
free_attrmap(entry->attrmap);
entry->attrmap = NULL;
}
/* Try to find and lock the relation by name. */
relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
remoterel->relname, -1),
lockmode, true);
if (!OidIsValid(relid))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("logical replication target relation \"%s.%s\" does not exist",
remoterel->nspname, remoterel->relname)));
entry->localrel = table_open(relid, NoLock);
entry->localreloid = relid;
/* Check for supported relkind. */
CheckSubscriptionRelkind(entry->localrel->rd_rel->relkind,
remoterel->nspname, remoterel->relname);
/*
* Build the mapping of local attribute numbers to remote attribute
* numbers and validate that we don't miss any replicated columns as
* that would result in potentially unwanted data loss.
*/
desc = RelationGetDescr(entry->localrel);
oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
entry->attrmap = make_attrmap(desc->natts);
MemoryContextSwitchTo(oldctx);
/* check and report missing attrs, if any */
missingatts = bms_add_range(NULL, 0, remoterel->natts - 1);
for (i = 0; i < desc->natts; i++)
{
int attnum;
Form_pg_attribute attr = TupleDescAttr(desc, i);
if (attr->attisdropped || attr->attgenerated)
{
entry->attrmap->attnums[i] = -1;
continue;
}
attnum = logicalrep_rel_att_by_name(remoterel,
NameStr(attr->attname));
entry->attrmap->attnums[i] = attnum;
if (attnum >= 0)
missingatts = bms_del_member(missingatts, attnum);
}
logicalrep_report_missing_attrs(remoterel, missingatts);
/* be tidy */
bms_free(missingatts);
/*
* Set if the table's replica identity is enough to apply
* update/delete.
*/
logicalrep_rel_mark_updatable(entry);
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
/*
* Finding a usable index is an infrequent task. It occurs when an
* operation is first performed on the relation, or after invalidation
* of the relation cache entry (such as ANALYZE or CREATE/DROP index
* on the relation).
*/
entry->localindexoid = FindLogicalRepLocalIndex(entry->localrel, remoterel,
entry->attrmap);
entry->localrelvalid = true;
}
if (entry->state != SUBREL_STATE_READY)
entry->state = GetSubscriptionRelState(MySubscription->oid,
entry->localreloid,
&entry->statelsn);
return entry;
}
/*
* Close the previously opened logical relation.
*/
void
logicalrep_rel_close(LogicalRepRelMapEntry *rel, LOCKMODE lockmode)
{
table_close(rel->localrel, lockmode);
rel->localrel = NULL;
}
/*
* Partition cache: look up partition LogicalRepRelMapEntry's
*
* Unlike relation map cache, this is keyed by partition OID, not remote
* relation OID, because we only have to use this cache in the case where
* partitions are not directly mapped to any remote relation, such as when
* replication is occurring with one of their ancestors as target.
*/
/*
* Relcache invalidation callback
*/
static void
logicalrep_partmap_invalidate_cb(Datum arg, Oid reloid)
{
LogicalRepPartMapEntry *entry;
/* Just to be sure. */
if (LogicalRepPartMap == NULL)
return;
if (reloid != InvalidOid)
{
HASH_SEQ_STATUS status;
hash_seq_init(&status, LogicalRepPartMap);
/* TODO, use inverse lookup hashtable? */
while ((entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
{
if (entry->relmapentry.localreloid == reloid)
{
entry->relmapentry.localrelvalid = false;
hash_seq_term(&status);
break;
}
}
}
else
{
/* invalidate all cache entries */
HASH_SEQ_STATUS status;
hash_seq_init(&status, LogicalRepPartMap);
while ((entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
entry->relmapentry.localrelvalid = false;
}
}
/*
* Reset the entries in the partition map that refer to remoterel.
*
* Called when new relation mapping is sent by the publisher to update our
* expected view of incoming data from said publisher.
*
* Note that we don't update the remoterel information in the entry here,
* we will update the information in logicalrep_partition_open to avoid
* unnecessary work.
*/
void
logicalrep_partmap_reset_relmap(LogicalRepRelation *remoterel)
{
HASH_SEQ_STATUS status;
LogicalRepPartMapEntry *part_entry;
LogicalRepRelMapEntry *entry;
if (LogicalRepPartMap == NULL)
return;
hash_seq_init(&status, LogicalRepPartMap);
while ((part_entry = (LogicalRepPartMapEntry *) hash_seq_search(&status)) != NULL)
{
entry = &part_entry->relmapentry;
if (entry->remoterel.remoteid != remoterel->remoteid)
continue;
logicalrep_relmap_free_entry(entry);
memset(entry, 0, sizeof(LogicalRepRelMapEntry));
}
}
/*
* Initialize the partition map cache.
*/
static void
logicalrep_partmap_init(void)
{
HASHCTL ctl;
if (!LogicalRepPartMapContext)
LogicalRepPartMapContext =
AllocSetContextCreate(CacheMemoryContext,
"LogicalRepPartMapContext",
ALLOCSET_DEFAULT_SIZES);
/* Initialize the relation hash table. */
ctl.keysize = sizeof(Oid); /* partition OID */
ctl.entrysize = sizeof(LogicalRepPartMapEntry);
ctl.hcxt = LogicalRepPartMapContext;
LogicalRepPartMap = hash_create("logicalrep partition map cache", 64, &ctl,
HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
/* Watch for invalidation events. */
CacheRegisterRelcacheCallback(logicalrep_partmap_invalidate_cb,
(Datum) 0);
}
/*
* logicalrep_partition_open
*
* Returned entry reuses most of the values of the root table's entry, save
* the attribute map, which can be different for the partition. However,
* we must physically copy all the data, in case the root table's entry
* gets freed/rebuilt.
*
* Note there's no logicalrep_partition_close, because the caller closes the
* component relation.
*/
LogicalRepRelMapEntry *
logicalrep_partition_open(LogicalRepRelMapEntry *root,
Relation partrel, AttrMap *map)
{
LogicalRepRelMapEntry *entry;
LogicalRepPartMapEntry *part_entry;
LogicalRepRelation *remoterel = &root->remoterel;
Oid partOid = RelationGetRelid(partrel);
AttrMap *attrmap = root->attrmap;
bool found;
MemoryContext oldctx;
if (LogicalRepPartMap == NULL)
logicalrep_partmap_init();
/* Search for existing entry. */
part_entry = (LogicalRepPartMapEntry *) hash_search(LogicalRepPartMap,
&partOid,
HASH_ENTER, &found);
entry = &part_entry->relmapentry;
/*
* We must always overwrite entry->localrel with the latest partition
* Relation pointer, because the Relation pointed to by the old value may
* have been cleared after the caller would have closed the partition
* relation after the last use of this entry. Note that localrelvalid is
* only updated by the relcache invalidation callback, so it may still be
* true irrespective of whether the Relation pointed to by localrel has
* been cleared or not.
*/
if (found && entry->localrelvalid)
{
entry->localrel = partrel;
return entry;
}
/* Switch to longer-lived context. */
oldctx = MemoryContextSwitchTo(LogicalRepPartMapContext);
if (!found)
{
memset(part_entry, 0, sizeof(LogicalRepPartMapEntry));
part_entry->partoid = partOid;
}
/* Release the no-longer-useful attrmap, if any. */
if (entry->attrmap)
{
free_attrmap(entry->attrmap);
entry->attrmap = NULL;
}
if (!entry->remoterel.remoteid)
{
int i;
/* Remote relation is copied as-is from the root entry. */
entry->remoterel.remoteid = remoterel->remoteid;
entry->remoterel.nspname = pstrdup(remoterel->nspname);
entry->remoterel.relname = pstrdup(remoterel->relname);
entry->remoterel.natts = remoterel->natts;
entry->remoterel.attnames = palloc(remoterel->natts * sizeof(char *));
entry->remoterel.atttyps = palloc(remoterel->natts * sizeof(Oid));
for (i = 0; i < remoterel->natts; i++)
{
entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
entry->remoterel.atttyps[i] = remoterel->atttyps[i];
}
entry->remoterel.replident = remoterel->replident;
entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
}
entry->localrel = partrel;
entry->localreloid = partOid;
/*
* If the partition's attributes don't match the root relation's, we'll
* need to make a new attrmap which maps partition attribute numbers to
* remoterel's, instead of the original which maps root relation's
* attribute numbers to remoterel's.
*
* Note that 'map' which comes from the tuple routing data structure
* contains 1-based attribute numbers (of the parent relation). However,
* the map in 'entry', a logical replication data structure, contains
* 0-based attribute numbers (of the remote relation).
*/
if (map)
{
AttrNumber attno;
entry->attrmap = make_attrmap(map->maplen);
for (attno = 0; attno < entry->attrmap->maplen; attno++)
{
AttrNumber root_attno = map->attnums[attno];
/* 0 means it's a dropped attribute. See comments atop AttrMap. */
if (root_attno == 0)
entry->attrmap->attnums[attno] = -1;
else
entry->attrmap->attnums[attno] = attrmap->attnums[root_attno - 1];
}
}
else
{
/* Lacking copy_attmap, do this the hard way. */
entry->attrmap = make_attrmap(attrmap->maplen);
memcpy(entry->attrmap->attnums, attrmap->attnums,
attrmap->maplen * sizeof(AttrNumber));
}
/* Set if the table's replica identity is enough to apply update/delete. */
logicalrep_rel_mark_updatable(entry);
/* state and statelsn are left set to 0. */
MemoryContextSwitchTo(oldctx);
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
/*
* Finding a usable index is an infrequent task. It occurs when an
* operation is first performed on the relation, or after invalidation of
* the relation cache entry (such as ANALYZE or CREATE/DROP index on the
* relation).
*
* We also prefer to run this code on the oldctx so that we do not leak
* anything in the LogicalRepPartMapContext (hence CacheMemoryContext).
*/
entry->localindexoid = FindLogicalRepLocalIndex(partrel, remoterel,
entry->attrmap);
entry->localrelvalid = true;
return entry;
}
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
/*
* Returns the oid of an index that can be used by the apply worker to scan
* the relation.
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
*
* We expect to call this function when REPLICA IDENTITY FULL is defined for
* the remote relation.
*
* If no suitable index is found, returns InvalidOid.
*/
static Oid
FindUsableIndexForReplicaIdentityFull(Relation localrel, AttrMap *attrmap)
{
List *idxlist = RelationGetIndexList(localrel);
ListCell *lc;
foreach(lc, idxlist)
{
Oid idxoid = lfirst_oid(lc);
bool isUsableIdx;
Relation idxRel;
IndexInfo *idxInfo;
idxRel = index_open(idxoid, AccessShareLock);
idxInfo = BuildIndexInfo(idxRel);
isUsableIdx = IsIndexUsableForReplicaIdentityFull(idxInfo, attrmap);
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
index_close(idxRel, AccessShareLock);
/* Return the first eligible index found */
if (isUsableIdx)
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
return idxoid;
}
return InvalidOid;
}
/*
* Returns true if the index is usable for replica identity full.
*
* The index must be btree or hash, non-partial, and the leftmost field must be
* a column (not an expression) that references the remote relation column. These
* limitations help to keep the index scan similar to PK/RI index scans.
*
* attrmap is a map of local attributes to remote ones. We can consult this
* map to check whether the local index attribute has a corresponding remote
* attribute.
*
* Note that the limitations of index scans for replica identity full only
* adheres to a subset of the limitations of PK/RI. For example, we support
* columns that are marked as [NULL] or we are not interested in the [NOT
* DEFERRABLE] aspect of constraints here. It works for us because we always
* compare the tuples for non-PK/RI index scans. See
* RelationFindReplTupleByIndex().
*
* The reasons why only Btree and Hash indexes can be considered as usable are:
*
* 1) Other index access methods don't have a fixed strategy for equality
* operation. Refer get_equal_strategy_number_for_am().
*
* 2) For indexes other than PK and REPLICA IDENTITY, we need to match the
* local and remote tuples. The equality routine tuples_equal() cannot accept
* a datatype (e.g. point or box) that does not have a default operator class
* for Btree or Hash.
*
* XXX: Note that BRIN and GIN indexes do not implement "amgettuple" which
* will be used later to fetch the tuples. See RelationFindReplTupleByIndex().
*
* XXX: To support partial indexes, the required changes are likely to be larger.
* If none of the tuples satisfy the expression for the index scan, we fall-back
* to sequential execution, which might not be a good idea in some cases.
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
*/
bool
IsIndexUsableForReplicaIdentityFull(IndexInfo *indexInfo, AttrMap *attrmap)
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
{
AttrNumber keycol;
/* Ensure that the index access method has a valid equal strategy */
if (get_equal_strategy_number_for_am(indexInfo->ii_Am) == InvalidStrategy)
return false;
/* The index must not be a partial index */
if (indexInfo->ii_Predicate != NIL)
return false;
Assert(indexInfo->ii_NumIndexAttrs >= 1);
/* The leftmost index field must not be an expression */
keycol = indexInfo->ii_IndexAttrNumbers[0];
if (!AttributeNumberIsValid(keycol))
return false;
/*
* And the leftmost index field must reference the remote relation column.
* This is because if it doesn't, the sequential scan is favorable over
* index scan in most cases.
*/
if (attrmap->maplen <= AttrNumberGetAttrOffset(keycol) ||
attrmap->attnums[AttrNumberGetAttrOffset(keycol)] < 0)
return false;
#ifdef USE_ASSERT_CHECKING
{
IndexAmRoutine *amroutine;
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
/* The given index access method must implement amgettuple. */
amroutine = GetIndexAmRoutineByAmId(indexInfo->ii_Am, false);
Assert(amroutine->amgettuple != NULL);
}
#endif
return true;
Allow the use of indexes other than PK and REPLICA IDENTITY on the subscriber. Using REPLICA IDENTITY FULL on the publisher can lead to a full table scan per tuple change on the subscription when REPLICA IDENTITY or PK index is not available. This makes REPLICA IDENTITY FULL impractical to use apart from some small number of use cases. This patch allows using indexes other than PRIMARY KEY or REPLICA IDENTITY on the subscriber during apply of update/delete. The index that can be used must be a btree index, not a partial index, and it must have at least one column reference (i.e. cannot consist of only expressions). We can uplift these restrictions in the future. There is no smart mechanism to pick the index. If there is more than one index that satisfies these requirements, we just pick the first one. We discussed using some of the optimizer's low-level APIs for this but ruled it out as that can be a maintenance burden in the long run. This patch improves the performance in the vast majority of cases and the improvement is proportional to the amount of data in the table. However, there could be some regression in a small number of cases where the indexes have a lot of duplicate and dead rows. It was discussed that those are mostly impractical cases but we can provide a table or subscription level option to disable this feature if required. Author: Onder Kalaci, Amit Kapila Reviewed-by: Peter Smith, Shi yu, Hou Zhijie, Vignesh C, Kuroda Hayato, Amit Kapila Discussion: https://postgr.es/m/CACawEhVLqmAAyPXdHEPv1ssU2c=dqOniiGz7G73HfyS7+nGV4w@mail.gmail.com
2023-03-15 04:06:38 +01:00
}
/*
* Get replica identity index or if it is not defined a primary key.
*
* If neither is defined, returns InvalidOid
*/
Oid
GetRelationIdentityOrPK(Relation rel)
{
Oid idxoid;
idxoid = RelationGetReplicaIndex(rel);
if (!OidIsValid(idxoid))
idxoid = RelationGetPrimaryKeyIndex(rel);
return idxoid;
}
/*
* Returns the index oid if we can use an index for subscriber. Otherwise,
* returns InvalidOid.
*/
static Oid
FindLogicalRepLocalIndex(Relation localrel, LogicalRepRelation *remoterel,
AttrMap *attrMap)
{
Oid idxoid;
/*
* We never need index oid for partitioned tables, always rely on leaf
* partition's index.
*/
if (localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
return InvalidOid;
/*
* Simple case, we already have a primary key or a replica identity index.
*/
idxoid = GetRelationIdentityOrPK(localrel);
if (OidIsValid(idxoid))
return idxoid;
if (remoterel->replident == REPLICA_IDENTITY_FULL)
{
/*
* We are looking for one more opportunity for using an index. If
* there are any indexes defined on the local relation, try to pick a
* suitable index.
*
* The index selection safely assumes that all the columns are going
* to be available for the index scan given that remote relation has
* replica identity full.
*
* Note that we are not using the planner to find the cheapest method
* to scan the relation as that would require us to either use lower
* level planner functions which would be a maintenance burden in the
* long run or use the full-fledged planner which could cause
* overhead.
*/
return FindUsableIndexForReplicaIdentityFull(localrel, attrMap);
}
return InvalidOid;
}