postgresql/src/backend/catalog/index.c

3505 lines
110 KiB
C

/*-------------------------------------------------------------------------
*
* index.c
* code to create and destroy POSTGRES index relations
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/catalog/index.c
*
*
* INTERFACE ROUTINES
* index_create() - Create a cataloged index relation
* index_drop() - Removes index relation from catalogs
* BuildIndexInfo() - Prepare to insert index tuples
* FormIndexDatum() - Construct datum vector for one index tuple
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <unistd.h>
#include "access/multixact.h"
#include "access/relscan.h"
#include "access/sysattr.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "bootstrap/bootstrap.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/heap.h"
#include "catalog/index.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
#include "catalog/storage.h"
#include "commands/tablecmds.h"
#include "commands/trigger.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "parser/parser.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "storage/procarray.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
#include "utils/tuplesort.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"
/* Potentially set by contrib/pg_upgrade_support functions */
Oid binary_upgrade_next_index_pg_class_oid = InvalidOid;
/* state info for validate_index bulkdelete callback */
typedef struct
{
Tuplesortstate *tuplesort; /* for sorting the index TIDs */
/* statistics (for debug purposes only): */
double htups,
itups,
tups_inserted;
} v_i_state;
/* non-export function prototypes */
static bool relationHasPrimaryKey(Relation rel);
static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
IndexInfo *indexInfo,
List *indexColNames,
Oid accessMethodObjectId,
Oid *collationObjectId,
Oid *classObjectId);
static void InitializeAttributeOids(Relation indexRelation,
int numatts, Oid indexoid);
static void AppendAttributeTuples(Relation indexRelation, int numatts);
static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
IndexInfo *indexInfo,
Oid *collationOids,
Oid *classOids,
int16 *coloptions,
bool primary,
bool isexclusion,
bool immediate,
bool isvalid);
static void index_update_stats(Relation rel,
bool hasindex, bool isprimary,
double reltuples);
static void IndexCheckExclusion(Relation heapRelation,
Relation indexRelation,
IndexInfo *indexInfo);
static bool validate_index_callback(ItemPointer itemptr, void *opaque);
static void validate_index_heapscan(Relation heapRelation,
Relation indexRelation,
IndexInfo *indexInfo,
Snapshot snapshot,
v_i_state *state);
static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
static void SetReindexProcessing(Oid heapOid, Oid indexOid);
static void ResetReindexProcessing(void);
static void SetReindexPending(List *indexes);
static void RemoveReindexPending(Oid indexOid);
static void ResetReindexPending(void);
/*
* relationHasPrimaryKey
* See whether an existing relation has a primary key.
*
* Caller must have suitable lock on the relation.
*
* Note: we intentionally do not check IndexIsValid here; that's because this
* is used to enforce the rule that there can be only one indisprimary index,
* and we want that to be true even if said index is invalid.
*/
static bool
relationHasPrimaryKey(Relation rel)
{
bool result = false;
List *indexoidlist;
ListCell *indexoidscan;
/*
* Get the list of index OIDs for the table from the relcache, and look up
* each one in the pg_index syscache until we find one marked primary key
* (hopefully there isn't more than one such).
*/
indexoidlist = RelationGetIndexList(rel);
foreach(indexoidscan, indexoidlist)
{
Oid indexoid = lfirst_oid(indexoidscan);
HeapTuple indexTuple;
indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
if (!HeapTupleIsValid(indexTuple)) /* should not happen */
elog(ERROR, "cache lookup failed for index %u", indexoid);
result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
ReleaseSysCache(indexTuple);
if (result)
break;
}
list_free(indexoidlist);
return result;
}
/*
* index_check_primary_key
* Apply special checks needed before creating a PRIMARY KEY index
*
* This processing used to be in DefineIndex(), but has been split out
* so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
*
* We check for a pre-existing primary key, and that all columns of the index
* are simple column references (not expressions), and that all those
* columns are marked NOT NULL. If they aren't (which can only happen during
* ALTER TABLE ADD CONSTRAINT, since the parser forces such columns to be
* created NOT NULL during CREATE TABLE), do an ALTER SET NOT NULL to mark
* them so --- or fail if they are not in fact nonnull.
*
* Caller had better have at least ShareLock on the table, else the not-null
* checking isn't trustworthy.
*/
void
index_check_primary_key(Relation heapRel,
IndexInfo *indexInfo,
bool is_alter_table)
{
List *cmds;
int i;
/*
* If ALTER TABLE, check that there isn't already a PRIMARY KEY. In CREATE
* TABLE, we have faith that the parser rejected multiple pkey clauses;
* and CREATE INDEX doesn't have a way to say PRIMARY KEY, so it's no
* problem either.
*/
if (is_alter_table &&
relationHasPrimaryKey(heapRel))
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("multiple primary keys for table \"%s\" are not allowed",
RelationGetRelationName(heapRel))));
}
/*
* Check that all of the attributes in a primary key are marked as not
* null, otherwise attempt to ALTER TABLE .. SET NOT NULL
*/
cmds = NIL;
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
{
AttrNumber attnum = indexInfo->ii_KeyAttrNumbers[i];
HeapTuple atttuple;
Form_pg_attribute attform;
if (attnum == 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("primary keys cannot be expressions")));
/* System attributes are never null, so no need to check */
if (attnum < 0)
continue;
atttuple = SearchSysCache2(ATTNUM,
ObjectIdGetDatum(RelationGetRelid(heapRel)),
Int16GetDatum(attnum));
if (!HeapTupleIsValid(atttuple))
elog(ERROR, "cache lookup failed for attribute %d of relation %u",
attnum, RelationGetRelid(heapRel));
attform = (Form_pg_attribute) GETSTRUCT(atttuple);
if (!attform->attnotnull)
{
/* Add a subcommand to make this one NOT NULL */
AlterTableCmd *cmd = makeNode(AlterTableCmd);
cmd->subtype = AT_SetNotNull;
cmd->name = pstrdup(NameStr(attform->attname));
cmds = lappend(cmds, cmd);
}
ReleaseSysCache(atttuple);
}
/*
* XXX: Shouldn't the ALTER TABLE .. SET NOT NULL cascade to child tables?
* Currently, since the PRIMARY KEY itself doesn't cascade, we don't
* cascade the notnull constraint(s) either; but this is pretty debatable.
*
* XXX: possible future improvement: when being called from ALTER TABLE,
* it would be more efficient to merge this with the outer ALTER TABLE, so
* as to avoid two scans. But that seems to complicate DefineIndex's API
* unduly.
*/
if (cmds)
AlterTableInternal(RelationGetRelid(heapRel), cmds, false);
}
/*
* ConstructTupleDescriptor
*
* Build an index tuple descriptor for a new index
*/
static TupleDesc
ConstructTupleDescriptor(Relation heapRelation,
IndexInfo *indexInfo,
List *indexColNames,
Oid accessMethodObjectId,
Oid *collationObjectId,
Oid *classObjectId)
{
int numatts = indexInfo->ii_NumIndexAttrs;
ListCell *colnames_item = list_head(indexColNames);
ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
HeapTuple amtuple;
Form_pg_am amform;
TupleDesc heapTupDesc;
TupleDesc indexTupDesc;
int natts; /* #atts in heap rel --- for error checks */
int i;
/* We need access to the index AM's pg_am tuple */
amtuple = SearchSysCache1(AMOID,
ObjectIdGetDatum(accessMethodObjectId));
if (!HeapTupleIsValid(amtuple))
elog(ERROR, "cache lookup failed for access method %u",
accessMethodObjectId);
amform = (Form_pg_am) GETSTRUCT(amtuple);
/* ... and to the table's tuple descriptor */
heapTupDesc = RelationGetDescr(heapRelation);
natts = RelationGetForm(heapRelation)->relnatts;
/*
* allocate the new tuple descriptor
*/
indexTupDesc = CreateTemplateTupleDesc(numatts, false);
/*
* For simple index columns, we copy the pg_attribute row from the parent
* relation and modify it as necessary. For expressions we have to cons
* up a pg_attribute row the hard way.
*/
for (i = 0; i < numatts; i++)
{
AttrNumber atnum = indexInfo->ii_KeyAttrNumbers[i];
Form_pg_attribute to = indexTupDesc->attrs[i];
HeapTuple tuple;
Form_pg_type typeTup;
Form_pg_opclass opclassTup;
Oid keyType;
if (atnum != 0)
{
/* Simple index column */
Form_pg_attribute from;
if (atnum < 0)
{
/*
* here we are indexing on a system attribute (-1...-n)
*/
from = SystemAttributeDefinition(atnum,
heapRelation->rd_rel->relhasoids);
}
else
{
/*
* here we are indexing on a normal attribute (1...n)
*/
if (atnum > natts) /* safety check */
elog(ERROR, "invalid column number %d", atnum);
from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
}
/*
* now that we've determined the "from", let's copy the tuple desc
* data...
*/
memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);
/*
* Fix the stuff that should not be the same as the underlying
* attr
*/
to->attnum = i + 1;
to->attstattarget = -1;
to->attcacheoff = -1;
to->attnotnull = false;
to->atthasdef = false;
to->attislocal = true;
to->attinhcount = 0;
to->attcollation = collationObjectId[i];
}
else
{
/* Expressional index */
Node *indexkey;
MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
if (indexpr_item == NULL) /* shouldn't happen */
elog(ERROR, "too few entries in indexprs list");
indexkey = (Node *) lfirst(indexpr_item);
indexpr_item = lnext(indexpr_item);
/*
* Lookup the expression type in pg_type for the type length etc.
*/
keyType = exprType(indexkey);
tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for type %u", keyType);
typeTup = (Form_pg_type) GETSTRUCT(tuple);
/*
* Assign some of the attributes values. Leave the rest as 0.
*/
to->attnum = i + 1;
to->atttypid = keyType;
to->attlen = typeTup->typlen;
to->attbyval = typeTup->typbyval;
to->attstorage = typeTup->typstorage;
to->attalign = typeTup->typalign;
to->attstattarget = -1;
to->attcacheoff = -1;
to->atttypmod = -1;
to->attislocal = true;
to->attcollation = collationObjectId[i];
ReleaseSysCache(tuple);
/*
* Make sure the expression yields a type that's safe to store in
* an index. We need this defense because we have index opclasses
* for pseudo-types such as "record", and the actually stored type
* had better be safe; eg, a named composite type is okay, an
* anonymous record type is not. The test is the same as for
* whether a table column is of a safe type (which is why we
* needn't check for the non-expression case).
*/
CheckAttributeType(NameStr(to->attname),
to->atttypid, to->attcollation,
NIL, false);
}
/*
* We do not yet have the correct relation OID for the index, so just
* set it invalid for now. InitializeAttributeOids() will fix it
* later.
*/
to->attrelid = InvalidOid;
/*
* Set the attribute name as specified by caller.
*/
if (colnames_item == NULL) /* shouldn't happen */
elog(ERROR, "too few entries in colnames list");
namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
colnames_item = lnext(colnames_item);
/*
* Check the opclass and index AM to see if either provides a keytype
* (overriding the attribute type). Opclass takes precedence.
*/
tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for opclass %u",
classObjectId[i]);
opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
if (OidIsValid(opclassTup->opckeytype))
keyType = opclassTup->opckeytype;
else
keyType = amform->amkeytype;
ReleaseSysCache(tuple);
if (OidIsValid(keyType) && keyType != to->atttypid)
{
/* index value and heap value have different types */
tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for type %u", keyType);
typeTup = (Form_pg_type) GETSTRUCT(tuple);
to->atttypid = keyType;
to->atttypmod = -1;
to->attlen = typeTup->typlen;
to->attbyval = typeTup->typbyval;
to->attalign = typeTup->typalign;
to->attstorage = typeTup->typstorage;
ReleaseSysCache(tuple);
}
}
ReleaseSysCache(amtuple);
return indexTupDesc;
}
/* ----------------------------------------------------------------
* InitializeAttributeOids
* ----------------------------------------------------------------
*/
static void
InitializeAttributeOids(Relation indexRelation,
int numatts,
Oid indexoid)
{
TupleDesc tupleDescriptor;
int i;
tupleDescriptor = RelationGetDescr(indexRelation);
for (i = 0; i < numatts; i += 1)
tupleDescriptor->attrs[i]->attrelid = indexoid;
}
/* ----------------------------------------------------------------
* AppendAttributeTuples
* ----------------------------------------------------------------
*/
static void
AppendAttributeTuples(Relation indexRelation, int numatts)
{
Relation pg_attribute;
CatalogIndexState indstate;
TupleDesc indexTupDesc;
int i;
/*
* open the attribute relation and its indexes
*/
pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
indstate = CatalogOpenIndexes(pg_attribute);
/*
* insert data from new index's tupdesc into pg_attribute
*/
indexTupDesc = RelationGetDescr(indexRelation);
for (i = 0; i < numatts; i++)
{
/*
* There used to be very grotty code here to set these fields, but I
* think it's unnecessary. They should be set already.
*/
Assert(indexTupDesc->attrs[i]->attnum == i + 1);
Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
}
CatalogCloseIndexes(indstate);
heap_close(pg_attribute, RowExclusiveLock);
}
/* ----------------------------------------------------------------
* UpdateIndexRelation
*
* Construct and insert a new entry in the pg_index catalog
* ----------------------------------------------------------------
*/
static void
UpdateIndexRelation(Oid indexoid,
Oid heapoid,
IndexInfo *indexInfo,
Oid *collationOids,
Oid *classOids,
int16 *coloptions,
bool primary,
bool isexclusion,
bool immediate,
bool isvalid)
{
int2vector *indkey;
oidvector *indcollation;
oidvector *indclass;
int2vector *indoption;
Datum exprsDatum;
Datum predDatum;
Datum values[Natts_pg_index];
bool nulls[Natts_pg_index];
Relation pg_index;
HeapTuple tuple;
int i;
/*
* Copy the index key, opclass, and indoption info into arrays (should we
* make the caller pass them like this to start with?)
*/
indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexAttrs);
indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
/*
* Convert the index expressions (if any) to a text datum
*/
if (indexInfo->ii_Expressions != NIL)
{
char *exprsString;
exprsString = nodeToString(indexInfo->ii_Expressions);
exprsDatum = CStringGetTextDatum(exprsString);
pfree(exprsString);
}
else
exprsDatum = (Datum) 0;
/*
* Convert the index predicate (if any) to a text datum. Note we convert
* implicit-AND format to normal explicit-AND for storage.
*/
if (indexInfo->ii_Predicate != NIL)
{
char *predString;
predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
predDatum = CStringGetTextDatum(predString);
pfree(predString);
}
else
predDatum = (Datum) 0;
/*
* open the system catalog index relation
*/
pg_index = heap_open(IndexRelationId, RowExclusiveLock);
/*
* Build a pg_index tuple
*/
MemSet(nulls, false, sizeof(nulls));
values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
/* we set isvalid and isready the same way */
values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
values[Anum_pg_index_indexprs - 1] = exprsDatum;
if (exprsDatum == (Datum) 0)
nulls[Anum_pg_index_indexprs - 1] = true;
values[Anum_pg_index_indpred - 1] = predDatum;
if (predDatum == (Datum) 0)
nulls[Anum_pg_index_indpred - 1] = true;
tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
/*
* insert the tuple into the pg_index catalog
*/
simple_heap_insert(pg_index, tuple);
/* update the indexes on pg_index */
CatalogUpdateIndexes(pg_index, tuple);
/*
* close the relation and free the tuple
*/
heap_close(pg_index, RowExclusiveLock);
heap_freetuple(tuple);
}
/*
* index_create
*
* heapRelation: table to build index on (suitably locked by caller)
* indexRelationName: what it say
* indexRelationId: normally, pass InvalidOid to let this routine
* generate an OID for the index. During bootstrap this may be
* nonzero to specify a preselected OID.
* relFileNode: normally, pass InvalidOid to get new storage. May be
* nonzero to attach an existing valid build.
* indexInfo: same info executor uses to insert into the index
* indexColNames: column names to use for index (List of char *)
* accessMethodObjectId: OID of index AM to use
* tableSpaceId: OID of tablespace to use
* collationObjectId: array of collation OIDs, one per index column
* classObjectId: array of index opclass OIDs, one per index column
* coloptions: array of per-index-column indoption settings
* reloptions: AM-specific options
* isprimary: index is a PRIMARY KEY
* isconstraint: index is owned by PRIMARY KEY, UNIQUE, or EXCLUSION constraint
* deferrable: constraint is DEFERRABLE
* initdeferred: constraint is INITIALLY DEFERRED
* allow_system_table_mods: allow table to be a system catalog
* skip_build: true to skip the index_build() step for the moment; caller
* must do it later (typically via reindex_index())
* concurrent: if true, do not lock the table against writers. The index
* will be marked "invalid" and the caller must take additional steps
* to fix it up.
* is_internal: if true, post creation hook for new index
*
* Returns the OID of the created index.
*/
Oid
index_create(Relation heapRelation,
const char *indexRelationName,
Oid indexRelationId,
Oid relFileNode,
IndexInfo *indexInfo,
List *indexColNames,
Oid accessMethodObjectId,
Oid tableSpaceId,
Oid *collationObjectId,
Oid *classObjectId,
int16 *coloptions,
Datum reloptions,
bool isprimary,
bool isconstraint,
bool deferrable,
bool initdeferred,
bool allow_system_table_mods,
bool skip_build,
bool concurrent,
bool is_internal)
{
Oid heapRelationId = RelationGetRelid(heapRelation);
Relation pg_class;
Relation indexRelation;
TupleDesc indexTupDesc;
bool shared_relation;
bool mapped_relation;
bool is_exclusion;
Oid namespaceId;
int i;
char relpersistence;
is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
pg_class = heap_open(RelationRelationId, RowExclusiveLock);
/*
* The index will be in the same namespace as its parent table, and is
* shared across databases if and only if the parent is. Likewise, it
* will use the relfilenode map if and only if the parent does; and it
* inherits the parent's relpersistence.
*/
namespaceId = RelationGetNamespace(heapRelation);
shared_relation = heapRelation->rd_rel->relisshared;
mapped_relation = RelationIsMapped(heapRelation);
relpersistence = heapRelation->rd_rel->relpersistence;
/*
* check parameters
*/
if (indexInfo->ii_NumIndexAttrs < 1)
elog(ERROR, "must index at least one column");
if (!allow_system_table_mods &&
IsSystemRelation(heapRelation) &&
IsNormalProcessingMode())
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("user-defined indexes on system catalog tables are not supported")));
/*
* concurrent index build on a system catalog is unsafe because we tend to
* release locks before committing in catalogs
*/
if (concurrent &&
IsSystemRelation(heapRelation))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("concurrent index creation on system catalog tables is not supported")));
/*
* This case is currently not supported, but there's no way to ask for it
* in the grammar anyway, so it can't happen.
*/
if (concurrent && is_exclusion)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg_internal("concurrent index creation for exclusion constraints is not supported")));
/*
* We cannot allow indexing a shared relation after initdb (because
* there's no way to make the entry in other databases' pg_class).
*/
if (shared_relation && !IsBootstrapProcessingMode())
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("shared indexes cannot be created after initdb")));
/*
* Shared relations must be in pg_global, too (last-ditch check)
*/
if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
elog(ERROR, "shared relations must be placed in pg_global tablespace");
if (get_relname_relid(indexRelationName, namespaceId))
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_TABLE),
errmsg("relation \"%s\" already exists",
indexRelationName)));
/*
* construct tuple descriptor for index tuples
*/
indexTupDesc = ConstructTupleDescriptor(heapRelation,
indexInfo,
indexColNames,
accessMethodObjectId,
collationObjectId,
classObjectId);
/*
* Allocate an OID for the index, unless we were told what to use.
*
* The OID will be the relfilenode as well, so make sure it doesn't
* collide with either pg_class OIDs or existing physical files.
*/
if (!OidIsValid(indexRelationId))
{
/*
* Use binary-upgrade override for pg_class.oid/relfilenode, if
* supplied.
*/
if (IsBinaryUpgrade &&
OidIsValid(binary_upgrade_next_index_pg_class_oid))
{
indexRelationId = binary_upgrade_next_index_pg_class_oid;
binary_upgrade_next_index_pg_class_oid = InvalidOid;
}
else
{
indexRelationId =
GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
}
}
/*
* create the index relation's relcache entry and physical disk file. (If
* we fail further down, it's the smgr's responsibility to remove the disk
* file again.)
*/
indexRelation = heap_create(indexRelationName,
namespaceId,
tableSpaceId,
indexRelationId,
relFileNode,
indexTupDesc,
RELKIND_INDEX,
relpersistence,
shared_relation,
mapped_relation,
allow_system_table_mods);
Assert(indexRelationId == RelationGetRelid(indexRelation));
/*
* Obtain exclusive lock on it. Although no other backends can see it
* until we commit, this prevents deadlock-risk complaints from lock
* manager in cases such as CLUSTER.
*/
LockRelation(indexRelation, AccessExclusiveLock);
/*
* Fill in fields of the index's pg_class entry that are not set correctly
* by heap_create.
*
* XXX should have a cleaner way to create cataloged indexes
*/
indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
indexRelation->rd_rel->relam = accessMethodObjectId;
indexRelation->rd_rel->relhasoids = false;
/*
* store index's pg_class entry
*/
InsertPgClassTuple(pg_class, indexRelation,
RelationGetRelid(indexRelation),
(Datum) 0,
reloptions);
/* done with pg_class */
heap_close(pg_class, RowExclusiveLock);
/*
* now update the object id's of all the attribute tuple forms in the
* index relation's tuple descriptor
*/
InitializeAttributeOids(indexRelation,
indexInfo->ii_NumIndexAttrs,
indexRelationId);
/*
* append ATTRIBUTE tuples for the index
*/
AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
/* ----------------
* update pg_index
* (append INDEX tuple)
*
* Note that this stows away a representation of "predicate".
* (Or, could define a rule to maintain the predicate) --Nels, Feb '92
* ----------------
*/
UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
collationObjectId, classObjectId, coloptions,
isprimary, is_exclusion,
!deferrable,
!concurrent);
/*
* Register constraint and dependencies for the index.
*
* If the index is from a CONSTRAINT clause, construct a pg_constraint
* entry. The index will be linked to the constraint, which in turn is
* linked to the table. If it's not a CONSTRAINT, we need to make a
* dependency directly on the table.
*
* We don't need a dependency on the namespace, because there'll be an
* indirect dependency via our parent table.
*
* During bootstrap we can't register any dependencies, and we don't try
* to make a constraint either.
*/
if (!IsBootstrapProcessingMode())
{
ObjectAddress myself,
referenced;
myself.classId = RelationRelationId;
myself.objectId = indexRelationId;
myself.objectSubId = 0;
if (isconstraint)
{
char constraintType;
if (isprimary)
constraintType = CONSTRAINT_PRIMARY;
else if (indexInfo->ii_Unique)
constraintType = CONSTRAINT_UNIQUE;
else if (is_exclusion)
constraintType = CONSTRAINT_EXCLUSION;
else
{
elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
constraintType = 0; /* keep compiler quiet */
}
index_constraint_create(heapRelation,
indexRelationId,
indexInfo,
indexRelationName,
constraintType,
deferrable,
initdeferred,
false, /* already marked primary */
false, /* pg_index entry is OK */
false, /* no old dependencies */
allow_system_table_mods,
is_internal);
}
else
{
bool have_simple_col = false;
/* Create auto dependencies on simply-referenced columns */
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
{
if (indexInfo->ii_KeyAttrNumbers[i] != 0)
{
referenced.classId = RelationRelationId;
referenced.objectId = heapRelationId;
referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
have_simple_col = true;
}
}
/*
* If there are no simply-referenced columns, give the index an
* auto dependency on the whole table. In most cases, this will
* be redundant, but it might not be if the index expressions and
* predicate contain no Vars or only whole-row Vars.
*/
if (!have_simple_col)
{
referenced.classId = RelationRelationId;
referenced.objectId = heapRelationId;
referenced.objectSubId = 0;
recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
}
/* Non-constraint indexes can't be deferrable */
Assert(!deferrable);
Assert(!initdeferred);
}
/* Store dependency on collations */
/* The default collation is pinned, so don't bother recording it */
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
{
if (OidIsValid(collationObjectId[i]) &&
collationObjectId[i] != DEFAULT_COLLATION_OID)
{
referenced.classId = CollationRelationId;
referenced.objectId = collationObjectId[i];
referenced.objectSubId = 0;
recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
}
}
/* Store dependency on operator classes */
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
{
referenced.classId = OperatorClassRelationId;
referenced.objectId = classObjectId[i];
referenced.objectSubId = 0;
recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
}
/* Store dependencies on anything mentioned in index expressions */
if (indexInfo->ii_Expressions)
{
recordDependencyOnSingleRelExpr(&myself,
(Node *) indexInfo->ii_Expressions,
heapRelationId,
DEPENDENCY_NORMAL,
DEPENDENCY_AUTO);
}
/* Store dependencies on anything mentioned in predicate */
if (indexInfo->ii_Predicate)
{
recordDependencyOnSingleRelExpr(&myself,
(Node *) indexInfo->ii_Predicate,
heapRelationId,
DEPENDENCY_NORMAL,
DEPENDENCY_AUTO);
}
}
else
{
/* Bootstrap mode - assert we weren't asked for constraint support */
Assert(!isconstraint);
Assert(!deferrable);
Assert(!initdeferred);
}
/* Post creation hook for new index */
InvokeObjectPostCreateHookArg(RelationRelationId,
indexRelationId, 0, is_internal);
/*
* Advance the command counter so that we can see the newly-entered
* catalog tuples for the index.
*/
CommandCounterIncrement();
/*
* In bootstrap mode, we have to fill in the index strategy structure with
* information from the catalogs. If we aren't bootstrapping, then the
* relcache entry has already been rebuilt thanks to sinval update during
* CommandCounterIncrement.
*/
if (IsBootstrapProcessingMode())
RelationInitIndexAccessInfo(indexRelation);
else
Assert(indexRelation->rd_indexcxt != NULL);
/*
* If this is bootstrap (initdb) time, then we don't actually fill in the
* index yet. We'll be creating more indexes and classes later, so we
* delay filling them in until just before we're done with bootstrapping.
* Similarly, if the caller specified skip_build then filling the index is
* delayed till later (ALTER TABLE can save work in some cases with this).
* Otherwise, we call the AM routine that constructs the index.
*/
if (IsBootstrapProcessingMode())
{
index_register(heapRelationId, indexRelationId, indexInfo);
}
else if (skip_build)
{
/*
* Caller is responsible for filling the index later on. However,
* we'd better make sure that the heap relation is correctly marked as
* having an index.
*/
index_update_stats(heapRelation,
true,
isprimary,
-1.0);
/* Make the above update visible */
CommandCounterIncrement();
}
else
{
index_build(heapRelation, indexRelation, indexInfo, isprimary, false);
}
/*
* Close the index; but we keep the lock that we acquired above until end
* of transaction. Closing the heap is caller's responsibility.
*/
index_close(indexRelation, NoLock);
return indexRelationId;
}
/*
* index_constraint_create
*
* Set up a constraint associated with an index
*
* heapRelation: table owning the index (must be suitably locked by caller)
* indexRelationId: OID of the index
* indexInfo: same info executor uses to insert into the index
* constraintName: what it say (generally, should match name of index)
* constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
* CONSTRAINT_EXCLUSION
* deferrable: constraint is DEFERRABLE
* initdeferred: constraint is INITIALLY DEFERRED
* mark_as_primary: if true, set flags to mark index as primary key
* update_pgindex: if true, update pg_index row (else caller's done that)
* remove_old_dependencies: if true, remove existing dependencies of index
* on table's columns
* allow_system_table_mods: allow table to be a system catalog
* is_internal: index is constructed due to internal process
*/
void
index_constraint_create(Relation heapRelation,
Oid indexRelationId,
IndexInfo *indexInfo,
const char *constraintName,
char constraintType,
bool deferrable,
bool initdeferred,
bool mark_as_primary,
bool update_pgindex,
bool remove_old_dependencies,
bool allow_system_table_mods,
bool is_internal)
{
Oid namespaceId = RelationGetNamespace(heapRelation);
ObjectAddress myself,
referenced;
Oid conOid;
/* constraint creation support doesn't work while bootstrapping */
Assert(!IsBootstrapProcessingMode());
/* enforce system-table restriction */
if (!allow_system_table_mods &&
IsSystemRelation(heapRelation) &&
IsNormalProcessingMode())
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("user-defined indexes on system catalog tables are not supported")));
/* primary/unique constraints shouldn't have any expressions */
if (indexInfo->ii_Expressions &&
constraintType != CONSTRAINT_EXCLUSION)
elog(ERROR, "constraints cannot have index expressions");
/*
* If we're manufacturing a constraint for a pre-existing index, we need
* to get rid of the existing auto dependencies for the index (the ones
* that index_create() would have made instead of calling this function).
*
* Note: this code would not necessarily do the right thing if the index
* has any expressions or predicate, but we'd never be turning such an
* index into a UNIQUE or PRIMARY KEY constraint.
*/
if (remove_old_dependencies)
deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
RelationRelationId, DEPENDENCY_AUTO);
/*
* Construct a pg_constraint entry.
*/
conOid = CreateConstraintEntry(constraintName,
namespaceId,
constraintType,
deferrable,
initdeferred,
true,
RelationGetRelid(heapRelation),
indexInfo->ii_KeyAttrNumbers,
indexInfo->ii_NumIndexAttrs,
InvalidOid, /* no domain */
indexRelationId, /* index OID */
InvalidOid, /* no foreign key */
NULL,
NULL,
NULL,
NULL,
0,
' ',
' ',
' ',
indexInfo->ii_ExclusionOps,
NULL, /* no check constraint */
NULL,
NULL,
true, /* islocal */
0, /* inhcount */
true, /* noinherit */
is_internal);
/*
* Register the index as internally dependent on the constraint.
*
* Note that the constraint has a dependency on the table, so we don't
* need (or want) any direct dependency from the index to the table.
*/
myself.classId = RelationRelationId;
myself.objectId = indexRelationId;
myself.objectSubId = 0;
referenced.classId = ConstraintRelationId;
referenced.objectId = conOid;
referenced.objectSubId = 0;
recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
/*
* If the constraint is deferrable, create the deferred uniqueness
* checking trigger. (The trigger will be given an internal dependency on
* the constraint by CreateTrigger.)
*/
if (deferrable)
{
RangeVar *heapRel;
CreateTrigStmt *trigger;
heapRel = makeRangeVar(get_namespace_name(namespaceId),
pstrdup(RelationGetRelationName(heapRelation)),
-1);
trigger = makeNode(CreateTrigStmt);
trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
"PK_ConstraintTrigger" :
"Unique_ConstraintTrigger";
trigger->relation = heapRel;
trigger->funcname = SystemFuncName("unique_key_recheck");
trigger->args = NIL;
trigger->row = true;
trigger->timing = TRIGGER_TYPE_AFTER;
trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
trigger->columns = NIL;
trigger->whenClause = NULL;
trigger->isconstraint = true;
trigger->deferrable = true;
trigger->initdeferred = initdeferred;
trigger->constrrel = NULL;
(void) CreateTrigger(trigger, NULL, conOid, indexRelationId, true);
}
/*
* If needed, mark the table as having a primary key. We assume it can't
* have been so marked already, so no need to clear the flag in the other
* case.
*
* Note: this might better be done by callers. We do it here to avoid
* exposing index_update_stats() globally, but that wouldn't be necessary
* if relhaspkey went away.
*/
if (mark_as_primary)
index_update_stats(heapRelation,
true,
true,
-1.0);
/*
* If needed, mark the index as primary and/or deferred in pg_index.
*
* Note: When making an existing index into a constraint, caller must
* have a table lock that prevents concurrent table updates; otherwise,
* there is a risk that concurrent readers of the table will miss seeing
* this index at all.
*/
if (update_pgindex && (mark_as_primary || deferrable))
{
Relation pg_index;
HeapTuple indexTuple;
Form_pg_index indexForm;
bool dirty = false;
pg_index = heap_open(IndexRelationId, RowExclusiveLock);
indexTuple = SearchSysCacheCopy1(INDEXRELID,
ObjectIdGetDatum(indexRelationId));
if (!HeapTupleIsValid(indexTuple))
elog(ERROR, "cache lookup failed for index %u", indexRelationId);
indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
if (mark_as_primary && !indexForm->indisprimary)
{
indexForm->indisprimary = true;
dirty = true;
}
if (deferrable && indexForm->indimmediate)
{
indexForm->indimmediate = false;
dirty = true;
}
if (dirty)
{
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
CatalogUpdateIndexes(pg_index, indexTuple);
InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
InvalidOid, is_internal);
}
heap_freetuple(indexTuple);
heap_close(pg_index, RowExclusiveLock);
}
}
/*
* index_drop
*
* NOTE: this routine should now only be called through performDeletion(),
* else associated dependencies won't be cleaned up.
*/
void
index_drop(Oid indexId, bool concurrent)
{
Oid heapId;
Relation userHeapRelation;
Relation userIndexRelation;
Relation indexRelation;
HeapTuple tuple;
bool hasexprs;
LockRelId heaprelid,
indexrelid;
LOCKTAG heaplocktag;
LOCKMODE lockmode;
/*
* To drop an index safely, we must grab exclusive lock on its parent
* table. Exclusive lock on the index alone is insufficient because
* another backend might be about to execute a query on the parent table.
* If it relies on a previously cached list of index OIDs, then it could
* attempt to access the just-dropped index. We must therefore take a
* table lock strong enough to prevent all queries on the table from
* proceeding until we commit and send out a shared-cache-inval notice
* that will make them update their index lists.
*
* In the concurrent case we avoid this requirement by disabling index use
* in multiple steps and waiting out any transactions that might be using
* the index, so we don't need exclusive lock on the parent table. Instead
* we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
* doing CREATE/DROP INDEX CONCURRENTLY on the same index. (We will get
* AccessExclusiveLock on the index below, once we're sure nobody else is
* using it.)
*/
heapId = IndexGetRelation(indexId, false);
lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock;
userHeapRelation = heap_open(heapId, lockmode);
userIndexRelation = index_open(indexId, lockmode);
/*
* We might still have open queries using it in our own session, which the
* above locking won't prevent, so test explicitly.
*/
CheckTableNotInUse(userIndexRelation, "DROP INDEX");
/*
* Drop Index Concurrently is more or less the reverse process of Create
* Index Concurrently.
*
* First we unset indisvalid so queries starting afterwards don't use the
* index to answer queries anymore. We have to keep indisready = true so
* transactions that are still scanning the index can continue to see
* valid index contents. For instance, if they are using READ COMMITTED
* mode, and another transaction makes changes and commits, they need to
* see those new tuples in the index.
*
* After all transactions that could possibly have used the index for
* queries end, we can unset indisready and indislive, then wait till
* nobody could be touching it anymore. (Note: we need indislive because
* this state must be distinct from the initial state during CREATE INDEX
* CONCURRENTLY, which has indislive true while indisready and indisvalid
* are false. That's because in that state, transactions must examine the
* index for HOT-safety decisions, while in this state we don't want them
* to open it at all.)
*
* Since all predicate locks on the index are about to be made invalid, we
* must promote them to predicate locks on the heap. In the
* non-concurrent case we can just do that now. In the concurrent case
* it's a bit trickier. The predicate locks must be moved when there are
* no index scans in progress on the index and no more can subsequently
* start, so that no new predicate locks can be made on the index. Also,
* they must be moved before heap inserts stop maintaining the index, else
* the conflict with the predicate lock on the index gap could be missed
* before the lock on the heap relation is in place to detect a conflict
* based on the heap tuple insert.
*/
if (concurrent)
{
/*
* We must commit our transaction in order to make the first pg_index
* state update visible to other sessions. If the DROP machinery has
* already performed any other actions (removal of other objects,
* pg_depend entries, etc), the commit would make those actions
* permanent, which would leave us with inconsistent catalog state if
* we fail partway through the following sequence. Since DROP INDEX
* CONCURRENTLY is restricted to dropping just one index that has no
* dependencies, we should get here before anything's been done ---
* but let's check that to be sure. We can verify that the current
* transaction has not executed any transactional updates by checking
* that no XID has been assigned.
*/
if (GetTopTransactionIdIfAny() != InvalidTransactionId)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
/*
* Mark index invalid by updating its pg_index entry
*/
index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);
/*
* Invalidate the relcache for the table, so that after this commit
* all sessions will refresh any cached plans that might reference the
* index.
*/
CacheInvalidateRelcache(userHeapRelation);
/* save lockrelid and locktag for below, then close but keep locks */
heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
indexrelid = userIndexRelation->rd_lockInfo.lockRelId;
heap_close(userHeapRelation, NoLock);
index_close(userIndexRelation, NoLock);
/*
* We must commit our current transaction so that the indisvalid
* update becomes visible to other transactions; then start another.
* Note that any previously-built data structures are lost in the
* commit. The only data we keep past here are the relation IDs.
*
* Before committing, get a session-level lock on the table, to ensure
* that neither it nor the index can be dropped before we finish. This
* cannot block, even if someone else is waiting for access, because
* we already have the same lock within our transaction.
*/
LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
PopActiveSnapshot();
CommitTransactionCommand();
StartTransactionCommand();
/*
* Now we must wait until no running transaction could be using the
* index for a query. Note we do not need to worry about xacts that
* open the table for reading after this point; they will see the
* index as invalid when they open the relation.
*
* Note: the reason we use actual lock acquisition here, rather than
* just checking the ProcArray and sleeping, is that deadlock is
* possible if one of the transactions in question is blocked trying
* to acquire an exclusive lock on our table. The lock code will
* detect deadlock and error out properly.
*/
WaitForLockers(heaplocktag, AccessExclusiveLock);
/*
* No more predicate locks will be acquired on this index, and we're
* about to stop doing inserts into the index which could show
* conflicts with existing predicate locks, so now is the time to move
* them to the heap relation.
*/
userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
TransferPredicateLocksToHeapRelation(userIndexRelation);
/*
* Now we are sure that nobody uses the index for queries; they just
* might have it open for updating it. So now we can unset indisready
* and indislive, then wait till nobody could be using it at all
* anymore.
*/
index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
/*
* Invalidate the relcache for the table, so that after this commit
* all sessions will refresh the table's index list. Forgetting just
* the index's relcache entry is not enough.
*/
CacheInvalidateRelcache(userHeapRelation);
/*
* Close the relations again, though still holding session lock.
*/
heap_close(userHeapRelation, NoLock);
index_close(userIndexRelation, NoLock);
/*
* Again, commit the transaction to make the pg_index update visible
* to other sessions.
*/
CommitTransactionCommand();
StartTransactionCommand();
/*
* Wait till every transaction that saw the old index state has
* finished.
*/
WaitForLockers(heaplocktag, AccessExclusiveLock);
/*
* Re-open relations to allow us to complete our actions.
*
* At this point, nothing should be accessing the index, but lets
* leave nothing to chance and grab AccessExclusiveLock on the index
* before the physical deletion.
*/
userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
userIndexRelation = index_open(indexId, AccessExclusiveLock);
}
else
{
/* Not concurrent, so just transfer predicate locks and we're good */
TransferPredicateLocksToHeapRelation(userIndexRelation);
}
/*
* Schedule physical removal of the files
*/
RelationDropStorage(userIndexRelation);
/*
* Close and flush the index's relcache entry, to ensure relcache doesn't
* try to rebuild it while we're deleting catalog entries. We keep the
* lock though.
*/
index_close(userIndexRelation, NoLock);
RelationForgetRelation(indexId);
/*
* fix INDEX relation, and check for expressional index
*/
indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for index %u", indexId);
hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
simple_heap_delete(indexRelation, &tuple->t_self);
ReleaseSysCache(tuple);
heap_close(indexRelation, RowExclusiveLock);
/*
* if it has any expression columns, we might have stored statistics about
* them.
*/
if (hasexprs)
RemoveStatistics(indexId, 0);
/*
* fix ATTRIBUTE relation
*/
DeleteAttributeTuples(indexId);
/*
* fix RELATION relation
*/
DeleteRelationTuple(indexId);
/*
* We are presently too lazy to attempt to compute the new correct value
* of relhasindex (the next VACUUM will fix it if necessary). So there is
* no need to update the pg_class tuple for the owning relation. But we
* must send out a shared-cache-inval notice on the owning relation to
* ensure other backends update their relcache lists of indexes. (In the
* concurrent case, this is redundant but harmless.)
*/
CacheInvalidateRelcache(userHeapRelation);
/*
* Close owning rel, but keep lock
*/
heap_close(userHeapRelation, NoLock);
/*
* Release the session locks before we go.
*/
if (concurrent)
{
UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
}
}
/* ----------------------------------------------------------------
* index_build support
* ----------------------------------------------------------------
*/
/* ----------------
* BuildIndexInfo
* Construct an IndexInfo record for an open index
*
* IndexInfo stores the information about the index that's needed by
* FormIndexDatum, which is used for both index_build() and later insertion
* of individual index tuples. Normally we build an IndexInfo for an index
* just once per command, and then use it for (potentially) many tuples.
* ----------------
*/
IndexInfo *
BuildIndexInfo(Relation index)
{
IndexInfo *ii = makeNode(IndexInfo);
Form_pg_index indexStruct = index->rd_index;
int i;
int numKeys;
/* check the number of keys, and copy attr numbers into the IndexInfo */
numKeys = indexStruct->indnatts;
if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
elog(ERROR, "invalid indnatts %d for index %u",
numKeys, RelationGetRelid(index));
ii->ii_NumIndexAttrs = numKeys;
for (i = 0; i < numKeys; i++)
ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
/* fetch any expressions needed for expressional indexes */
ii->ii_Expressions = RelationGetIndexExpressions(index);
ii->ii_ExpressionsState = NIL;
/* fetch index predicate if any */
ii->ii_Predicate = RelationGetIndexPredicate(index);
ii->ii_PredicateState = NIL;
/* fetch exclusion constraint info if any */
if (indexStruct->indisexclusion)
{
RelationGetExclusionInfo(index,
&ii->ii_ExclusionOps,
&ii->ii_ExclusionProcs,
&ii->ii_ExclusionStrats);
}
else
{
ii->ii_ExclusionOps = NULL;
ii->ii_ExclusionProcs = NULL;
ii->ii_ExclusionStrats = NULL;
}
/* other info */
ii->ii_Unique = indexStruct->indisunique;
ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
/* initialize index-build state to default */
ii->ii_Concurrent = false;
ii->ii_BrokenHotChain = false;
return ii;
}
/* ----------------
* FormIndexDatum
* Construct values[] and isnull[] arrays for a new index tuple.
*
* indexInfo Info about the index
* slot Heap tuple for which we must prepare an index entry
* estate executor state for evaluating any index expressions
* values Array of index Datums (output area)
* isnull Array of is-null indicators (output area)
*
* When there are no index expressions, estate may be NULL. Otherwise it
* must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
* context must point to the heap tuple passed in.
*
* Notice we don't actually call index_form_tuple() here; we just prepare
* its input arrays values[] and isnull[]. This is because the index AM
* may wish to alter the data before storage.
* ----------------
*/
void
FormIndexDatum(IndexInfo *indexInfo,
TupleTableSlot *slot,
EState *estate,
Datum *values,
bool *isnull)
{
ListCell *indexpr_item;
int i;
if (indexInfo->ii_Expressions != NIL &&
indexInfo->ii_ExpressionsState == NIL)
{
/* First time through, set up expression evaluation state */
indexInfo->ii_ExpressionsState = (List *)
ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
estate);
/* Check caller has set up context correctly */
Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
}
indexpr_item = list_head(indexInfo->ii_ExpressionsState);
for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
{
int keycol = indexInfo->ii_KeyAttrNumbers[i];
Datum iDatum;
bool isNull;
if (keycol != 0)
{
/*
* Plain index column; get the value we need directly from the
* heap tuple.
*/
iDatum = slot_getattr(slot, keycol, &isNull);
}
else
{
/*
* Index expression --- need to evaluate it.
*/
if (indexpr_item == NULL)
elog(ERROR, "wrong number of index expressions");
iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
GetPerTupleExprContext(estate),
&isNull,
NULL);
indexpr_item = lnext(indexpr_item);
}
values[i] = iDatum;
isnull[i] = isNull;
}
if (indexpr_item != NULL)
elog(ERROR, "wrong number of index expressions");
}
/*
* index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
*
* This routine updates the pg_class row of either an index or its parent
* relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed
* to ensure we can do all the necessary work in just one update.
*
* hasindex: set relhasindex to this value
* isprimary: if true, set relhaspkey true; else no change
* reltuples: if >= 0, set reltuples to this value; else no change
*
* If reltuples >= 0, relpages and relallvisible are also updated (using
* RelationGetNumberOfBlocks() and visibilitymap_count()).
*
* NOTE: an important side-effect of this operation is that an SI invalidation
* message is sent out to all backends --- including me --- causing relcache
* entries to be flushed or updated with the new data. This must happen even
* if we find that no change is needed in the pg_class row. When updating
* a heap entry, this ensures that other backends find out about the new
* index. When updating an index, it's important because some index AMs
* expect a relcache flush to occur after REINDEX.
*/
static void
index_update_stats(Relation rel,
bool hasindex,
bool isprimary,
double reltuples)
{
Oid relid = RelationGetRelid(rel);
Relation pg_class;
HeapTuple tuple;
Form_pg_class rd_rel;
bool dirty;
/*
* We always update the pg_class row using a non-transactional,
* overwrite-in-place update. There are several reasons for this:
*
* 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
*
* 2. We could be reindexing pg_class itself, in which case we can't move
* its pg_class row because CatalogUpdateIndexes might not know about all
* the indexes yet (see reindex_relation).
*
* 3. Because we execute CREATE INDEX with just share lock on the parent
* rel (to allow concurrent index creations), an ordinary update could
* suffer a tuple-concurrently-updated failure against another CREATE
* INDEX committing at about the same time. We can avoid that by having
* them both do nontransactional updates (we assume they will both be
* trying to change the pg_class row to the same thing, so it doesn't
* matter which goes first).
*
* 4. Even with just a single CREATE INDEX, there's a risk factor because
* someone else might be trying to open the rel while we commit, and this
* creates a race condition as to whether he will see both or neither of
* the pg_class row versions as valid. Again, a non-transactional update
* avoids the risk. It is indeterminate which state of the row the other
* process will see, but it doesn't matter (if he's only taking
* AccessShareLock, then it's not critical that he see relhasindex true).
*
* It is safe to use a non-transactional update even though our
* transaction could still fail before committing. Setting relhasindex
* true is safe even if there are no indexes (VACUUM will eventually fix
* it), likewise for relhaspkey. And of course the new relpages and
* reltuples counts are correct regardless. However, we don't want to
* change relpages (or relallvisible) if the caller isn't providing an
* updated reltuples count, because that would bollix the
* reltuples/relpages ratio which is what's really important.
*/
pg_class = heap_open(RelationRelationId, RowExclusiveLock);
/*
* Make a copy of the tuple to update. Normally we use the syscache, but
* we can't rely on that during bootstrap or while reindexing pg_class
* itself.
*/
if (IsBootstrapProcessingMode() ||
ReindexIsProcessingHeap(RelationRelationId))
{
/* don't assume syscache will work */
HeapScanDesc pg_class_scan;
ScanKeyData key[1];
ScanKeyInit(&key[0],
ObjectIdAttributeNumber,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(relid));
pg_class_scan = heap_beginscan_catalog(pg_class, 1, key);
tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
tuple = heap_copytuple(tuple);
heap_endscan(pg_class_scan);
}
else
{
/* normal case, use syscache */
tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
}
if (!HeapTupleIsValid(tuple))
elog(ERROR, "could not find tuple for relation %u", relid);
rd_rel = (Form_pg_class) GETSTRUCT(tuple);
/* Apply required updates, if any, to copied tuple */
dirty = false;
if (rd_rel->relhasindex != hasindex)
{
rd_rel->relhasindex = hasindex;
dirty = true;
}
if (isprimary)
{
if (!rd_rel->relhaspkey)
{
rd_rel->relhaspkey = true;
dirty = true;
}
}
if (reltuples >= 0)
{
BlockNumber relpages = RelationGetNumberOfBlocks(rel);
BlockNumber relallvisible;
if (rd_rel->relkind != RELKIND_INDEX)
relallvisible = visibilitymap_count(rel);
else /* don't bother for indexes */
relallvisible = 0;
if (rd_rel->relpages != (int32) relpages)
{
rd_rel->relpages = (int32) relpages;
dirty = true;
}
if (rd_rel->reltuples != (float4) reltuples)
{
rd_rel->reltuples = (float4) reltuples;
dirty = true;
}
if (rd_rel->relallvisible != (int32) relallvisible)
{
rd_rel->relallvisible = (int32) relallvisible;
dirty = true;
}
}
/*
* If anything changed, write out the tuple
*/
if (dirty)
{
heap_inplace_update(pg_class, tuple);
/* the above sends a cache inval message */
}
else
{
/* no need to change tuple, but force relcache inval anyway */
CacheInvalidateRelcacheByTuple(tuple);
}
heap_freetuple(tuple);
heap_close(pg_class, RowExclusiveLock);
}
/*
* index_build - invoke access-method-specific index build procedure
*
* On entry, the index's catalog entries are valid, and its physical disk
* file has been created but is empty. We call the AM-specific build
* procedure to fill in the index contents. We then update the pg_class
* entries of the index and heap relation as needed, using statistics
* returned by ambuild as well as data passed by the caller.
*
* isprimary tells whether to mark the index as a primary-key index.
* isreindex indicates we are recreating a previously-existing index.
*
* Note: when reindexing an existing index, isprimary can be false even if
* the index is a PK; it's already properly marked and need not be re-marked.
*
* Note: before Postgres 8.2, the passed-in heap and index Relations
* were automatically closed by this routine. This is no longer the case.
* The caller opened 'em, and the caller should close 'em.
*/
void
index_build(Relation heapRelation,
Relation indexRelation,
IndexInfo *indexInfo,
bool isprimary,
bool isreindex)
{
RegProcedure procedure;
IndexBuildResult *stats;
Oid save_userid;
int save_sec_context;
int save_nestlevel;
/*
* sanity checks
*/
Assert(RelationIsValid(indexRelation));
Assert(PointerIsValid(indexRelation->rd_am));
procedure = indexRelation->rd_am->ambuild;
Assert(RegProcedureIsValid(procedure));
ereport(DEBUG1,
(errmsg("building index \"%s\" on table \"%s\"",
RelationGetRelationName(indexRelation),
RelationGetRelationName(heapRelation))));
/*
* Switch to the table owner's userid, so that any index functions are run
* as that user. Also lock down security-restricted operations and
* arrange to make GUC variable changes local to this command.
*/
GetUserIdAndSecContext(&save_userid, &save_sec_context);
SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
save_sec_context | SECURITY_RESTRICTED_OPERATION);
save_nestlevel = NewGUCNestLevel();
/*
* Call the access method's build procedure
*/
stats = (IndexBuildResult *)
DatumGetPointer(OidFunctionCall3(procedure,
PointerGetDatum(heapRelation),
PointerGetDatum(indexRelation),
PointerGetDatum(indexInfo)));
Assert(PointerIsValid(stats));
/*
* If this is an unlogged index, we may need to write out an init fork for
* it -- but we must first check whether one already exists. If, for
* example, an unlogged relation is truncated in the transaction that
* created it, or truncated twice in a subsequent transaction, the
* relfilenode won't change, and nothing needs to be done here.
*/
if (heapRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
!smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
{
RegProcedure ambuildempty = indexRelation->rd_am->ambuildempty;
RelationOpenSmgr(indexRelation);
smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
OidFunctionCall1(ambuildempty, PointerGetDatum(indexRelation));
}
/*
* If we found any potentially broken HOT chains, mark the index as not
* being usable until the current transaction is below the event horizon.
* See src/backend/access/heap/README.HOT for discussion.
*
* However, when reindexing an existing index, we should do nothing here.
* Any HOT chains that are broken with respect to the index must predate
* the index's original creation, so there is no need to change the
* index's usability horizon. Moreover, we *must not* try to change the
* index's pg_index entry while reindexing pg_index itself, and this
* optimization nicely prevents that.
*
* We also need not set indcheckxmin during a concurrent index build,
* because we won't set indisvalid true until all transactions that care
* about the broken HOT chains are gone.
*
* Therefore, this code path can only be taken during non-concurrent
* CREATE INDEX. Thus the fact that heap_update will set the pg_index
* tuple's xmin doesn't matter, because that tuple was created in the
* current transaction anyway. That also means we don't need to worry
* about any concurrent readers of the tuple; no other transaction can see
* it yet.
*/
if (indexInfo->ii_BrokenHotChain && !isreindex &&
!indexInfo->ii_Concurrent)
{
Oid indexId = RelationGetRelid(indexRelation);
Relation pg_index;
HeapTuple indexTuple;
Form_pg_index indexForm;
pg_index = heap_open(IndexRelationId, RowExclusiveLock);
indexTuple = SearchSysCacheCopy1(INDEXRELID,
ObjectIdGetDatum(indexId));
if (!HeapTupleIsValid(indexTuple))
elog(ERROR, "cache lookup failed for index %u", indexId);
indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
/* If it's a new index, indcheckxmin shouldn't be set ... */
Assert(!indexForm->indcheckxmin);
indexForm->indcheckxmin = true;
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
CatalogUpdateIndexes(pg_index, indexTuple);
heap_freetuple(indexTuple);
heap_close(pg_index, RowExclusiveLock);
}
/*
* Update heap and index pg_class rows
*/
index_update_stats(heapRelation,
true,
isprimary,
stats->heap_tuples);
index_update_stats(indexRelation,
false,
false,
stats->index_tuples);
/* Make the updated catalog row versions visible */
CommandCounterIncrement();
/*
* If it's for an exclusion constraint, make a second pass over the heap
* to verify that the constraint is satisfied. We must not do this until
* the index is fully valid. (Broken HOT chains shouldn't matter, though;
* see comments for IndexCheckExclusion.)
*/
if (indexInfo->ii_ExclusionOps != NULL)
IndexCheckExclusion(heapRelation, indexRelation, indexInfo);
/* Roll back any GUC changes executed by index functions */
AtEOXact_GUC(false, save_nestlevel);
/* Restore userid and security context */
SetUserIdAndSecContext(save_userid, save_sec_context);
}
/*
* IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
*
* This is called back from an access-method-specific index build procedure
* after the AM has done whatever setup it needs. The parent heap relation
* is scanned to find tuples that should be entered into the index. Each
* such tuple is passed to the AM's callback routine, which does the right
* things to add it to the new index. After we return, the AM's index
* build procedure does whatever cleanup it needs.
*
* The total count of heap tuples is returned. This is for updating pg_class
* statistics. (It's annoying not to be able to do that here, but we want
* to merge that update with others; see index_update_stats.) Note that the
* index AM itself must keep track of the number of index tuples; we don't do
* so here because the AM might reject some of the tuples for its own reasons,
* such as being unable to store NULLs.
*
* A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
* any potentially broken HOT chains. Currently, we set this if there are
* any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without
* trying very hard to detect whether they're really incompatible with the
* chain tip.
*/
double
IndexBuildHeapScan(Relation heapRelation,
Relation indexRelation,
IndexInfo *indexInfo,
bool allow_sync,
IndexBuildCallback callback,
void *callback_state)
{
bool is_system_catalog;
bool checking_uniqueness;
HeapScanDesc scan;
HeapTuple heapTuple;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
double reltuples;
List *predicate;
TupleTableSlot *slot;
EState *estate;
ExprContext *econtext;
Snapshot snapshot;
TransactionId OldestXmin;
BlockNumber root_blkno = InvalidBlockNumber;
OffsetNumber root_offsets[MaxHeapTuplesPerPage];
/*
* sanity checks
*/
Assert(OidIsValid(indexRelation->rd_rel->relam));
/* Remember if it's a system catalog */
is_system_catalog = IsSystemRelation(heapRelation);
/* See whether we're verifying uniqueness/exclusion properties */
checking_uniqueness = (indexInfo->ii_Unique ||
indexInfo->ii_ExclusionOps != NULL);
/*
* Need an EState for evaluation of index expressions and partial-index
* predicates. Also a slot to hold the current tuple.
*/
estate = CreateExecutorState();
econtext = GetPerTupleExprContext(estate);
slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
/* Set up execution state for predicate, if any. */
predicate = (List *)
ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
estate);
/*
* Prepare for scan of the base relation. In a normal index build, we use
* SnapshotAny because we must retrieve all tuples and do our own time
* qual checks (because we have to index RECENTLY_DEAD tuples). In a
* concurrent build, or during bootstrap, we take a regular MVCC snapshot
* and index whatever's live according to that.
*/
if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
{
snapshot = RegisterSnapshot(GetTransactionSnapshot());
OldestXmin = InvalidTransactionId; /* not used */
}
else
{
snapshot = SnapshotAny;
/* okay to ignore lazy VACUUMs here */
OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true);
}
scan = heap_beginscan_strat(heapRelation, /* relation */
snapshot, /* snapshot */
0, /* number of keys */
NULL, /* scan key */
true, /* buffer access strategy OK */
allow_sync); /* syncscan OK? */
reltuples = 0;
/*
* Scan all tuples in the base relation.
*/
while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
bool tupleIsAlive;
CHECK_FOR_INTERRUPTS();
/*
* When dealing with a HOT-chain of updated tuples, we want to index
* the values of the live tuple (if any), but index it under the TID
* of the chain's root tuple. This approach is necessary to preserve
* the HOT-chain structure in the heap. So we need to be able to find
* the root item offset for every tuple that's in a HOT-chain. When
* first reaching a new page of the relation, call
* heap_get_root_tuples() to build a map of root item offsets on the
* page.
*
* It might look unsafe to use this information across buffer
* lock/unlock. However, we hold ShareLock on the table so no
* ordinary insert/update/delete should occur; and we hold pin on the
* buffer continuously while visiting the page, so no pruning
* operation can occur either.
*
* Also, although our opinions about tuple liveness could change while
* we scan the page (due to concurrent transaction commits/aborts),
* the chain root locations won't, so this info doesn't need to be
* rebuilt after waiting for another transaction.
*
* Note the implied assumption that there is no more than one live
* tuple per HOT-chain --- else we could create more than one index
* entry pointing to the same root tuple.
*/
if (scan->rs_cblock != root_blkno)
{
Page page = BufferGetPage(scan->rs_cbuf);
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
heap_get_root_tuples(page, root_offsets);
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
root_blkno = scan->rs_cblock;
}
if (snapshot == SnapshotAny)
{
/* do our own time qual check */
bool indexIt;
TransactionId xwait;
recheck:
/*
* We could possibly get away with not locking the buffer here,
* since caller should hold ShareLock on the relation, but let's
* be conservative about it. (This remark is still correct even
* with HOT-pruning: our pin on the buffer prevents pruning.)
*/
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
scan->rs_cbuf))
{
case HEAPTUPLE_DEAD:
/* Definitely dead, we can ignore it */
indexIt = false;
tupleIsAlive = false;
break;
case HEAPTUPLE_LIVE:
/* Normal case, index and unique-check it */
indexIt = true;
tupleIsAlive = true;
break;
case HEAPTUPLE_RECENTLY_DEAD:
/*
* If tuple is recently deleted then we must index it
* anyway to preserve MVCC semantics. (Pre-existing
* transactions could try to use the index after we finish
* building it, and may need to see such tuples.)
*
* However, if it was HOT-updated then we must only index
* the live tuple at the end of the HOT-chain. Since this
* breaks semantics for pre-existing snapshots, mark the
* index as unusable for them.
*/
if (HeapTupleIsHotUpdated(heapTuple))
{
indexIt = false;
/* mark the index as unsafe for old snapshots */
indexInfo->ii_BrokenHotChain = true;
}
else
indexIt = true;
/* In any case, exclude the tuple from unique-checking */
tupleIsAlive = false;
break;
case HEAPTUPLE_INSERT_IN_PROGRESS:
/*
* Since caller should hold ShareLock or better, normally
* the only way to see this is if it was inserted earlier
* in our own transaction. However, it can happen in
* system catalogs, since we tend to release write lock
* before commit there. Give a warning if neither case
* applies.
*/
xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
if (!TransactionIdIsCurrentTransactionId(xwait))
{
if (!is_system_catalog)
elog(WARNING, "concurrent insert in progress within table \"%s\"",
RelationGetRelationName(heapRelation));
/*
* If we are performing uniqueness checks, indexing
* such a tuple could lead to a bogus uniqueness
* failure. In that case we wait for the inserting
* transaction to finish and check again.
*/
if (checking_uniqueness)
{
/*
* Must drop the lock on the buffer before we wait
*/
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
XactLockTableWait(xwait);
goto recheck;
}
}
/*
* We must index such tuples, since if the index build
* commits then they're good.
*/
indexIt = true;
tupleIsAlive = true;
break;
case HEAPTUPLE_DELETE_IN_PROGRESS:
/*
* As with INSERT_IN_PROGRESS case, this is unexpected
* unless it's our own deletion or a system catalog.
*/
xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
if (!TransactionIdIsCurrentTransactionId(xwait))
{
if (!is_system_catalog)
elog(WARNING, "concurrent delete in progress within table \"%s\"",
RelationGetRelationName(heapRelation));
/*
* If we are performing uniqueness checks, assuming
* the tuple is dead could lead to missing a
* uniqueness violation. In that case we wait for the
* deleting transaction to finish and check again.
*
* Also, if it's a HOT-updated tuple, we should not
* index it but rather the live tuple at the end of
* the HOT-chain. However, the deleting transaction
* could abort, possibly leaving this tuple as live
* after all, in which case it has to be indexed. The
* only way to know what to do is to wait for the
* deleting transaction to finish and check again.
*/
if (checking_uniqueness ||
HeapTupleIsHotUpdated(heapTuple))
{
/*
* Must drop the lock on the buffer before we wait
*/
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
XactLockTableWait(xwait);
goto recheck;
}
/*
* Otherwise index it but don't check for uniqueness,
* the same as a RECENTLY_DEAD tuple.
*/
indexIt = true;
}
else if (HeapTupleIsHotUpdated(heapTuple))
{
/*
* It's a HOT-updated tuple deleted by our own xact.
* We can assume the deletion will commit (else the
* index contents don't matter), so treat the same as
* RECENTLY_DEAD HOT-updated tuples.
*/
indexIt = false;
/* mark the index as unsafe for old snapshots */
indexInfo->ii_BrokenHotChain = true;
}
else
{
/*
* It's a regular tuple deleted by our own xact. Index
* it but don't check for uniqueness, the same as a
* RECENTLY_DEAD tuple.
*/
indexIt = true;
}
/* In any case, exclude the tuple from unique-checking */
tupleIsAlive = false;
break;
default:
elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
indexIt = tupleIsAlive = false; /* keep compiler quiet */
break;
}
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
if (!indexIt)
continue;
}
else
{
/* heap_getnext did the time qual check */
tupleIsAlive = true;
}
reltuples += 1;
MemoryContextReset(econtext->ecxt_per_tuple_memory);
/* Set up for predicate or expression evaluation */
ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
/*
* In a partial index, discard tuples that don't satisfy the
* predicate.
*/
if (predicate != NIL)
{
if (!ExecQual(predicate, econtext, false))
continue;
}
/*
* For the current heap tuple, extract all the attributes we use in
* this index, and note which are null. This also performs evaluation
* of any expressions needed.
*/
FormIndexDatum(indexInfo,
slot,
estate,
values,
isnull);
/*
* You'd think we should go ahead and build the index tuple here, but
* some index AMs want to do further processing on the data first. So
* pass the values[] and isnull[] arrays, instead.
*/
if (HeapTupleIsHeapOnly(heapTuple))
{
/*
* For a heap-only tuple, pretend its TID is that of the root. See
* src/backend/access/heap/README.HOT for discussion.
*/
HeapTupleData rootTuple;
OffsetNumber offnum;
rootTuple = *heapTuple;
offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
Assert(OffsetNumberIsValid(root_offsets[offnum - 1]));
ItemPointerSetOffsetNumber(&rootTuple.t_self,
root_offsets[offnum - 1]);
/* Call the AM's callback routine to process the tuple */
callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
callback_state);
}
else
{
/* Call the AM's callback routine to process the tuple */
callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
callback_state);
}
}
heap_endscan(scan);
/* we can now forget our snapshot, if set */
if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
UnregisterSnapshot(snapshot);
ExecDropSingleTupleTableSlot(slot);
FreeExecutorState(estate);
/* These may have been pointing to the now-gone estate */
indexInfo->ii_ExpressionsState = NIL;
indexInfo->ii_PredicateState = NIL;
return reltuples;
}
/*
* IndexCheckExclusion - verify that a new exclusion constraint is satisfied
*
* When creating an exclusion constraint, we first build the index normally
* and then rescan the heap to check for conflicts. We assume that we only
* need to validate tuples that are live according to an up-to-date snapshot,
* and that these were correctly indexed even in the presence of broken HOT
* chains. This should be OK since we are holding at least ShareLock on the
* table, meaning there can be no uncommitted updates from other transactions.
* (Note: that wouldn't necessarily work for system catalogs, since many
* operations release write lock early on the system catalogs.)
*/
static void
IndexCheckExclusion(Relation heapRelation,
Relation indexRelation,
IndexInfo *indexInfo)
{
HeapScanDesc scan;
HeapTuple heapTuple;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
List *predicate;
TupleTableSlot *slot;
EState *estate;
ExprContext *econtext;
Snapshot snapshot;
/*
* If we are reindexing the target index, mark it as no longer being
* reindexed, to forestall an Assert in index_beginscan when we try to use
* the index for probes. This is OK because the index is now fully valid.
*/
if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
ResetReindexProcessing();
/*
* Need an EState for evaluation of index expressions and partial-index
* predicates. Also a slot to hold the current tuple.
*/
estate = CreateExecutorState();
econtext = GetPerTupleExprContext(estate);
slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
/* Set up execution state for predicate, if any. */
predicate = (List *)
ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
estate);
/*
* Scan all live tuples in the base relation.
*/
snapshot = RegisterSnapshot(GetLatestSnapshot());
scan = heap_beginscan_strat(heapRelation, /* relation */
snapshot, /* snapshot */
0, /* number of keys */
NULL, /* scan key */
true, /* buffer access strategy OK */
true); /* syncscan OK */
while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
CHECK_FOR_INTERRUPTS();
MemoryContextReset(econtext->ecxt_per_tuple_memory);
/* Set up for predicate or expression evaluation */
ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
/*
* In a partial index, ignore tuples that don't satisfy the predicate.
*/
if (predicate != NIL)
{
if (!ExecQual(predicate, econtext, false))
continue;
}
/*
* Extract index column values, including computing expressions.
*/
FormIndexDatum(indexInfo,
slot,
estate,
values,
isnull);
/*
* Check that this tuple has no conflicts.
*/
check_exclusion_constraint(heapRelation,
indexRelation, indexInfo,
&(heapTuple->t_self), values, isnull,
estate, true, false);
}
heap_endscan(scan);
UnregisterSnapshot(snapshot);
ExecDropSingleTupleTableSlot(slot);
FreeExecutorState(estate);
/* These may have been pointing to the now-gone estate */
indexInfo->ii_ExpressionsState = NIL;
indexInfo->ii_PredicateState = NIL;
}
/*
* validate_index - support code for concurrent index builds
*
* We do a concurrent index build by first inserting the catalog entry for the
* index via index_create(), marking it not indisready and not indisvalid.
* Then we commit our transaction and start a new one, then we wait for all
* transactions that could have been modifying the table to terminate. Now
* we know that any subsequently-started transactions will see the index and
* honor its constraints on HOT updates; so while existing HOT-chains might
* be broken with respect to the index, no currently live tuple will have an
* incompatible HOT update done to it. We now build the index normally via
* index_build(), while holding a weak lock that allows concurrent
* insert/update/delete. Also, we index only tuples that are valid
* as of the start of the scan (see IndexBuildHeapScan), whereas a normal
* build takes care to include recently-dead tuples. This is OK because
* we won't mark the index valid until all transactions that might be able
* to see those tuples are gone. The reason for doing that is to avoid
* bogus unique-index failures due to concurrent UPDATEs (we might see
* different versions of the same row as being valid when we pass over them,
* if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
* does not contain any tuples added to the table while we built the index.
*
* Next, we mark the index "indisready" (but still not "indisvalid") and
* commit the second transaction and start a third. Again we wait for all
* transactions that could have been modifying the table to terminate. Now
* we know that any subsequently-started transactions will see the index and
* insert their new tuples into it. We then take a new reference snapshot
* which is passed to validate_index(). Any tuples that are valid according
* to this snap, but are not in the index, must be added to the index.
* (Any tuples committed live after the snap will be inserted into the
* index by their originating transaction. Any tuples committed dead before
* the snap need not be indexed, because we will wait out all transactions
* that might care about them before we mark the index valid.)
*
* validate_index() works by first gathering all the TIDs currently in the
* index, using a bulkdelete callback that just stores the TIDs and doesn't
* ever say "delete it". (This should be faster than a plain indexscan;
* also, not all index AMs support full-index indexscan.) Then we sort the
* TIDs, and finally scan the table doing a "merge join" against the TID list
* to see which tuples are missing from the index. Thus we will ensure that
* all tuples valid according to the reference snapshot are in the index.
*
* Building a unique index this way is tricky: we might try to insert a
* tuple that is already dead or is in process of being deleted, and we
* mustn't have a uniqueness failure against an updated version of the same
* row. We could try to check the tuple to see if it's already dead and tell
* index_insert() not to do the uniqueness check, but that still leaves us
* with a race condition against an in-progress update. To handle that,
* we expect the index AM to recheck liveness of the to-be-inserted tuple
* before it declares a uniqueness error.
*
* After completing validate_index(), we wait until all transactions that
* were alive at the time of the reference snapshot are gone; this is
* necessary to be sure there are none left with a transaction snapshot
* older than the reference (and hence possibly able to see tuples we did
* not index). Then we mark the index "indisvalid" and commit. Subsequent
* transactions will be able to use it for queries.
*
* Doing two full table scans is a brute-force strategy. We could try to be
* cleverer, eg storing new tuples in a special area of the table (perhaps
* making the table append-only by setting use_fsm). However that would
* add yet more locking issues.
*/
void
validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
{
Relation heapRelation,
indexRelation;
IndexInfo *indexInfo;
IndexVacuumInfo ivinfo;
v_i_state state;
Oid save_userid;
int save_sec_context;
int save_nestlevel;
/* Open and lock the parent heap relation */
heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
/* And the target index relation */
indexRelation = index_open(indexId, RowExclusiveLock);
/*
* Fetch info needed for index_insert. (You might think this should be
* passed in from DefineIndex, but its copy is long gone due to having
* been built in a previous transaction.)
*/
indexInfo = BuildIndexInfo(indexRelation);
/* mark build is concurrent just for consistency */
indexInfo->ii_Concurrent = true;
/*
* Switch to the table owner's userid, so that any index functions are run
* as that user. Also lock down security-restricted operations and
* arrange to make GUC variable changes local to this command.
*/
GetUserIdAndSecContext(&save_userid, &save_sec_context);
SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
save_sec_context | SECURITY_RESTRICTED_OPERATION);
save_nestlevel = NewGUCNestLevel();
/*
* Scan the index and gather up all the TIDs into a tuplesort object.
*/
ivinfo.index = indexRelation;
ivinfo.analyze_only = false;
ivinfo.estimated_count = true;
ivinfo.message_level = DEBUG2;
ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
ivinfo.strategy = NULL;
state.tuplesort = tuplesort_begin_datum(TIDOID, TIDLessOperator,
InvalidOid, false,
maintenance_work_mem,
false);
state.htups = state.itups = state.tups_inserted = 0;
(void) index_bulk_delete(&ivinfo, NULL,
validate_index_callback, (void *) &state);
/* Execute the sort */
tuplesort_performsort(state.tuplesort);
/*
* Now scan the heap and "merge" it with the index
*/
validate_index_heapscan(heapRelation,
indexRelation,
indexInfo,
snapshot,
&state);
/* Done with tuplesort object */
tuplesort_end(state.tuplesort);
elog(DEBUG2,
"validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
state.htups, state.itups, state.tups_inserted);
/* Roll back any GUC changes executed by index functions */
AtEOXact_GUC(false, save_nestlevel);
/* Restore userid and security context */
SetUserIdAndSecContext(save_userid, save_sec_context);
/* Close rels, but keep locks */
index_close(indexRelation, NoLock);
heap_close(heapRelation, NoLock);
}
/*
* validate_index_callback - bulkdelete callback to collect the index TIDs
*/
static bool
validate_index_callback(ItemPointer itemptr, void *opaque)
{
v_i_state *state = (v_i_state *) opaque;
tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
state->itups += 1;
return false; /* never actually delete anything */
}
/*
* validate_index_heapscan - second table scan for concurrent index build
*
* This has much code in common with IndexBuildHeapScan, but it's enough
* different that it seems cleaner to have two routines not one.
*/
static void
validate_index_heapscan(Relation heapRelation,
Relation indexRelation,
IndexInfo *indexInfo,
Snapshot snapshot,
v_i_state *state)
{
HeapScanDesc scan;
HeapTuple heapTuple;
Datum values[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
List *predicate;
TupleTableSlot *slot;
EState *estate;
ExprContext *econtext;
BlockNumber root_blkno = InvalidBlockNumber;
OffsetNumber root_offsets[MaxHeapTuplesPerPage];
bool in_index[MaxHeapTuplesPerPage];
/* state variables for the merge */
ItemPointer indexcursor = NULL;
bool tuplesort_empty = false;
/*
* sanity checks
*/
Assert(OidIsValid(indexRelation->rd_rel->relam));
/*
* Need an EState for evaluation of index expressions and partial-index
* predicates. Also a slot to hold the current tuple.
*/
estate = CreateExecutorState();
econtext = GetPerTupleExprContext(estate);
slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
/* Arrange for econtext's scan tuple to be the tuple under test */
econtext->ecxt_scantuple = slot;
/* Set up execution state for predicate, if any. */
predicate = (List *)
ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
estate);
/*
* Prepare for scan of the base relation. We need just those tuples
* satisfying the passed-in reference snapshot. We must disable syncscan
* here, because it's critical that we read from block zero forward to
* match the sorted TIDs.
*/
scan = heap_beginscan_strat(heapRelation, /* relation */
snapshot, /* snapshot */
0, /* number of keys */
NULL, /* scan key */
true, /* buffer access strategy OK */
false); /* syncscan not OK */
/*
* Scan all tuples matching the snapshot.
*/
while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
ItemPointer heapcursor = &heapTuple->t_self;
ItemPointerData rootTuple;
OffsetNumber root_offnum;
CHECK_FOR_INTERRUPTS();
state->htups += 1;
/*
* As commented in IndexBuildHeapScan, we should index heap-only
* tuples under the TIDs of their root tuples; so when we advance onto
* a new heap page, build a map of root item offsets on the page.
*
* This complicates merging against the tuplesort output: we will
* visit the live tuples in order by their offsets, but the root
* offsets that we need to compare against the index contents might be
* ordered differently. So we might have to "look back" within the
* tuplesort output, but only within the current page. We handle that
* by keeping a bool array in_index[] showing all the
* already-passed-over tuplesort output TIDs of the current page. We
* clear that array here, when advancing onto a new heap page.
*/
if (scan->rs_cblock != root_blkno)
{
Page page = BufferGetPage(scan->rs_cbuf);
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
heap_get_root_tuples(page, root_offsets);
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
memset(in_index, 0, sizeof(in_index));
root_blkno = scan->rs_cblock;
}
/* Convert actual tuple TID to root TID */
rootTuple = *heapcursor;
root_offnum = ItemPointerGetOffsetNumber(heapcursor);
if (HeapTupleIsHeapOnly(heapTuple))
{
root_offnum = root_offsets[root_offnum - 1];
Assert(OffsetNumberIsValid(root_offnum));
ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
}
/*
* "merge" by skipping through the index tuples until we find or pass
* the current root tuple.
*/
while (!tuplesort_empty &&
(!indexcursor ||
ItemPointerCompare(indexcursor, &rootTuple) < 0))
{
Datum ts_val;
bool ts_isnull;
if (indexcursor)
{
/*
* Remember index items seen earlier on the current heap page
*/
if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
pfree(indexcursor);
}
tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
&ts_val, &ts_isnull);
Assert(tuplesort_empty || !ts_isnull);
indexcursor = (ItemPointer) DatumGetPointer(ts_val);
}
/*
* If the tuplesort has overshot *and* we didn't see a match earlier,
* then this tuple is missing from the index, so insert it.
*/
if ((tuplesort_empty ||
ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
!in_index[root_offnum - 1])
{
MemoryContextReset(econtext->ecxt_per_tuple_memory);
/* Set up for predicate or expression evaluation */
ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
/*
* In a partial index, discard tuples that don't satisfy the
* predicate.
*/
if (predicate != NIL)
{
if (!ExecQual(predicate, econtext, false))
continue;
}
/*
* For the current heap tuple, extract all the attributes we use
* in this index, and note which are null. This also performs
* evaluation of any expressions needed.
*/
FormIndexDatum(indexInfo,
slot,
estate,
values,
isnull);
/*
* You'd think we should go ahead and build the index tuple here,
* but some index AMs want to do further processing on the data
* first. So pass the values[] and isnull[] arrays, instead.
*/
/*
* If the tuple is already committed dead, you might think we
* could suppress uniqueness checking, but this is no longer true
* in the presence of HOT, because the insert is actually a proxy
* for a uniqueness check on the whole HOT-chain. That is, the
* tuple we have here could be dead because it was already
* HOT-updated, and if so the updating transaction will not have
* thought it should insert index entries. The index AM will
* check the whole HOT-chain and correctly detect a conflict if
* there is one.
*/
index_insert(indexRelation,
values,
isnull,
&rootTuple,
heapRelation,
indexInfo->ii_Unique ?
UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
state->tups_inserted += 1;
}
}
heap_endscan(scan);
ExecDropSingleTupleTableSlot(slot);
FreeExecutorState(estate);
/* These may have been pointing to the now-gone estate */
indexInfo->ii_ExpressionsState = NIL;
indexInfo->ii_PredicateState = NIL;
}
/*
* index_set_state_flags - adjust pg_index state flags
*
* This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
* flags that denote the index's state. Because the update is not
* transactional and will not roll back on error, this must only be used as
* the last step in a transaction that has not made any transactional catalog
* updates!
*
* Note that heap_inplace_update does send a cache inval message for the
* tuple, so other sessions will hear about the update as soon as we commit.
*
* NB: In releases prior to PostgreSQL 9.4, the use of a non-transactional
* update here would have been unsafe; now that MVCC rules apply even for
* system catalog scans, we could potentially use a transactional update here
* instead.
*/
void
index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
{
Relation pg_index;
HeapTuple indexTuple;
Form_pg_index indexForm;
/* Assert that current xact hasn't done any transactional updates */
Assert(GetTopTransactionIdIfAny() == InvalidTransactionId);
/* Open pg_index and fetch a writable copy of the index's tuple */
pg_index = heap_open(IndexRelationId, RowExclusiveLock);
indexTuple = SearchSysCacheCopy1(INDEXRELID,
ObjectIdGetDatum(indexId));
if (!HeapTupleIsValid(indexTuple))
elog(ERROR, "cache lookup failed for index %u", indexId);
indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
/* Perform the requested state change on the copy */
switch (action)
{
case INDEX_CREATE_SET_READY:
/* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
Assert(indexForm->indislive);
Assert(!indexForm->indisready);
Assert(!indexForm->indisvalid);
indexForm->indisready = true;
break;
case INDEX_CREATE_SET_VALID:
/* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
Assert(indexForm->indislive);
Assert(indexForm->indisready);
Assert(!indexForm->indisvalid);
indexForm->indisvalid = true;
break;
case INDEX_DROP_CLEAR_VALID:
/*
* Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
*
* If indisready == true we leave it set so the index still gets
* maintained by active transactions. We only need to ensure that
* indisvalid is false. (We don't assert that either is initially
* true, though, since we want to be able to retry a DROP INDEX
* CONCURRENTLY that failed partway through.)
*
* Note: the CLUSTER logic assumes that indisclustered cannot be
* set on any invalid index, so clear that flag too.
*/
indexForm->indisvalid = false;
indexForm->indisclustered = false;
break;
case INDEX_DROP_SET_DEAD:
/*
* Clear indisready/indislive during DROP INDEX CONCURRENTLY
*
* We clear both indisready and indislive, because we not only
* want to stop updates, we want to prevent sessions from touching
* the index at all.
*/
Assert(!indexForm->indisvalid);
indexForm->indisready = false;
indexForm->indislive = false;
break;
}
/* ... and write it back in-place */
heap_inplace_update(pg_index, indexTuple);
heap_close(pg_index, RowExclusiveLock);
}
/*
* IndexGetRelation: given an index's relation OID, get the OID of the
* relation it is an index on. Uses the system cache.
*/
Oid
IndexGetRelation(Oid indexId, bool missing_ok)
{
HeapTuple tuple;
Form_pg_index index;
Oid result;
tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
if (!HeapTupleIsValid(tuple))
{
if (missing_ok)
return InvalidOid;
elog(ERROR, "cache lookup failed for index %u", indexId);
}
index = (Form_pg_index) GETSTRUCT(tuple);
Assert(index->indexrelid == indexId);
result = index->indrelid;
ReleaseSysCache(tuple);
return result;
}
/*
* reindex_index - This routine is used to recreate a single index
*/
void
reindex_index(Oid indexId, bool skip_constraint_checks)
{
Relation iRel,
heapRelation;
Oid heapId;
IndexInfo *indexInfo;
volatile bool skipped_constraint = false;
/*
* Open and lock the parent heap relation. ShareLock is sufficient since
* we only need to be sure no schema or data changes are going on.
*/
heapId = IndexGetRelation(indexId, false);
heapRelation = heap_open(heapId, ShareLock);
/*
* Open the target index relation and get an exclusive lock on it, to
* ensure that no one else is touching this particular index.
*/
iRel = index_open(indexId, AccessExclusiveLock);
/*
* Don't allow reindex on temp tables of other backends ... their local
* buffer manager is not going to cope.
*/
if (RELATION_IS_OTHER_TEMP(iRel))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot reindex temporary tables of other sessions")));
/*
* Also check for active uses of the index in the current transaction; we
* don't want to reindex underneath an open indexscan.
*/
CheckTableNotInUse(iRel, "REINDEX INDEX");
/*
* All predicate locks on the index are about to be made invalid. Promote
* them to relation locks on the heap.
*/
TransferPredicateLocksToHeapRelation(iRel);
PG_TRY();
{
/* Suppress use of the target index while rebuilding it */
SetReindexProcessing(heapId, indexId);
/* Fetch info needed for index_build */
indexInfo = BuildIndexInfo(iRel);
/* If requested, skip checking uniqueness/exclusion constraints */
if (skip_constraint_checks)
{
if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
skipped_constraint = true;
indexInfo->ii_Unique = false;
indexInfo->ii_ExclusionOps = NULL;
indexInfo->ii_ExclusionProcs = NULL;
indexInfo->ii_ExclusionStrats = NULL;
}
/* We'll build a new physical relation for the index */
RelationSetNewRelfilenode(iRel, InvalidTransactionId,
InvalidMultiXactId);
/* Initialize the index and rebuild */
/* Note: we do not need to re-establish pkey setting */
index_build(heapRelation, iRel, indexInfo, false, true);
}
PG_CATCH();
{
/* Make sure flag gets cleared on error exit */
ResetReindexProcessing();
PG_RE_THROW();
}
PG_END_TRY();
ResetReindexProcessing();
/*
* If the index is marked invalid/not-ready/dead (ie, it's from a failed
* CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
* and we didn't skip a uniqueness check, we can now mark it valid. This
* allows REINDEX to be used to clean up in such cases.
*
* We can also reset indcheckxmin, because we have now done a
* non-concurrent index build, *except* in the case where index_build
* found some still-broken HOT chains. If it did, and we don't have to
* change any of the other flags, we just leave indcheckxmin alone (note
* that index_build won't have changed it, because this is a reindex).
* This is okay and desirable because not updating the tuple leaves the
* index's usability horizon (recorded as the tuple's xmin value) the same
* as it was.
*
* But, if the index was invalid/not-ready/dead and there were broken HOT
* chains, we had better force indcheckxmin true, because the normal
* argument that the HOT chains couldn't conflict with the index is
* suspect for an invalid index. (A conflict is definitely possible if
* the index was dead. It probably shouldn't happen otherwise, but let's
* be conservative.) In this case advancing the usability horizon is
* appropriate.
*
* Another reason for avoiding unnecessary updates here is that while
* reindexing pg_index itself, we must not try to update tuples in it.
* pg_index's indexes should always have these flags in their clean state,
* so that won't happen.
*/
if (!skipped_constraint)
{
Relation pg_index;
HeapTuple indexTuple;
Form_pg_index indexForm;
bool index_bad;
pg_index = heap_open(IndexRelationId, RowExclusiveLock);
indexTuple = SearchSysCacheCopy1(INDEXRELID,
ObjectIdGetDatum(indexId));
if (!HeapTupleIsValid(indexTuple))
elog(ERROR, "cache lookup failed for index %u", indexId);
indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
index_bad = (!indexForm->indisvalid ||
!indexForm->indisready ||
!indexForm->indislive);
if (index_bad ||
(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
{
if (!indexInfo->ii_BrokenHotChain)
indexForm->indcheckxmin = false;
else if (index_bad)
indexForm->indcheckxmin = true;
indexForm->indisvalid = true;
indexForm->indisready = true;
indexForm->indislive = true;
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
CatalogUpdateIndexes(pg_index, indexTuple);
/*
* Invalidate the relcache for the table, so that after we commit
* all sessions will refresh the table's index list. This ensures
* that if anyone misses seeing the pg_index row during this
* update, they'll refresh their list before attempting any update
* on the table.
*/
CacheInvalidateRelcache(heapRelation);
}
heap_close(pg_index, RowExclusiveLock);
}
/* Close rels, but keep locks */
index_close(iRel, NoLock);
heap_close(heapRelation, NoLock);
}
/*
* reindex_relation - This routine is used to recreate all indexes
* of a relation (and optionally its toast relation too, if any).
*
* "flags" is a bitmask that can include any combination of these bits:
*
* REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
*
* REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
* rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
* indexes are inconsistent with it. This makes things tricky if the relation
* is a system catalog that we might consult during the reindexing. To deal
* with that case, we mark all of the indexes as pending rebuild so that they
* won't be trusted until rebuilt. The caller is required to call us *without*
* having made the rebuilt table visible by doing CommandCounterIncrement;
* we'll do CCI after having collected the index list. (This way we can still
* use catalog indexes while collecting the list.)
*
* REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
* constraint conditions, else don't. To avoid deadlocks, VACUUM FULL or
* CLUSTER on a system catalog must omit this flag. REINDEX should be used to
* rebuild an index if constraint inconsistency is suspected. For optimal
* performance, other callers should include the flag only after transforming
* the data in a manner that risks a change in constraint validity.
*
* Returns true if any indexes were rebuilt (including toast table's index
* when relevant). Note that a CommandCounterIncrement will occur after each
* index rebuild.
*/
bool
reindex_relation(Oid relid, int flags)
{
Relation rel;
Oid toast_relid;
List *indexIds;
bool is_pg_class;
bool result;
/*
* Open and lock the relation. ShareLock is sufficient since we only need
* to prevent schema and data changes in it. The lock level used here
* should match ReindexTable().
*/
rel = heap_open(relid, ShareLock);
toast_relid = rel->rd_rel->reltoastrelid;
/*
* Get the list of index OIDs for this relation. (We trust to the
* relcache to get this with a sequential scan if ignoring system
* indexes.)
*/
indexIds = RelationGetIndexList(rel);
/*
* reindex_index will attempt to update the pg_class rows for the relation
* and index. If we are processing pg_class itself, we want to make sure
* that the updates do not try to insert index entries into indexes we
* have not processed yet. (When we are trying to recover from corrupted
* indexes, that could easily cause a crash.) We can accomplish this
* because CatalogUpdateIndexes will use the relcache's index list to know
* which indexes to update. We just force the index list to be only the
* stuff we've processed.
*
* It is okay to not insert entries into the indexes we have not processed
* yet because all of this is transaction-safe. If we fail partway
* through, the updated rows are dead and it doesn't matter whether they
* have index entries. Also, a new pg_class index will be created with a
* correct entry for its own pg_class row because we do
* RelationSetNewRelfilenode() before we do index_build().
*
* Note that we also clear pg_class's rd_oidindex until the loop is done,
* so that that index can't be accessed either. This means we cannot
* safely generate new relation OIDs while in the loop; shouldn't be a
* problem.
*/
is_pg_class = (RelationGetRelid(rel) == RelationRelationId);
/* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
if (is_pg_class)
(void) RelationGetIndexAttrBitmap(rel, false);
PG_TRY();
{
List *doneIndexes;
ListCell *indexId;
if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
{
/* Suppress use of all the indexes until they are rebuilt */
SetReindexPending(indexIds);
/*
* Make the new heap contents visible --- now things might be
* inconsistent!
*/
CommandCounterIncrement();
}
/* Reindex all the indexes. */
doneIndexes = NIL;
foreach(indexId, indexIds)
{
Oid indexOid = lfirst_oid(indexId);
if (is_pg_class)
RelationSetIndexList(rel, doneIndexes, InvalidOid);
reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS));
CommandCounterIncrement();
/* Index should no longer be in the pending list */
Assert(!ReindexIsProcessingIndex(indexOid));
if (is_pg_class)
doneIndexes = lappend_oid(doneIndexes, indexOid);
}
}
PG_CATCH();
{
/* Make sure list gets cleared on error exit */
ResetReindexPending();
PG_RE_THROW();
}
PG_END_TRY();
ResetReindexPending();
if (is_pg_class)
RelationSetIndexList(rel, indexIds, ClassOidIndexId);
/*
* Close rel, but continue to hold the lock.
*/
heap_close(rel, NoLock);
result = (indexIds != NIL);
/*
* If the relation has a secondary toast rel, reindex that too while we
* still hold the lock on the master table.
*/
if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
result |= reindex_relation(toast_relid, flags);
return result;
}
/* ----------------------------------------------------------------
* System index reindexing support
*
* When we are busy reindexing a system index, this code provides support
* for preventing catalog lookups from using that index. We also make use
* of this to catch attempted uses of user indexes during reindexing of
* those indexes.
* ----------------------------------------------------------------
*/
static Oid currentlyReindexedHeap = InvalidOid;
static Oid currentlyReindexedIndex = InvalidOid;
static List *pendingReindexedIndexes = NIL;
/*
* ReindexIsProcessingHeap
* True if heap specified by OID is currently being reindexed.
*/
bool
ReindexIsProcessingHeap(Oid heapOid)
{
return heapOid == currentlyReindexedHeap;
}
/*
* ReindexIsCurrentlyProcessingIndex
* True if index specified by OID is currently being reindexed.
*/
static bool
ReindexIsCurrentlyProcessingIndex(Oid indexOid)
{
return indexOid == currentlyReindexedIndex;
}
/*
* ReindexIsProcessingIndex
* True if index specified by OID is currently being reindexed,
* or should be treated as invalid because it is awaiting reindex.
*/
bool
ReindexIsProcessingIndex(Oid indexOid)
{
return indexOid == currentlyReindexedIndex ||
list_member_oid(pendingReindexedIndexes, indexOid);
}
/*
* SetReindexProcessing
* Set flag that specified heap/index are being reindexed.
*
* NB: caller must use a PG_TRY block to ensure ResetReindexProcessing is done.
*/
static void
SetReindexProcessing(Oid heapOid, Oid indexOid)
{
Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
/* Reindexing is not re-entrant. */
if (OidIsValid(currentlyReindexedHeap))
elog(ERROR, "cannot reindex while reindexing");
currentlyReindexedHeap = heapOid;
currentlyReindexedIndex = indexOid;
/* Index is no longer "pending" reindex. */
RemoveReindexPending(indexOid);
}
/*
* ResetReindexProcessing
* Unset reindexing status.
*/
static void
ResetReindexProcessing(void)
{
currentlyReindexedHeap = InvalidOid;
currentlyReindexedIndex = InvalidOid;
}
/*
* SetReindexPending
* Mark the given indexes as pending reindex.
*
* NB: caller must use a PG_TRY block to ensure ResetReindexPending is done.
* Also, we assume that the current memory context stays valid throughout.
*/
static void
SetReindexPending(List *indexes)
{
/* Reindexing is not re-entrant. */
if (pendingReindexedIndexes)
elog(ERROR, "cannot reindex while reindexing");
pendingReindexedIndexes = list_copy(indexes);
}
/*
* RemoveReindexPending
* Remove the given index from the pending list.
*/
static void
RemoveReindexPending(Oid indexOid)
{
pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
indexOid);
}
/*
* ResetReindexPending
* Unset reindex-pending status.
*/
static void
ResetReindexPending(void)
{
pendingReindexedIndexes = NIL;
}