diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 80ed7d829b..20604d73e4 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -1,4 +1,4 @@
-
+
@@ -2420,6 +2420,17 @@
If true, the table was last clustered on this index
+
+ indisvalid
+ bool
+
+ If true, the index is currently valid for queries.
+ False means the index is possibly incomplete: it must still be
+ inserted into by INSERT/UPDATE operations, but it cannot safely be
+ used for queries, and if it is unique, the uniqueness shouldn't be
+ relied on either.
+
+
indkeyint2vector
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index 1afa120766..10ab84278d 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -1,4 +1,4 @@
-
+
Index Access Method Interface Definition
@@ -648,6 +648,16 @@ amrestrpos (IndexScanDesc scan);
+
+ Furthermore, immediately before raising a uniqueness violation
+ according to the above rules, the access method must recheck the
+ liveness of the row being inserted. If it is committed dead then
+ no error should be raised. (This case cannot occur during the
+ ordinary scenario of inserting a row that's just been created by
+ the current transaction. It can happen during
+ CREATE UNIQUE INDEX CONCURRENTLY>, however.)
+
+
We require the index access method to apply these tests itself, which
means that it must reach into the heap to check the commit status of
diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml
index 10669c0155..223bb81cfc 100644
--- a/doc/src/sgml/indices.sgml
+++ b/doc/src/sgml/indices.sgml
@@ -1,4 +1,4 @@
-
+
Indexes
@@ -90,6 +90,17 @@ CREATE INDEX test1_id_index ON test1 (id);
significantly speed up queries with joins.
+
+ Creating an index on a large table can take a long time. By default,
+ PostgreSQL allows reads (selects) to occur
+ on the table in parallel with index creation, but writes (inserts,
+ updates, deletes) are blocked until the index build is finished.
+ It is possible to allow writes to occur in parallel with index
+ creation, but there are several caveats to be aware of —
+ for more information see .
+
+
After an index is created, the system has to keep it synchronized with the
table. This adds overhead to data manipulation operations.
diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml
index 8ebb820519..1a1e95db50 100644
--- a/doc/src/sgml/mvcc.sgml
+++ b/doc/src/sgml/mvcc.sgml
@@ -1,4 +1,4 @@
-
+
Concurrency Control
@@ -622,7 +622,8 @@ SELECT SUM(value) FROM mytab WHERE class = 2;
- Acquired by VACUUM (without ).
+ Acquired by VACUUM (without )
+ and by CREATE INDEX CONCURRENTLY>.
@@ -641,7 +642,8 @@ SELECT SUM(value) FROM mytab WHERE class = 2;
- Acquired by CREATE INDEX.
+ Acquired by CREATE INDEX
+ (without ).
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 20a89713bf..be0ca63f2c 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -1,5 +1,5 @@
@@ -20,7 +20,7 @@ PostgreSQL documentation
-CREATE [ UNIQUE ] INDEX name ON table [ USING method ]
+CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] name ON table [ USING method ]
( { column | ( expression ) } [ opclass ] [, ...] )
[ WITH ( storage_parameter = value [, ... ] ) ]
[ TABLESPACE tablespace ]
@@ -110,6 +110,21 @@ CREATE [ UNIQUE ] INDEX name ON
+
+ CONCURRENTLY
+
+
+ When this option is used, PostgreSQL> will build the
+ index without taking any locks that prevent concurrent inserts,
+ updates, or deletes on the table; whereas a standard index build
+ locks out writes (but not reads) on the table until it's done.
+ There are several caveats to be aware of when using this option
+ — see .
+
+
+
+
name
@@ -239,6 +254,82 @@ CREATE [ UNIQUE ] INDEX name ON
+
+
+ Building Indexes Concurrently
+
+
+ index
+ building concurrently
+
+
+
+ Creating an index for a large table can be a long operation. In large data
+ warehousing applications it can easily take hours or even days to build
+ indexes. It's important to understand the impact creating indexes has on a
+ system.
+
+
+
+ Normally PostgreSQL> locks the table to be indexed against
+ writes and performs the entire index build with a single scan of the
+ table. Other transactions can still read the table, but if they try to
+ insert, update, or delete rows in the table they will block until the
+ index build is finished.
+
+
+
+ PostgreSQL> also supports building indexes without locking
+ out writes. This method is invoked by specifying the
+ CONCURRENTLY> option of CREATE INDEX>.
+ When this option is used,
+ PostgreSQL> must perform two scans of the table, and in
+ addition it must wait for all existing transactions to terminate. Thus
+ this method requires more total work than a standard index build and takes
+ significantly longer to complete. However, since it allows normal
+ operations to continue while the index is built, this method is useful for
+ adding new indexes in a production environment. Of course, the extra CPU
+ and I/O load imposed by the index creation may slow other operations.
+
+
+
+ If a problem arises during the second scan of the table, such as a
+ uniqueness violation in a unique index, the CREATE INDEX>
+ command will fail but leave behind an invalid> index. This index
+ will be ignored for querying purposes because it may be incomplete;
+ however it will still consume update overhead. The recommended recovery
+ method in such cases is to drop the index and try again to perform
+ CREATE INDEX CONCURRENTLY>. (Another possibility is to rebuild
+ the index with REINDEX>. However, since REINDEX>
+ does not support concurrent builds, this option is unlikely to seem
+ attractive.)
+
+
+
+ Another caveat when building a unique index concurrently is that the
+ uniqueness constraint is already being enforced against other transactions
+ when the second table scan begins. This means that constraint violations
+ could be reported in other queries prior to the index becoming available
+ for use, or even in cases where the index build eventually fails. Also,
+ if a failure does occur in the second scan, the invalid> index
+ continues to enforce its uniqueness constraint afterwards.
+
+
+
+ Concurrent builds of expression indexes and partial indexes are supported.
+ Errors occurring in the evaluation of these expressions could cause
+ behavior similar to that described above for unique constraint violations.
+
+
+
+ Regular index builds permit other regular index builds on the
+ same table to occur in parallel, but only one concurrent index build
+ can occur on a table at a time. In both cases, no other types of schema
+ modification on the table are allowed meanwhile. Another difference
+ is that a regular CREATE INDEX> command can be performed within
+ a transaction block, but CREATE INDEX CONCURRENTLY> cannot.
+
+
@@ -339,15 +430,22 @@ Is this example correct?
To create a GiST index on a point attribute so that we
can efficiently use box operators on the result of the
conversion function:
-
CREATE INDEX pointloc
ON points USING GIST (point2box(location) box_ops);
SELECT * FROM points
WHERE point2box(points.pointloc) = boxes.box;
+
-->
+
+ To create an index without locking out writes to the table:
+
+CREATE INDEX CONCURRENTLY sales_quantity_index ON sales_table (quantity);
+
+
+
diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml
index dbe10ca762..9502a0daf5 100644
--- a/doc/src/sgml/ref/reindex.sgml
+++ b/doc/src/sgml/ref/reindex.sgml
@@ -1,5 +1,5 @@
@@ -30,7 +30,7 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } nam
REINDEX rebuilds an index using the data
stored in the index's table, replacing the old copy of the index. There are
- three main reasons to use REINDEX:
+ several scenarios in which to use REINDEX:
@@ -61,6 +61,18 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } nam
for an index, and wish to ensure that the change has taken full effect.
+
+
+
+ An index build with the CONCURRENTLY> option failed, leaving
+ an invalid> index. Such indexes are useless but it can be
+ convenient to use REINDEX> to rebuild them. Note that
+ REINDEX> will not perform a concurrent build. To build the
+ index without interfering with production you should drop the index and
+ reissue the CREATE INDEX CONCURRENTLY> command.
+
+
+
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index 597949aa2d..910d654443 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.142 2006/07/25 19:13:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.143 2006/08/25 04:06:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -244,8 +244,33 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
}
/*
- * Otherwise we have a definite conflict.
+ * Otherwise we have a definite conflict. But before
+ * complaining, look to see if the tuple we want to insert
+ * is itself now committed dead --- if so, don't complain.
+ * This is a waste of time in normal scenarios but we must
+ * do it to support CREATE INDEX CONCURRENTLY.
*/
+ htup.t_self = itup->t_tid;
+ if (heap_fetch(heapRel, SnapshotSelf, &htup, &hbuffer,
+ false, NULL))
+ {
+ /* Normal case --- it's still live */
+ ReleaseBuffer(hbuffer);
+ }
+ else if (htup.t_data != NULL)
+ {
+ /*
+ * It's been deleted, so no error, and no need to
+ * continue searching
+ */
+ break;
+ }
+ else
+ {
+ /* couldn't find the tuple?? */
+ elog(ERROR, "failed to fetch tuple being inserted");
+ }
+
ereport(ERROR,
(errcode(ERRCODE_UNIQUE_VIOLATION),
errmsg("duplicate key violates unique constraint \"%s\"",
diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y
index b25ea11a0b..6300eafb9a 100644
--- a/src/backend/bootstrap/bootparse.y
+++ b/src/backend/bootstrap/bootparse.y
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/bootstrap/bootparse.y,v 1.83 2006/07/31 01:16:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/bootstrap/bootparse.y,v 1.84 2006/08/25 04:06:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -257,7 +257,7 @@ Boot_DeclareIndexStmt:
$10,
NULL, NIL, NIL,
false, false, false,
- false, false, true, false);
+ false, false, true, false, false);
do_end();
}
;
@@ -275,7 +275,7 @@ Boot_DeclareUniqueIndexStmt:
$11,
NULL, NIL, NIL,
true, false, false,
- false, false, true, false);
+ false, false, true, false, false);
do_end();
}
;
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 0da209ff21..60a30ce372 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.272 2006/07/31 20:09:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.273 2006/08/25 04:06:46 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -34,6 +34,7 @@
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/pg_constraint.h"
+#include "catalog/pg_operator.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
@@ -49,8 +50,17 @@
#include "utils/memutils.h"
#include "utils/relcache.h"
#include "utils/syscache.h"
+#include "utils/tuplesort.h"
+/* state info for validate_index bulkdelete callback */
+typedef struct
+{
+ Tuplesortstate *tuplesort; /* for sorting the index TIDs */
+ /* statistics (for debug purposes only): */
+ double htups, itups, tups_inserted;
+} v_i_state;
+
/* non-export function prototypes */
static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
IndexInfo *indexInfo,
@@ -61,9 +71,16 @@ static void AppendAttributeTuples(Relation indexRelation, int numatts);
static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
IndexInfo *indexInfo,
Oid *classOids,
- bool primary);
+ bool primary,
+ bool isvalid);
static void index_update_stats(Relation rel, bool hasindex, bool isprimary,
Oid reltoastidxid, double reltuples);
+static bool validate_index_callback(ItemPointer itemptr, void *opaque);
+static void validate_index_heapscan(Relation heapRelation,
+ Relation indexRelation,
+ IndexInfo *indexInfo,
+ Snapshot snapshot,
+ v_i_state *state);
static Oid IndexGetRelation(Oid indexId);
@@ -308,6 +325,8 @@ AppendAttributeTuples(Relation indexRelation, int numatts)
/* ----------------------------------------------------------------
* UpdateIndexRelation
+ *
+ * Construct and insert a new entry in the pg_index catalog
* ----------------------------------------------------------------
*/
static void
@@ -315,7 +334,8 @@ UpdateIndexRelation(Oid indexoid,
Oid heapoid,
IndexInfo *indexInfo,
Oid *classOids,
- bool primary)
+ bool primary,
+ bool isvalid)
{
int2vector *indkey;
oidvector *indclass;
@@ -383,6 +403,7 @@ UpdateIndexRelation(Oid indexoid,
values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
+ values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
values[Anum_pg_index_indexprs - 1] = exprsDatum;
@@ -427,7 +448,10 @@ UpdateIndexRelation(Oid indexoid,
* isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint
* allow_system_table_mods: allow table to be a system catalog
* skip_build: true to skip the index_build() step for the moment; caller
- * must do it later (typically via reindex_index())
+ * must do it later (typically via reindex_index())
+ * concurrent: if true, do not lock the table against writers. The index
+ * will be marked "invalid" and the caller must take additional steps
+ * to fix it up.
*
* Returns OID of the created index.
*/
@@ -443,7 +467,8 @@ index_create(Oid heapRelationId,
bool isprimary,
bool isconstraint,
bool allow_system_table_mods,
- bool skip_build)
+ bool skip_build,
+ bool concurrent)
{
Relation pg_class;
Relation heapRelation;
@@ -456,9 +481,12 @@ index_create(Oid heapRelationId,
pg_class = heap_open(RelationRelationId, RowExclusiveLock);
/*
- * Only SELECT ... FOR UPDATE/SHARE are allowed while doing this
+ * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
+ * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
+ * (but not VACUUM).
*/
- heapRelation = heap_open(heapRelationId, ShareLock);
+ heapRelation = heap_open(heapRelationId,
+ (concurrent ? ShareUpdateExclusiveLock : ShareLock));
/*
* The index will be in the same namespace as its parent table, and is
@@ -480,6 +508,16 @@ index_create(Oid heapRelationId,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("user-defined indexes on system catalog tables are not supported")));
+ /*
+ * concurrent index build on a system catalog is unsafe because we tend
+ * to release locks before committing in catalogs
+ */
+ if (concurrent &&
+ IsSystemRelation(heapRelation))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("concurrent index creation on system catalog tables is not supported")));
+
/*
* We cannot allow indexing a shared relation after initdb (because
* there's no way to make the entry in other databases' pg_class).
@@ -578,7 +616,7 @@ index_create(Oid heapRelationId,
* ----------------
*/
UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
- classObjectId, isprimary);
+ classObjectId, isprimary, !concurrent);
/*
* Register constraint and dependencies for the index.
@@ -745,9 +783,8 @@ index_create(Oid heapRelationId,
}
/*
- * Close the heap and index; but we keep the ShareLock on the heap and
- * the exclusive lock on the index that we acquired above, until end of
- * transaction.
+ * Close the heap and index; but we keep the locks that we acquired above
+ * until end of transaction.
*/
index_close(indexRelation, NoLock);
heap_close(heapRelation, NoLock);
@@ -895,6 +932,7 @@ BuildIndexInfo(Relation index)
/* other info */
ii->ii_Unique = indexStruct->indisunique;
+ ii->ii_Concurrent = false; /* assume normal case */
return ii;
}
@@ -1327,13 +1365,22 @@ IndexBuildHeapScan(Relation heapRelation,
estate);
/*
- * Ok, begin our scan of the base relation. We use SnapshotAny because we
- * must retrieve all tuples and do our own time qual checks.
+ * Prepare for scan of the base relation. In a normal index build,
+ * we use SnapshotAny because we must retrieve all tuples and do our own
+ * time qual checks (because we have to index RECENTLY_DEAD tuples).
+ * In a concurrent build, we take a regular MVCC snapshot and index
+ * whatever's live according to that. During bootstrap we just use
+ * SnapshotNow.
*/
if (IsBootstrapProcessingMode())
{
snapshot = SnapshotNow;
- OldestXmin = InvalidTransactionId;
+ OldestXmin = InvalidTransactionId; /* not used */
+ }
+ else if (indexInfo->ii_Concurrent)
+ {
+ snapshot = CopySnapshot(GetTransactionSnapshot());
+ OldestXmin = InvalidTransactionId; /* not used */
}
else
{
@@ -1344,8 +1391,8 @@ IndexBuildHeapScan(Relation heapRelation,
scan = heap_beginscan(heapRelation, /* relation */
snapshot, /* seeself */
- 0, /* number of keys */
- NULL); /* scan key */
+ 0, /* number of keys */
+ NULL); /* scan key */
reltuples = 0;
@@ -1374,10 +1421,12 @@ IndexBuildHeapScan(Relation heapRelation,
scan->rs_cbuf))
{
case HEAPTUPLE_DEAD:
+ /* Definitely dead, we can ignore it */
indexIt = false;
tupleIsAlive = false;
break;
case HEAPTUPLE_LIVE:
+ /* Normal case, index and unique-check it */
indexIt = true;
tupleIsAlive = true;
break;
@@ -1388,6 +1437,7 @@ IndexBuildHeapScan(Relation heapRelation,
* anyway to preserve MVCC semantics. (Pre-existing
* transactions could try to use the index after we
* finish building it, and may need to see such tuples.)
+ * Exclude it from unique-checking, however.
*/
indexIt = true;
tupleIsAlive = false;
@@ -1499,6 +1549,309 @@ IndexBuildHeapScan(Relation heapRelation,
}
+/*
+ * validate_index - support code for concurrent index builds
+ *
+ * We do a concurrent index build by first building the index normally via
+ * index_create(), while holding a weak lock that allows concurrent
+ * insert/update/delete. Also, we index only tuples that are valid
+ * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
+ * build takes care to include recently-dead tuples. This is OK because
+ * we won't mark the index valid until all transactions that might be able
+ * to see those tuples are gone. The reason for doing that is to avoid
+ * bogus unique-index failures due to concurrent UPDATEs (we might see
+ * different versions of the same row as being valid when we pass over them,
+ * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
+ * does not contain any tuples added to the table while we built the index.
+ *
+ * Next, we commit the transaction so that the index becomes visible to other
+ * backends, but it is marked not "indisvalid" to prevent the planner from
+ * relying on it for indexscans. Then we wait for all transactions that
+ * could have been modifying the table to terminate. At this point we
+ * know that any subsequently-started transactions will see the index and
+ * insert their new tuples into it. We then take a new reference snapshot
+ * which is passed to validate_index(). Any tuples that are valid according
+ * to this snap, but are not in the index, must be added to the index.
+ * (Any tuples committed live after the snap will be inserted into the
+ * index by their originating transaction. Any tuples committed dead before
+ * the snap need not be indexed, because we will wait out all transactions
+ * that might care about them before we mark the index valid.)
+ *
+ * validate_index() works by first gathering all the TIDs currently in the
+ * index, using a bulkdelete callback that just stores the TIDs and doesn't
+ * ever say "delete it". (This should be faster than a plain indexscan;
+ * also, not all index AMs support full-index indexscan.) Then we sort the
+ * TIDs, and finally scan the table doing a "merge join" against the TID list
+ * to see which tuples are missing from the index. Thus we will ensure that
+ * all tuples valid according to the reference snapshot are in the index.
+ *
+ * Building a unique index this way is tricky: we might try to insert a
+ * tuple that is already dead or is in process of being deleted, and we
+ * mustn't have a uniqueness failure against an updated version of the same
+ * row. We can check the tuple to see if it's already dead and tell
+ * index_insert() not to do the uniqueness check, but that still leaves us
+ * with a race condition against an in-progress update. To handle that,
+ * we expect the index AM to recheck liveness of the to-be-inserted tuple
+ * before it declares a uniqueness error.
+ *
+ * After completing validate_index(), we wait until all transactions that
+ * were alive at the time of the reference snapshot are gone; this is
+ * necessary to be sure there are none left with a serializable snapshot
+ * older than the reference (and hence possibly able to see tuples we did
+ * not index). Then we mark the index valid and commit.
+ *
+ * Doing two full table scans is a brute-force strategy. We could try to be
+ * cleverer, eg storing new tuples in a special area of the table (perhaps
+ * making the table append-only by setting use_fsm). However that would
+ * add yet more locking issues.
+ */
+void
+validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
+{
+ Relation heapRelation, indexRelation;
+ IndexInfo *indexInfo;
+ IndexVacuumInfo ivinfo;
+ v_i_state state;
+
+ /* Open and lock the parent heap relation */
+ heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
+ /* And the target index relation */
+ indexRelation = index_open(indexId, RowExclusiveLock);
+
+ /*
+ * Fetch info needed for index_insert. (You might think this should
+ * be passed in from DefineIndex, but its copy is long gone due to
+ * having been built in a previous transaction.)
+ */
+ indexInfo = BuildIndexInfo(indexRelation);
+
+ /* mark build as concurrent just for consistency */
+ indexInfo->ii_Concurrent = true;
+
+ /*
+ * Scan the index and gather up all the TIDs into a tuplesort object.
+ */
+ ivinfo.index = indexRelation;
+ ivinfo.vacuum_full = false;
+ ivinfo.message_level = DEBUG2;
+ ivinfo.num_heap_tuples = -1;
+
+ state.tuplesort = tuplesort_begin_datum(TIDOID,
+ TIDLessOperator,
+ maintenance_work_mem,
+ false);
+ state.htups = state.itups = state.tups_inserted = 0;
+
+ (void) index_bulk_delete(&ivinfo, NULL,
+ validate_index_callback, (void *) &state);
+
+ /* Execute the sort */
+ tuplesort_performsort(state.tuplesort);
+
+ /*
+ * Now scan the heap and "merge" it with the index
+ */
+ validate_index_heapscan(heapRelation,
+ indexRelation,
+ indexInfo,
+ snapshot,
+ &state);
+
+ /* Done with tuplesort object */
+ tuplesort_end(state.tuplesort);
+
+ elog(DEBUG2,
+ "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
+ state.htups, state.itups, state.tups_inserted);
+
+ /* Close rels, but keep locks */
+ index_close(indexRelation, NoLock);
+ heap_close(heapRelation, NoLock);
+}
+
+/*
+ * validate_index_callback - bulkdelete callback to collect the index TIDs
+ */
+static bool
+validate_index_callback(ItemPointer itemptr, void *opaque)
+{
+ v_i_state *state = (v_i_state *) opaque;
+
+ tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false);
+ state->itups += 1;
+ return false; /* never actually delete anything */
+}
+
+/*
+ * validate_index_heapscan - second table scan for concurrent index build
+ *
+ * This has much code in common with IndexBuildHeapScan, but it's enough
+ * different that it seems cleaner to have two routines not one.
+ */
+static void
+validate_index_heapscan(Relation heapRelation,
+ Relation indexRelation,
+ IndexInfo *indexInfo,
+ Snapshot snapshot,
+ v_i_state *state)
+{
+ HeapScanDesc scan;
+ HeapTuple heapTuple;
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ List *predicate;
+ TupleTableSlot *slot;
+ EState *estate;
+ ExprContext *econtext;
+ /* state variables for the merge */
+ ItemPointer indexcursor = NULL;
+ bool tuplesort_empty = false;
+
+ /*
+ * sanity checks
+ */
+ Assert(OidIsValid(indexRelation->rd_rel->relam));
+
+ /*
+ * Need an EState for evaluation of index expressions and partial-index
+ * predicates. Also a slot to hold the current tuple.
+ */
+ estate = CreateExecutorState();
+ econtext = GetPerTupleExprContext(estate);
+ slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
+
+ /* Arrange for econtext's scan tuple to be the tuple under test */
+ econtext->ecxt_scantuple = slot;
+
+ /* Set up execution state for predicate, if any. */
+ predicate = (List *)
+ ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
+ estate);
+
+ /*
+ * Prepare for scan of the base relation. We need just those tuples
+ * satisfying the passed-in reference snapshot.
+ */
+ scan = heap_beginscan(heapRelation, /* relation */
+ snapshot, /* seeself */
+ 0, /* number of keys */
+ NULL); /* scan key */
+
+ /*
+ * Scan all tuples matching the snapshot.
+ */
+ while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+ {
+ ItemPointer heapcursor = &heapTuple->t_self;
+
+ CHECK_FOR_INTERRUPTS();
+
+ state->htups += 1;
+
+ /*
+ * "merge" by skipping through the index tuples until we find or
+ * pass the current heap tuple.
+ */
+ while (!tuplesort_empty &&
+ (!indexcursor ||
+ ItemPointerCompare(indexcursor, heapcursor) < 0))
+ {
+ Datum ts_val;
+ bool ts_isnull;
+
+ if (indexcursor)
+ pfree(indexcursor);
+ tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
+ &ts_val, &ts_isnull);
+ Assert(tuplesort_empty || !ts_isnull);
+ indexcursor = (ItemPointer) DatumGetPointer(ts_val);
+ }
+
+ if (tuplesort_empty ||
+ ItemPointerCompare(indexcursor, heapcursor) > 0)
+ {
+ /*
+ * We've overshot, which means this heap tuple is missing from the
+ * index, so insert it.
+ */
+ bool check_unique;
+
+ MemoryContextReset(econtext->ecxt_per_tuple_memory);
+
+ /* Set up for predicate or expression evaluation */
+ ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
+
+ /*
+ * In a partial index, discard tuples that don't satisfy the
+ * predicate.
+ */
+ if (predicate != NIL)
+ {
+ if (!ExecQual(predicate, econtext, false))
+ continue;
+ }
+
+ /*
+ * For the current heap tuple, extract all the attributes we use
+ * in this index, and note which are null. This also performs
+ * evaluation of any expressions needed.
+ */
+ FormIndexDatum(indexInfo,
+ slot,
+ estate,
+ values,
+ isnull);
+
+ /*
+ * If the tuple is already committed dead, we still have to
+ * put it in the index (because some xacts might be able to
+ * see it), but we might as well suppress uniqueness checking.
+ * This is just an optimization because the index AM is not
+ * supposed to raise a uniqueness failure anyway.
+ */
+ if (indexInfo->ii_Unique)
+ {
+ /* must hold a buffer lock to call HeapTupleSatisfiesNow */
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ if (HeapTupleSatisfiesNow(heapTuple->t_data, scan->rs_cbuf))
+ check_unique = true;
+ else
+ check_unique = false;
+
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+ }
+ else
+ check_unique = false;
+
+ /*
+ * You'd think we should go ahead and build the index tuple here,
+ * but some index AMs want to do further processing on the data
+ * first. So pass the values[] and isnull[] arrays, instead.
+ */
+ index_insert(indexRelation,
+ values,
+ isnull,
+ heapcursor,
+ heapRelation,
+ check_unique);
+
+ state->tups_inserted += 1;
+ }
+ }
+
+ heap_endscan(scan);
+
+ ExecDropSingleTupleTableSlot(slot);
+
+ FreeExecutorState(estate);
+
+ /* These may have been pointing to the now-gone estate */
+ indexInfo->ii_ExpressionsState = NIL;
+ indexInfo->ii_PredicateState = NIL;
+}
+
+
/*
* IndexGetRelation: given an index's relation OID, get the OID of the
* relation it is an index on. Uses the system cache.
@@ -1530,9 +1883,12 @@ void
reindex_index(Oid indexId)
{
Relation iRel,
- heapRelation;
+ heapRelation,
+ pg_index;
Oid heapId;
bool inplace;
+ HeapTuple indexTuple;
+ Form_pg_index indexForm;
/*
* Open and lock the parent heap relation. ShareLock is sufficient since
@@ -1600,6 +1956,28 @@ reindex_index(Oid indexId)
PG_END_TRY();
ResetReindexProcessing();
+ /*
+ * If the index is marked invalid (ie, it's from a failed CREATE INDEX
+ * CONCURRENTLY), we can now mark it valid. This allows REINDEX to be
+ * used to clean up in such cases.
+ */
+ pg_index = heap_open(IndexRelationId, RowExclusiveLock);
+
+ indexTuple = SearchSysCacheCopy(INDEXRELID,
+ ObjectIdGetDatum(indexId),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "cache lookup failed for index %u", indexId);
+ indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+ if (!indexForm->indisvalid)
+ {
+ indexForm->indisvalid = true;
+ simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
+ CatalogUpdateIndexes(pg_index, indexTuple);
+ }
+ heap_close(pg_index, RowExclusiveLock);
+
/* Close rels, but keep locks */
index_close(iRel, NoLock);
heap_close(heapRelation, NoLock);
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index eebe602ee0..aa3e7b8fef 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/toasting.c,v 1.1 2006/07/31 01:16:37 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/toasting.c,v 1.2 2006/08/25 04:06:47 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -218,6 +218,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid)
indexInfo->ii_Predicate = NIL;
indexInfo->ii_PredicateState = NIL;
indexInfo->ii_Unique = true;
+ indexInfo->ii_Concurrent = false;
classObjectId[0] = OID_BTREE_OPS_OID;
classObjectId[1] = INT4_BTREE_OPS_OID;
@@ -227,7 +228,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid)
BTREE_AM_OID,
rel->rd_rel->reltablespace,
classObjectId, (Datum) 0,
- true, false, true, false);
+ true, false, true, false, false);
/*
* Store the toast table's OID in the parent relation's pg_class row
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 97a5a72199..e96235c6f5 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.146 2006/07/31 01:16:37 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.147 2006/08/25 04:06:48 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -18,6 +18,7 @@
#include "access/genam.h"
#include "access/heapam.h"
#include "access/reloptions.h"
+#include "access/transam.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
@@ -85,6 +86,7 @@ static bool relationHasPrimaryKey(Relation rel);
* 'skip_build': make the catalog entries but leave the index file empty;
* it will be filled later.
* 'quiet': suppress the NOTICE chatter ordinarily provided for constraints.
+ * 'concurrent': avoid blocking writers to the table while building.
*/
void
DefineIndex(RangeVar *heapRelation,
@@ -102,7 +104,8 @@ DefineIndex(RangeVar *heapRelation,
bool is_alter_table,
bool check_rights,
bool skip_build,
- bool quiet)
+ bool quiet,
+ bool concurrent)
{
Oid *classObjectId;
Oid accessMethodId;
@@ -116,6 +119,12 @@ DefineIndex(RangeVar *heapRelation,
Datum reloptions;
IndexInfo *indexInfo;
int numberOfAttributes;
+ uint32 ixcnt;
+ LockRelId heaprelid;
+ Snapshot snapshot;
+ Relation pg_index;
+ HeapTuple indexTuple;
+ Form_pg_index indexForm;
/*
* count attributes in index
@@ -133,8 +142,16 @@ DefineIndex(RangeVar *heapRelation,
/*
* Open heap relation, acquire a suitable lock on it, remember its OID
+ *
+ * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
+ * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
+ * (but not VACUUM).
*/
- rel = heap_openrv(heapRelation, ShareLock);
+ rel = heap_openrv(heapRelation,
+ (concurrent ? ShareUpdateExclusiveLock : ShareLock));
+
+ relationId = RelationGetRelid(rel);
+ namespaceId = RelationGetNamespace(rel);
/* Note: during bootstrap may see uncataloged relation */
if (rel->rd_rel->relkind != RELKIND_RELATION &&
@@ -144,8 +161,13 @@ DefineIndex(RangeVar *heapRelation,
errmsg("\"%s\" is not a table",
heapRelation->relname)));
- relationId = RelationGetRelid(rel);
- namespaceId = RelationGetNamespace(rel);
+ /*
+ * Don't try to CREATE INDEX on temp tables of other backends.
+ */
+ if (isOtherTempNamespace(namespaceId))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot create indexes on temporary tables of other sessions")));
/*
* Verify we (still) have CREATE rights in the rel's namespace.
@@ -391,6 +413,7 @@ DefineIndex(RangeVar *heapRelation,
indexInfo->ii_Predicate = make_ands_implicit(predicate);
indexInfo->ii_PredicateState = NIL;
indexInfo->ii_Unique = unique;
+ indexInfo->ii_Concurrent = concurrent;
classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
ComputeIndexAttrs(indexInfo, classObjectId, attributeList,
@@ -410,10 +433,122 @@ DefineIndex(RangeVar *heapRelation,
primary ? "PRIMARY KEY" : "UNIQUE",
indexRelationName, RelationGetRelationName(rel))));
- index_create(relationId, indexRelationName, indexRelationId,
- indexInfo, accessMethodId, tablespaceId, classObjectId,
- reloptions, primary, isconstraint,
- allowSystemTableMods, skip_build);
+ indexRelationId =
+ index_create(relationId, indexRelationName, indexRelationId,
+ indexInfo, accessMethodId, tablespaceId, classObjectId,
+ reloptions, primary, isconstraint,
+ allowSystemTableMods, skip_build, concurrent);
+
+ if (!concurrent)
+ return; /* We're done, in the standard case */
+
+ /*
+ * Phase 2 of concurrent index build (see comments for validate_index()
+ * for an overview of how this works)
+ *
+ * We must commit our current transaction so that the index becomes
+ * visible; then start another. Note that all the data structures
+ * we just built are lost in the commit. The only data we keep past
+ * here are the relation IDs.
+ *
+ * Before committing, get a session-level lock on the table, to ensure
+ * that neither it nor the index can be dropped before we finish.
+ * This cannot block, even if someone else is waiting for access, because
+ * we already have the same lock within our transaction.
+ *
+ * Note: we don't currently bother with a session lock on the index,
+ * because there are no operations that could change its state while
+ * we hold lock on the parent table. This might need to change later.
+ */
+ heaprelid = rel->rd_lockInfo.lockRelId;
+ LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
+
+ CommitTransactionCommand();
+ StartTransactionCommand();
+
+ /* Establish transaction snapshot ... else GetLatestSnapshot complains */
+ (void) GetTransactionSnapshot();
+
+ /*
+ * Now we must wait until no running transaction could have the table open
+ * with the old list of indexes. If we can take an exclusive lock then
+ * there are none now and anybody who opens it later will get the new
+ * index in their relcache entry. Alternatively, if our Xmin reaches our
+ * own (new) transaction then we know no transactions that started before
+ * the index was visible are left anyway.
+ */
+ for (;;)
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ if (ConditionalLockRelationOid(relationId, ExclusiveLock))
+ {
+ /* Release the lock right away to avoid blocking anyone */
+ UnlockRelationOid(relationId, ExclusiveLock);
+ break;
+ }
+
+ if (TransactionIdEquals(GetLatestSnapshot()->xmin,
+ GetTopTransactionId()))
+ break;
+
+ pg_usleep(1000000L); /* 1 sec */
+ }
+
+ /*
+ * Now take the "reference snapshot" that will be used by validate_index()
+ * to filter candidate tuples. All other transactions running at this
+ * time will have to be out-waited before we can commit, because we can't
+ * guarantee that tuples deleted just before this will be in the index.
+ *
+ * We also set ActiveSnapshot to this snap, since functions in indexes
+ * may need a snapshot.
+ */
+ snapshot = CopySnapshot(GetTransactionSnapshot());
+ ActiveSnapshot = snapshot;
+
+ /*
+ * Scan the index and the heap, insert any missing index entries.
+ */
+ validate_index(relationId, indexRelationId, snapshot);
+
+ /*
+ * The index is now valid in the sense that it contains all currently
+ * interesting tuples. But since it might not contain tuples deleted
+ * just before the reference snap was taken, we have to wait out any
+ * transactions older than the reference snap. We can do this by
+ * waiting for each xact explicitly listed in the snap.
+ *
+ * Note: GetSnapshotData() never stores our own xid into a snap,
+ * hence we need not check for that.
+ */
+ for (ixcnt = 0; ixcnt < snapshot->xcnt; ixcnt++)
+ XactLockTableWait(snapshot->xip[ixcnt]);
+
+ /* Index can now be marked valid -- update its pg_index entry */
+ pg_index = heap_open(IndexRelationId, RowExclusiveLock);
+
+ indexTuple = SearchSysCacheCopy(INDEXRELID,
+ ObjectIdGetDatum(indexRelationId),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(indexTuple))
+ elog(ERROR, "cache lookup failed for index %u", indexRelationId);
+ indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+
+ Assert(indexForm->indexrelid == indexRelationId);
+ Assert(!indexForm->indisvalid);
+
+ indexForm->indisvalid = true;
+
+ simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
+ CatalogUpdateIndexes(pg_index, indexTuple);
+
+ heap_close(pg_index, RowExclusiveLock);
+
+ /*
+ * Last thing to do is release the session-level lock on the parent table.
+ */
+ UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index a1f7603337..7e8884496d 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.200 2006/08/21 00:57:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.201 2006/08/25 04:06:48 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -3832,7 +3832,8 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
true, /* is_alter_table */
check_rights,
skip_build,
- quiet);
+ quiet,
+ false);
}
/*
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 40e35a3796..391846bee2 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.348 2006/08/21 00:57:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.349 2006/08/25 04:06:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -2049,6 +2049,7 @@ _copyIndexStmt(IndexStmt *from)
COPY_SCALAR_FIELD(unique);
COPY_SCALAR_FIELD(primary);
COPY_SCALAR_FIELD(isconstraint);
+ COPY_SCALAR_FIELD(concurrent);
return newnode;
}
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index b4d0632c03..3cb4b8aee3 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -18,7 +18,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.282 2006/08/21 00:57:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.283 2006/08/25 04:06:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -962,6 +962,7 @@ _equalIndexStmt(IndexStmt *a, IndexStmt *b)
COMPARE_SCALAR_FIELD(unique);
COMPARE_SCALAR_FIELD(primary);
COMPARE_SCALAR_FIELD(isconstraint);
+ COMPARE_SCALAR_FIELD(concurrent);
return true;
}
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index a4b4044385..7b126b7f54 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.283 2006/08/21 00:57:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.284 2006/08/25 04:06:50 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@@ -1353,6 +1353,7 @@ _outIndexStmt(StringInfo str, IndexStmt *node)
WRITE_BOOL_FIELD(unique);
WRITE_BOOL_FIELD(primary);
WRITE_BOOL_FIELD(isconstraint);
+ WRITE_BOOL_FIELD(concurrent);
}
static void
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 47e39c6a78..5e3c7d9857 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.124 2006/08/05 00:22:49 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.125 2006/08/25 04:06:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -138,6 +138,18 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel)
indexRelation = index_open(indexoid, lmode);
index = indexRelation->rd_index;
+ /*
+ * Ignore invalid indexes, since they can't safely be used for
+ * queries. Note that this is OK because the data structure
+ * we are constructing is only used by the planner --- the
+ * executor still needs to insert into "invalid" indexes!
+ */
+ if (!index->indisvalid)
+ {
+ index_close(indexRelation, NoLock);
+ continue;
+ }
+
info = makeNode(IndexOptInfo);
info->indexoid = index->indexrelid;
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 8eb50fb573..ae3469c86c 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.347 2006/08/21 00:57:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.348 2006/08/25 04:06:51 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1488,6 +1488,7 @@ transformIndexConstraints(ParseState *pstate, CreateStmtContext *cxt)
index->tableSpace = constraint->indexspace;
index->indexParams = NIL;
index->whereClause = NULL;
+ index->concurrent = false;
/*
* Make sure referenced keys exist. If we are making a PRIMARY KEY
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 60761ae6bc..a77e73a43f 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.557 2006/08/21 00:57:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.558 2006/08/25 04:06:51 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@@ -364,7 +364,8 @@ static void doNegateFloat(Value *v);
CACHE CALLED CASCADE CASCADED CASE CAST CHAIN CHAR_P
CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT
- COMMITTED CONNECTION CONSTRAINT CONSTRAINTS CONVERSION_P CONVERT COPY CREATE CREATEDB
+ COMMITTED CONCURRENTLY CONNECTION CONSTRAINT CONSTRAINTS
+ CONVERSION_P CONVERT COPY CREATE CREATEDB
CREATEROLE CREATEUSER CROSS CSV CURRENT_DATE CURRENT_ROLE CURRENT_TIME
CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
@@ -3638,20 +3639,22 @@ opt_granted_by: GRANTED BY RoleId { $$ = $3; }
/*****************************************************************************
*
- * QUERY:
- * create index on
- * [ using ] "(" (
[ using ] )+ ")"
- * [ tablespace ] [ where ]
+ * QUERY: CREATE INDEX
+ *
+ * Note: we can't factor CONCURRENTLY into a separate production without
+ * making it a reserved word.
*
* Note: we cannot put TABLESPACE clause after WHERE clause unless we are
* willing to make TABLESPACE a fully reserved word.
*****************************************************************************/
-IndexStmt: CREATE index_opt_unique INDEX index_name ON qualified_name
- access_method_clause '(' index_params ')' opt_definition OptTableSpace where_clause
+IndexStmt: CREATE index_opt_unique INDEX index_name
+ ON qualified_name access_method_clause '(' index_params ')'
+ opt_definition OptTableSpace where_clause
{
IndexStmt *n = makeNode(IndexStmt);
n->unique = $2;
+ n->concurrent = false;
n->idxname = $4;
n->relation = $6;
n->accessMethod = $7;
@@ -3661,6 +3664,22 @@ IndexStmt: CREATE index_opt_unique INDEX index_name ON qualified_name
n->whereClause = $13;
$$ = (Node *)n;
}
+ | CREATE index_opt_unique INDEX CONCURRENTLY index_name
+ ON qualified_name access_method_clause '(' index_params ')'
+ opt_definition OptTableSpace where_clause
+ {
+ IndexStmt *n = makeNode(IndexStmt);
+ n->unique = $2;
+ n->concurrent = true;
+ n->idxname = $5;
+ n->relation = $7;
+ n->accessMethod = $8;
+ n->indexParams = $10;
+ n->options = $12;
+ n->tableSpace = $13;
+ n->whereClause = $14;
+ $$ = (Node *)n;
+ }
;
index_opt_unique:
@@ -8491,6 +8510,7 @@ unreserved_keyword:
| COMMENT
| COMMIT
| COMMITTED
+ | CONCURRENTLY
| CONNECTION
| CONSTRAINTS
| CONVERSION_P
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c
index e799d68ae6..9867982cdb 100644
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.175 2006/08/12 02:52:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.176 2006/08/25 04:06:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -84,6 +84,7 @@ static const ScanKeyword ScanKeywords[] = {
{"comment", COMMENT},
{"commit", COMMIT},
{"committed", COMMITTED},
+ {"concurrently", CONCURRENTLY},
{"connection", CONNECTION},
{"constraint", CONSTRAINT},
{"constraints", CONSTRAINTS},
diff --git a/src/backend/storage/page/itemptr.c b/src/backend/storage/page/itemptr.c
index 1fcd6cb0ac..08f2273ab9 100644
--- a/src/backend/storage/page/itemptr.c
+++ b/src/backend/storage/page/itemptr.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/page/itemptr.c,v 1.17 2006/07/14 14:52:23 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/page/itemptr.c,v 1.18 2006/08/25 04:06:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,13 +16,14 @@
#include "storage/itemptr.h"
+
/*
* ItemPointerEquals
* Returns true if both item pointers point to the same item,
* otherwise returns false.
*
* Note:
- * Assumes that the disk item pointers are not NULL.
+ * Asserts that the disk item pointers are both valid!
*/
bool
ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
@@ -35,3 +36,30 @@ ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
else
return false;
}
+
+/*
+ * ItemPointerCompare
+ * Generic btree-style comparison for item pointers.
+ */
+int32
+ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
+{
+ /*
+ * Don't use ItemPointerGetBlockNumber or ItemPointerGetOffsetNumber here,
+ * because they assert ip_posid != 0 which might not be true for a
+ * user-supplied TID.
+ */
+ BlockNumber b1 = BlockIdGetBlockNumber(&(arg1->ip_blkid));
+ BlockNumber b2 = BlockIdGetBlockNumber(&(arg2->ip_blkid));
+
+ if (b1 < b2)
+ return -1;
+ else if (b1 > b2)
+ return 1;
+ else if (arg1->ip_posid < arg2->ip_posid)
+ return -1;
+ else if (arg1->ip_posid > arg2->ip_posid)
+ return 1;
+ else
+ return 0;
+}
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index ce86a90ba3..781026b323 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.266 2006/08/15 18:26:58 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.267 2006/08/25 04:06:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -781,6 +781,9 @@ ProcessUtility(Node *parsetree,
{
IndexStmt *stmt = (IndexStmt *) parsetree;
+ if (stmt->concurrent)
+ PreventTransactionChain(stmt, "CREATE INDEX CONCURRENTLY");
+
CheckRelationOwnership(stmt->relation, true);
DefineIndex(stmt->relation, /* relation */
@@ -795,10 +798,11 @@ ProcessUtility(Node *parsetree,
stmt->unique,
stmt->primary,
stmt->isconstraint,
- false, /* is_alter_table */
- true, /* check_rights */
- false, /* skip_build */
- false); /* quiet */
+ false, /* is_alter_table */
+ true, /* check_rights */
+ false, /* skip_build */
+ false, /* quiet */
+ stmt->concurrent); /* concurrent */
}
break;
diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c
index 814670521e..1362d0a3f8 100644
--- a/src/backend/utils/adt/tid.c
+++ b/src/backend/utils/adt/tid.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tid.c,v 1.54 2006/07/21 20:51:32 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tid.c,v 1.55 2006/08/25 04:06:53 tgl Exp $
*
* NOTES
* input routine largely stolen from boxin().
@@ -158,36 +158,13 @@ tidsend(PG_FUNCTION_ARGS)
* PUBLIC ROUTINES *
*****************************************************************************/
-static int32
-tid_cmp_internal(ItemPointer arg1, ItemPointer arg2)
-{
- /*
- * Don't use ItemPointerGetBlockNumber or ItemPointerGetOffsetNumber here,
- * because they assert ip_posid != 0 which might not be true for a
- * user-supplied TID.
- */
- BlockNumber b1 = BlockIdGetBlockNumber(&(arg1->ip_blkid));
- BlockNumber b2 = BlockIdGetBlockNumber(&(arg2->ip_blkid));
-
- if (b1 < b2)
- return -1;
- else if (b1 > b2)
- return 1;
- else if (arg1->ip_posid < arg2->ip_posid)
- return -1;
- else if (arg1->ip_posid > arg2->ip_posid)
- return 1;
- else
- return 0;
-}
-
Datum
tideq(PG_FUNCTION_ARGS)
{
ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
- PG_RETURN_BOOL(tid_cmp_internal(arg1,arg2) == 0);
+ PG_RETURN_BOOL(ItemPointerCompare(arg1,arg2) == 0);
}
Datum
@@ -196,7 +173,7 @@ tidne(PG_FUNCTION_ARGS)
ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
- PG_RETURN_BOOL(tid_cmp_internal(arg1,arg2) != 0);
+ PG_RETURN_BOOL(ItemPointerCompare(arg1,arg2) != 0);
}
Datum
@@ -205,7 +182,7 @@ tidlt(PG_FUNCTION_ARGS)
ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
- PG_RETURN_BOOL(tid_cmp_internal(arg1,arg2) < 0);
+ PG_RETURN_BOOL(ItemPointerCompare(arg1,arg2) < 0);
}
Datum
@@ -214,7 +191,7 @@ tidle(PG_FUNCTION_ARGS)
ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
- PG_RETURN_BOOL(tid_cmp_internal(arg1,arg2) <= 0);
+ PG_RETURN_BOOL(ItemPointerCompare(arg1,arg2) <= 0);
}
Datum
@@ -223,7 +200,7 @@ tidgt(PG_FUNCTION_ARGS)
ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
- PG_RETURN_BOOL(tid_cmp_internal(arg1,arg2) > 0);
+ PG_RETURN_BOOL(ItemPointerCompare(arg1,arg2) > 0);
}
Datum
@@ -232,7 +209,7 @@ tidge(PG_FUNCTION_ARGS)
ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
- PG_RETURN_BOOL(tid_cmp_internal(arg1,arg2) >= 0);
+ PG_RETURN_BOOL(ItemPointerCompare(arg1,arg2) >= 0);
}
Datum
@@ -241,7 +218,7 @@ bttidcmp(PG_FUNCTION_ARGS)
ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
- PG_RETURN_INT32(tid_cmp_internal(arg1, arg2));
+ PG_RETURN_INT32(ItemPointerCompare(arg1, arg2));
}
Datum
@@ -250,7 +227,7 @@ tidlarger(PG_FUNCTION_ARGS)
ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
- PG_RETURN_ITEMPOINTER(tid_cmp_internal(arg1,arg2) >= 0 ? arg1 : arg2);
+ PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1,arg2) >= 0 ? arg1 : arg2);
}
Datum
@@ -259,7 +236,7 @@ tidsmaller(PG_FUNCTION_ARGS)
ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
- PG_RETURN_ITEMPOINTER(tid_cmp_internal(arg1,arg2) <= 0 ? arg1 : arg2);
+ PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1,arg2) <= 0 ? arg1 : arg2);
}
diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c
index 3233dfc2fa..7bdf6d15dc 100644
--- a/src/bin/psql/common.c
+++ b/src/bin/psql/common.c
@@ -3,7 +3,7 @@
*
* Copyright (c) 2000-2006, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.124 2006/08/13 21:10:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.125 2006/08/25 04:06:54 tgl Exp $
*/
#include "postgres_fe.h"
#include "common.h"
@@ -1075,22 +1075,71 @@ command_no_begin(const char *query)
* Commands not allowed within transactions. The statements checked for
* here should be exactly those that call PreventTransactionChain() in the
* backend.
- *
- * Note: we are a bit sloppy about CLUSTER, which is transactional in some
- * variants but not others.
*/
if (wordlen == 6 && pg_strncasecmp(query, "vacuum", 6) == 0)
return true;
if (wordlen == 7 && pg_strncasecmp(query, "cluster", 7) == 0)
- return true;
+ {
+ /* CLUSTER with any arguments is allowed in transactions */
+ query += wordlen;
+
+ query = skip_white_space(query);
+
+ if (isalpha((unsigned char) query[0]))
+ return false; /* has additional words */
+ return true; /* it's CLUSTER without arguments */
+ }
+
+ if (wordlen == 6 && pg_strncasecmp(query, "create", 6) == 0)
+ {
+ query += wordlen;
+
+ query = skip_white_space(query);
+
+ wordlen = 0;
+ while (isalpha((unsigned char) query[wordlen]))
+ wordlen += PQmblen(&query[wordlen], pset.encoding);
+
+ if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0)
+ return true;
+ if (wordlen == 10 && pg_strncasecmp(query, "tablespace", 10) == 0)
+ return true;
+
+ /* CREATE [UNIQUE] INDEX CONCURRENTLY isn't allowed in xacts */
+ if (wordlen == 6 && pg_strncasecmp(query, "unique", 6) == 0)
+ {
+ query += wordlen;
+
+ query = skip_white_space(query);
+
+ wordlen = 0;
+ while (isalpha((unsigned char) query[wordlen]))
+ wordlen += PQmblen(&query[wordlen], pset.encoding);
+ }
+
+ if (wordlen == 5 && pg_strncasecmp(query, "index", 5) == 0)
+ {
+ query += wordlen;
+
+ query = skip_white_space(query);
+
+ wordlen = 0;
+ while (isalpha((unsigned char) query[wordlen]))
+ wordlen += PQmblen(&query[wordlen], pset.encoding);
+
+ if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0)
+ return true;
+ }
+
+ return false;
+ }
/*
- * Note: these tests will match CREATE SYSTEM, DROP SYSTEM, and REINDEX
- * TABLESPACE, which aren't really valid commands so we don't care much.
- * The other six possible matches are correct.
+ * Note: these tests will match DROP SYSTEM and REINDEX TABLESPACE,
+ * which aren't really valid commands so we don't care much.
+ * The other four possible matches are correct.
*/
- if ((wordlen == 6 && pg_strncasecmp(query, "create", 6) == 0) ||
- (wordlen == 4 && pg_strncasecmp(query, "drop", 4) == 0) ||
+ if ((wordlen == 4 && pg_strncasecmp(query, "drop", 4) == 0) ||
(wordlen == 7 && pg_strncasecmp(query, "reindex", 7) == 0))
{
query += wordlen;
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 4e4f2dd4b3..49db39074d 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -3,7 +3,7 @@
*
* Copyright (c) 2000-2006, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/bin/psql/describe.c,v 1.142 2006/07/27 19:52:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/describe.c,v 1.143 2006/08/25 04:06:54 tgl Exp $
*/
#include "postgres_fe.h"
#include "describe.h"
@@ -942,7 +942,7 @@ describeOneTableDetails(const char *schemaname,
PGresult *result;
printfPQExpBuffer(&buf,
- "SELECT i.indisunique, i.indisprimary, i.indisclustered, a.amname, c2.relname,\n"
+ "SELECT i.indisunique, i.indisprimary, i.indisclustered, i.indisvalid, a.amname, c2.relname,\n"
" pg_catalog.pg_get_expr(i.indpred, i.indrelid, true)\n"
"FROM pg_catalog.pg_index i, pg_catalog.pg_class c, pg_catalog.pg_class c2, pg_catalog.pg_am a\n"
"WHERE i.indexrelid = c.oid AND c.oid = '%s' AND c.relam = a.oid\n"
@@ -962,9 +962,10 @@ describeOneTableDetails(const char *schemaname,
char *indisunique = PQgetvalue(result, 0, 0);
char *indisprimary = PQgetvalue(result, 0, 1);
char *indisclustered = PQgetvalue(result, 0, 2);
- char *indamname = PQgetvalue(result, 0, 3);
- char *indtable = PQgetvalue(result, 0, 4);
- char *indpred = PQgetvalue(result, 0, 5);
+ char *indisvalid = PQgetvalue(result, 0, 3);
+ char *indamname = PQgetvalue(result, 0, 4);
+ char *indtable = PQgetvalue(result, 0, 5);
+ char *indpred = PQgetvalue(result, 0, 6);
int count_footers = 0;
if (strcmp(indisprimary, "t") == 0)
@@ -985,6 +986,9 @@ describeOneTableDetails(const char *schemaname,
if (strcmp(indisclustered, "t") == 0)
appendPQExpBuffer(&tmpbuf, _(", clustered"));
+ if (strcmp(indisvalid, "t") != 0)
+ appendPQExpBuffer(&tmpbuf, _(", invalid"));
+
footers = pg_malloc_zero(4 * sizeof(*footers));
footers[count_footers++] = pg_strdup(tmpbuf.data);
add_tablespace_footer(tableinfo.relkind, tableinfo.tablespace,
@@ -1067,7 +1071,7 @@ describeOneTableDetails(const char *schemaname,
if (tableinfo.hasindex)
{
printfPQExpBuffer(&buf,
- "SELECT c2.relname, i.indisprimary, i.indisunique, i.indisclustered, "
+ "SELECT c2.relname, i.indisprimary, i.indisunique, i.indisclustered, i.indisvalid, "
"pg_catalog.pg_get_indexdef(i.indexrelid, 0, true), c2.reltablespace\n"
"FROM pg_catalog.pg_class c, pg_catalog.pg_class c2, pg_catalog.pg_index i\n"
"WHERE c.oid = '%s' AND c.oid = i.indrelid AND i.indexrelid = c2.oid\n"
@@ -1201,7 +1205,7 @@ describeOneTableDetails(const char *schemaname,
? " UNIQUE,"
: ""));
/* Everything after "USING" is echoed verbatim */
- indexdef = PQgetvalue(result1, i, 4);
+ indexdef = PQgetvalue(result1, i, 5);
usingpos = strstr(indexdef, " USING ");
if (usingpos)
indexdef = usingpos + 7;
@@ -1211,11 +1215,14 @@ describeOneTableDetails(const char *schemaname,
if (strcmp(PQgetvalue(result1, i, 3), "t") == 0)
appendPQExpBuffer(&buf, " CLUSTER");
+ if (strcmp(PQgetvalue(result1, i, 4), "t") != 0)
+ appendPQExpBuffer(&buf, " INVALID");
+
/* Print tablespace of the index on the same line */
count_footers += 1;
initPQExpBuffer(&tmpbuf);
if (add_tablespace_footer('i',
- atooid(PQgetvalue(result1, i, 5)),
+ atooid(PQgetvalue(result1, i, 6)),
footers, &count_footers, tmpbuf, false))
{
appendPQExpBuffer(&buf, ", ");
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 448f9e4ecf..6266b591ca 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.352 2006/08/21 00:57:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.353 2006/08/25 04:06:54 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 200608191
+#define CATALOG_VERSION_NO 200608251
#endif
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index f54508fb5b..881ef90279 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/index.h,v 1.70 2006/07/31 01:16:37 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/index.h,v 1.71 2006/08/25 04:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -39,7 +39,8 @@ extern Oid index_create(Oid heapRelationId,
bool isprimary,
bool isconstraint,
bool allow_system_table_mods,
- bool skip_build);
+ bool skip_build,
+ bool concurrent);
extern void index_drop(Oid indexId);
@@ -64,6 +65,8 @@ extern double IndexBuildHeapScan(Relation heapRelation,
IndexBuildCallback callback,
void *callback_state);
+extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot);
+
extern void reindex_index(Oid indexId);
extern bool reindex_relation(Oid relid, bool toast_too);
diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h
index e098079b93..68d7eae7ab 100644
--- a/src/include/catalog/pg_attribute.h
+++ b/src/include/catalog/pg_attribute.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_attribute.h,v 1.123 2006/07/10 16:20:51 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_attribute.h,v 1.124 2006/08/25 04:06:55 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
@@ -460,9 +460,10 @@ DATA(insert ( 1259 tableoid 26 0 4 -7 0 -1 -1 t p i t f f t 0));
{ 0, {"indisunique"}, 16, -1, 1, 4, 0, -1, -1, true, 'p', 'c', true, false, false, true, 0 }, \
{ 0, {"indisprimary"}, 16, -1, 1, 5, 0, -1, -1, true, 'p', 'c', true, false, false, true, 0 }, \
{ 0, {"indisclustered"}, 16, -1, 1, 6, 0, -1, -1, true, 'p', 'c', true, false, false, true, 0 }, \
-{ 0, {"indkey"}, 22, -1, -1, 7, 1, -1, -1, false, 'p', 'i', true, false, false, true, 0 }, \
-{ 0, {"indclass"}, 30, -1, -1, 8, 1, -1, -1, false, 'p', 'i', true, false, false, true, 0 }, \
-{ 0, {"indexprs"}, 25, -1, -1, 9, 0, -1, -1, false, 'x', 'i', false, false, false, true, 0 }, \
-{ 0, {"indpred"}, 25, -1, -1, 10, 0, -1, -1, false, 'x', 'i', false, false, false, true, 0 }
+{ 0, {"indisvalid"}, 16, -1, 1, 7, 0, -1, -1, true, 'p', 'c', true, false, false, true, 0 }, \
+{ 0, {"indkey"}, 22, -1, -1, 8, 1, -1, -1, false, 'p', 'i', true, false, false, true, 0 }, \
+{ 0, {"indclass"}, 30, -1, -1, 9, 1, -1, -1, false, 'p', 'i', true, false, false, true, 0 }, \
+{ 0, {"indexprs"}, 25, -1, -1, 10, 0, -1, -1, false, 'x', 'i', false, false, false, true, 0 }, \
+{ 0, {"indpred"}, 25, -1, -1, 11, 0, -1, -1, false, 'x', 'i', false, false, false, true, 0 }
#endif /* PG_ATTRIBUTE_H */
diff --git a/src/include/catalog/pg_index.h b/src/include/catalog/pg_index.h
index bc7026cc25..506fbc293d 100644
--- a/src/include/catalog/pg_index.h
+++ b/src/include/catalog/pg_index.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_index.h,v 1.39 2006/03/05 15:58:54 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_index.h,v 1.40 2006/08/25 04:06:55 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
@@ -41,6 +41,7 @@ CATALOG(pg_index,2610) BKI_WITHOUT_OIDS
bool indisunique; /* is this a unique index? */
bool indisprimary; /* is this index for primary key? */
bool indisclustered; /* is this the index last clustered by? */
+ bool indisvalid; /* is this index valid for use by queries? */
/* VARIABLE LENGTH FIELDS: */
int2vector indkey; /* column numbers of indexed cols, or 0 */
@@ -63,16 +64,17 @@ typedef FormData_pg_index *Form_pg_index;
* compiler constants for pg_index
* ----------------
*/
-#define Natts_pg_index 10
+#define Natts_pg_index 11
#define Anum_pg_index_indexrelid 1
#define Anum_pg_index_indrelid 2
#define Anum_pg_index_indnatts 3
#define Anum_pg_index_indisunique 4
#define Anum_pg_index_indisprimary 5
#define Anum_pg_index_indisclustered 6
-#define Anum_pg_index_indkey 7
-#define Anum_pg_index_indclass 8
-#define Anum_pg_index_indexprs 9
-#define Anum_pg_index_indpred 10
+#define Anum_pg_index_indisvalid 7
+#define Anum_pg_index_indkey 8
+#define Anum_pg_index_indclass 9
+#define Anum_pg_index_indexprs 10
+#define Anum_pg_index_indpred 11
#endif /* PG_INDEX_H */
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index 902931d02a..26983fc198 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/commands/defrem.h,v 1.75 2006/07/18 17:42:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/commands/defrem.h,v 1.76 2006/08/25 04:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -33,7 +33,8 @@ extern void DefineIndex(RangeVar *heapRelation,
bool is_alter_table,
bool check_rights,
bool skip_build,
- bool quiet);
+ bool quiet,
+ bool concurrent);
extern void RemoveIndex(RangeVar *relation, DropBehavior behavior);
extern void ReindexIndex(RangeVar *indexRelation);
extern void ReindexTable(RangeVar *relation);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 47b7c01f23..35ee8a20d0 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.159 2006/08/12 02:52:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.160 2006/08/25 04:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -37,6 +37,7 @@
* Predicate partial-index predicate, or NIL if none
* PredicateState exec state for predicate, or NIL if none
* Unique is it a unique index?
+ * Concurrent are we doing a concurrent index build?
* ----------------
*/
typedef struct IndexInfo
@@ -49,6 +50,7 @@ typedef struct IndexInfo
List *ii_Predicate; /* list of Expr */
List *ii_PredicateState; /* list of ExprState */
bool ii_Unique;
+ bool ii_Concurrent;
} IndexInfo;
/* ----------------
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index a3ae58d17b..7aa7bfd38e 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.324 2006/08/21 00:57:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.325 2006/08/25 04:06:56 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1463,6 +1463,7 @@ typedef struct IndexStmt
bool unique; /* is index unique? */
bool primary; /* is index on primary key? */
bool isconstraint; /* is it from a CONSTRAINT clause? */
+ bool concurrent; /* should this be a concurrent index build? */
} IndexStmt;
/* ----------------------
diff --git a/src/include/storage/itemptr.h b/src/include/storage/itemptr.h
index c6eaaf1911..b558eb33eb 100644
--- a/src/include/storage/itemptr.h
+++ b/src/include/storage/itemptr.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/itemptr.h,v 1.28 2006/03/05 15:58:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/itemptr.h,v 1.29 2006/08/25 04:06:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -141,5 +141,6 @@ typedef ItemPointerData *ItemPointer;
*/
extern bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2);
+extern int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2);
#endif /* ITEMPTR_H */
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 715462e3d5..1604d87377 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -360,3 +360,53 @@ INSERT INTO func_index_heap VALUES('QWERTY');
create unique index hash_f8_index_1 on hash_f8_heap(abs(random));
create unique index hash_f8_index_2 on hash_f8_heap((seqno + 1), random);
create unique index hash_f8_index_3 on hash_f8_heap(random) where seqno > 1000;
+--
+-- Try some concurrent index builds
+--
+-- Unfortunately this only tests about half the code paths because there are
+-- no concurrent updates happening to the table at the same time.
+CREATE TABLE concur_heap (f1 text, f2 text);
+-- empty table
+CREATE INDEX CONCURRENTLY concur_index1 ON concur_heap(f2,f1);
+INSERT INTO concur_heap VALUES ('a','b');
+INSERT INTO concur_heap VALUES ('b','b');
+-- unique index
+CREATE UNIQUE INDEX CONCURRENTLY concur_index2 ON concur_heap(f1);
+-- check if constraint is set up properly to be enforced
+INSERT INTO concur_heap VALUES ('b','x');
+ERROR: duplicate key violates unique constraint "concur_index2"
+-- check if constraint is enforced properly at build time
+CREATE UNIQUE INDEX CONCURRENTLY concur_index3 ON concur_heap(f2);
+ERROR: could not create unique index
+DETAIL: Table contains duplicated values.
+-- test that expression indexes and partial indexes work concurrently
+CREATE INDEX CONCURRENTLY concur_index4 on concur_heap(f2) WHERE f1='a';
+CREATE INDEX CONCURRENTLY concur_index5 on concur_heap(f2) WHERE f1='x';
+CREATE INDEX CONCURRENTLY concur_index6 on concur_heap((f2||f1));
+-- You can't do a concurrent index build in a transaction
+BEGIN;
+CREATE INDEX CONCURRENTLY concur_index7 ON concur_heap(f1);
+ERROR: CREATE INDEX CONCURRENTLY cannot run inside a transaction block
+COMMIT;
+-- But you can do a regular index build in a transaction
+BEGIN;
+CREATE INDEX std_index on concur_heap(f2);
+COMMIT;
+-- check to make sure that the failed indexes were cleaned up properly and the
+-- successful indexes are created properly. Notably that they do NOT have the
+-- "invalid" flag set.
+\d concur_heap
+Table "public.concur_heap"
+ Column | Type | Modifiers
+--------+------+-----------
+ f1 | text |
+ f2 | text |
+Indexes:
+ "concur_index2" UNIQUE, btree (f1)
+ "concur_index1" btree (f2, f1)
+ "concur_index4" btree (f2) WHERE f1 = 'a'::text
+ "concur_index5" btree (f2) WHERE f1 = 'x'::text
+ "concur_index6" btree ((f2 || f1))
+ "std_index" btree (f2)
+
+DROP TABLE concur_heap;
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 37df3ee2ca..33211e967b 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -219,3 +219,43 @@ INSERT INTO func_index_heap VALUES('QWERTY');
create unique index hash_f8_index_1 on hash_f8_heap(abs(random));
create unique index hash_f8_index_2 on hash_f8_heap((seqno + 1), random);
create unique index hash_f8_index_3 on hash_f8_heap(random) where seqno > 1000;
+
+--
+-- Try some concurrent index builds
+--
+-- Unfortunately this only tests about half the code paths because there are
+-- no concurrent updates happening to the table at the same time.
+
+CREATE TABLE concur_heap (f1 text, f2 text);
+-- empty table
+CREATE INDEX CONCURRENTLY concur_index1 ON concur_heap(f2,f1);
+INSERT INTO concur_heap VALUES ('a','b');
+INSERT INTO concur_heap VALUES ('b','b');
+-- unique index
+CREATE UNIQUE INDEX CONCURRENTLY concur_index2 ON concur_heap(f1);
+-- check if constraint is set up properly to be enforced
+INSERT INTO concur_heap VALUES ('b','x');
+-- check if constraint is enforced properly at build time
+CREATE UNIQUE INDEX CONCURRENTLY concur_index3 ON concur_heap(f2);
+-- test that expression indexes and partial indexes work concurrently
+CREATE INDEX CONCURRENTLY concur_index4 on concur_heap(f2) WHERE f1='a';
+CREATE INDEX CONCURRENTLY concur_index5 on concur_heap(f2) WHERE f1='x';
+CREATE INDEX CONCURRENTLY concur_index6 on concur_heap((f2||f1));
+
+-- You can't do a concurrent index build in a transaction
+BEGIN;
+CREATE INDEX CONCURRENTLY concur_index7 ON concur_heap(f1);
+COMMIT;
+
+-- But you can do a regular index build in a transaction
+BEGIN;
+CREATE INDEX std_index on concur_heap(f2);
+COMMIT;
+
+-- check to make sure that the failed indexes were cleaned up properly and the
+-- successful indexes are created properly. Notably that they do NOT have the
+-- "invalid" flag set.
+
+\d concur_heap
+
+DROP TABLE concur_heap;