diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 80ed7d829b..20604d73e4 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1,4 +1,4 @@ - + @@ -2420,6 +2420,17 @@ If true, the table was last clustered on this index + + indisvalid + bool + + If true, the index is currently valid for queries. + False means the index is possibly incomplete: it must still be + inserted into by INSERT/UPDATE operations, but it cannot safely be + used for queries, and if it is unique, the uniqueness shouldn't be + relied on either. + + indkey int2vector diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index 1afa120766..10ab84278d 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -1,4 +1,4 @@ - + Index Access Method Interface Definition @@ -648,6 +648,16 @@ amrestrpos (IndexScanDesc scan); + + Furthermore, immediately before raising a uniqueness violation + according to the above rules, the access method must recheck the + liveness of the row being inserted. If it is committed dead then + no error should be raised. (This case cannot occur during the + ordinary scenario of inserting a row that's just been created by + the current transaction. It can happen during + CREATE UNIQUE INDEX CONCURRENTLY, however.) + + We require the index access method to apply these tests itself, which means that it must reach into the heap to check the commit status of diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index 10669c0155..223bb81cfc 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -1,4 +1,4 @@ - + Indexes @@ -90,6 +90,17 @@ CREATE INDEX test1_id_index ON test1 (id); significantly speed up queries with joins. + + Creating an index on a large table can take a long time. By default, + PostgreSQL allows reads (selects) to occur + on the table in parallel with index creation, but writes (inserts, + updates, deletes) are blocked until the index build is finished. + It is possible to allow writes to occur in parallel with index + creation, but there are several caveats to be aware of — + for more information see . + + After an index is created, the system has to keep it synchronized with the table. This adds overhead to data manipulation operations. diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml index 8ebb820519..1a1e95db50 100644 --- a/doc/src/sgml/mvcc.sgml +++ b/doc/src/sgml/mvcc.sgml @@ -1,4 +1,4 @@ - + Concurrency Control @@ -622,7 +622,8 @@ SELECT SUM(value) FROM mytab WHERE class = 2; - Acquired by VACUUM (without ). + Acquired by VACUUM (without ) + and by CREATE INDEX CONCURRENTLY. @@ -641,7 +642,8 @@ SELECT SUM(value) FROM mytab WHERE class = 2; - Acquired by CREATE INDEX. + Acquired by CREATE INDEX + (without ). diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 20a89713bf..be0ca63f2c 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -1,5 +1,5 @@ @@ -20,7 +20,7 @@ PostgreSQL documentation -CREATE [ UNIQUE ] INDEX name ON table [ USING method ] +CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] name ON table [ USING method ] ( { column | ( expression ) } [ opclass ] [, ...] ) [ WITH ( storage_parameter = value [, ... 
] ) ] [ TABLESPACE tablespace ] @@ -110,6 +110,21 @@ CREATE [ UNIQUE ] INDEX name ON + + CONCURRENTLY + + + When this option is used, PostgreSQL will build the + index without taking any locks that prevent concurrent inserts, + updates, or deletes on the table; whereas a standard index build + locks out writes (but not reads) on the table until it's done. + There are several caveats to be aware of when using this option + — see . + + + + name @@ -239,6 +254,82 @@ CREATE [ UNIQUE ] INDEX name ON + + + Building Indexes Concurrently + + + index + building concurrently + + + + Creating an index for a large table can be a long operation. In large data + warehousing applications it can easily take hours or even days to build + indexes. It's important to understand the impact creating indexes has on a + system. + + + + Normally PostgreSQL locks the table to be indexed against + writes and performs the entire index build with a single scan of the + table. Other transactions can still read the table, but if they try to + insert, update, or delete rows in the table they will block until the + index build is finished. + + + + PostgreSQL also supports building indexes without locking + out writes. This method is invoked by specifying the + CONCURRENTLY option of CREATE INDEX. + When this option is used, + PostgreSQL must perform two scans of the table, and in + addition it must wait for all existing transactions to terminate. Thus + this method requires more total work than a standard index build and takes + significantly longer to complete. However, since it allows normal + operations to continue while the index is built, this method is useful for + adding new indexes in a production environment. Of course, the extra CPU + and I/O load imposed by the index creation may slow other operations. + + + + If a problem arises during the second scan of the table, such as a + uniqueness violation in a unique index, the CREATE INDEX + command will fail but leave behind an invalid index. This index + will be ignored for querying purposes because it may be incomplete; + however it will still consume update overhead. The recommended recovery + method in such cases is to drop the index and try again to perform + CREATE INDEX CONCURRENTLY. (Another possibility is to rebuild + the index with REINDEX. However, since REINDEX + does not support concurrent builds, this option is unlikely to seem + attractive.) + + + + Another caveat when building a unique index concurrently is that the + uniqueness constraint is already being enforced against other transactions + when the second table scan begins. This means that constraint violations + could be reported in other queries prior to the index becoming available + for use, or even in cases where the index build eventually fails. Also, + if a failure does occur in the second scan, the invalid index + continues to enforce its uniqueness constraint afterwards. + + + + Concurrent builds of expression indexes and partial indexes are supported. + Errors occurring in the evaluation of these expressions could cause + behavior similar to that described above for unique constraint violations. + + + + Regular index builds permit other regular index builds on the + same table to occur in parallel, but only one concurrent index build + can occur on a table at a time. In both cases, no other types of schema + modification on the table are allowed meanwhile. 
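+
+   As a minimal sketch of the recovery procedure described above (the index
+   and table names are only illustrative, matching the example later on this
+   reference page):
+
+DROP INDEX sales_quantity_index;
+CREATE INDEX CONCURRENTLY sales_quantity_index ON sales_table (quantity);
+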
Another difference + is that a regular CREATE INDEX command can be performed within + a transaction block, but CREATE INDEX CONCURRENTLY cannot. + + @@ -339,15 +430,22 @@ Is this example correct? To create a GiST index on a point attribute so that we can efficiently use box operators on the result of the conversion function: - CREATE INDEX pointloc ON points USING GIST (point2box(location) box_ops); SELECT * FROM points WHERE point2box(points.pointloc) = boxes.box; + --> + + To create an index without locking out writes to the table: + +CREATE INDEX CONCURRENTLY sales_quantity_index ON sales_table (quantity); + + + diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml index dbe10ca762..9502a0daf5 100644 --- a/doc/src/sgml/ref/reindex.sgml +++ b/doc/src/sgml/ref/reindex.sgml @@ -1,5 +1,5 @@ @@ -30,7 +30,7 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } nam REINDEX rebuilds an index using the data stored in the index's table, replacing the old copy of the index. There are - three main reasons to use REINDEX: + several scenarios in which to use REINDEX: @@ -61,6 +61,18 @@ REINDEX { INDEX | TABLE | DATABASE | SYSTEM } nam for an index, and wish to ensure that the change has taken full effect. + + + + An index build with the CONCURRENTLY option failed, leaving + an invalid index. Such indexes are useless but it can be + convenient to use REINDEX to rebuild them. Note that + REINDEX will not perform a concurrent build. To build the + index without interfering with production you should drop the index and + reissue the CREATE INDEX CONCURRENTLY command. + + + diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 597949aa2d..910d654443 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.142 2006/07/25 19:13:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.143 2006/08/25 04:06:46 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -244,8 +244,33 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, } /* - * Otherwise we have a definite conflict. + * Otherwise we have a definite conflict. But before + * complaining, look to see if the tuple we want to insert + * is itself now committed dead --- if so, don't complain. + * This is a waste of time in normal scenarios but we must + * do it to support CREATE INDEX CONCURRENTLY. */ + htup.t_self = itup->t_tid; + if (heap_fetch(heapRel, SnapshotSelf, &htup, &hbuffer, + false, NULL)) + { + /* Normal case --- it's still live */ + ReleaseBuffer(hbuffer); + } + else if (htup.t_data != NULL) + { + /* + * It's been deleted, so no error, and no need to + * continue searching + */ + break; + } + else + { + /* couldn't find the tuple?? 
*/ + elog(ERROR, "failed to fetch tuple being inserted"); + } + ereport(ERROR, (errcode(ERRCODE_UNIQUE_VIOLATION), errmsg("duplicate key violates unique constraint \"%s\"", diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y index b25ea11a0b..6300eafb9a 100644 --- a/src/backend/bootstrap/bootparse.y +++ b/src/backend/bootstrap/bootparse.y @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/bootstrap/bootparse.y,v 1.83 2006/07/31 01:16:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/bootstrap/bootparse.y,v 1.84 2006/08/25 04:06:46 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -257,7 +257,7 @@ Boot_DeclareIndexStmt: $10, NULL, NIL, NIL, false, false, false, - false, false, true, false); + false, false, true, false, false); do_end(); } ; @@ -275,7 +275,7 @@ Boot_DeclareUniqueIndexStmt: $11, NULL, NIL, NIL, true, false, false, - false, false, true, false); + false, false, true, false, false); do_end(); } ; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 0da209ff21..60a30ce372 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.272 2006/07/31 20:09:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.273 2006/08/25 04:06:46 tgl Exp $ * * * INTERFACE ROUTINES @@ -34,6 +34,7 @@ #include "catalog/index.h" #include "catalog/indexing.h" #include "catalog/pg_constraint.h" +#include "catalog/pg_operator.h" #include "catalog/pg_opclass.h" #include "catalog/pg_type.h" #include "executor/executor.h" @@ -49,8 +50,17 @@ #include "utils/memutils.h" #include "utils/relcache.h" #include "utils/syscache.h" +#include "utils/tuplesort.h" +/* state info for validate_index bulkdelete callback */ +typedef struct +{ + Tuplesortstate *tuplesort; /* for sorting the index TIDs */ + /* statistics (for debug purposes only): */ + double htups, itups, tups_inserted; +} v_i_state; + /* non-export function prototypes */ static TupleDesc ConstructTupleDescriptor(Relation heapRelation, IndexInfo *indexInfo, @@ -61,9 +71,16 @@ static void AppendAttributeTuples(Relation indexRelation, int numatts); static void UpdateIndexRelation(Oid indexoid, Oid heapoid, IndexInfo *indexInfo, Oid *classOids, - bool primary); + bool primary, + bool isvalid); static void index_update_stats(Relation rel, bool hasindex, bool isprimary, Oid reltoastidxid, double reltuples); +static bool validate_index_callback(ItemPointer itemptr, void *opaque); +static void validate_index_heapscan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + Snapshot snapshot, + v_i_state *state); static Oid IndexGetRelation(Oid indexId); @@ -308,6 +325,8 @@ AppendAttributeTuples(Relation indexRelation, int numatts) /* ---------------------------------------------------------------- * UpdateIndexRelation + * + * Construct and insert a new entry in the pg_index catalog * ---------------------------------------------------------------- */ static void @@ -315,7 +334,8 @@ UpdateIndexRelation(Oid indexoid, Oid heapoid, IndexInfo *indexInfo, Oid *classOids, - bool primary) + bool primary, + bool isvalid) { int2vector *indkey; oidvector *indclass; @@ -383,6 +403,7 @@ UpdateIndexRelation(Oid indexoid, values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique); values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary); values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false); + 
values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid); values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey); values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass); values[Anum_pg_index_indexprs - 1] = exprsDatum; @@ -427,7 +448,10 @@ UpdateIndexRelation(Oid indexoid, * isconstraint: index is owned by a PRIMARY KEY or UNIQUE constraint * allow_system_table_mods: allow table to be a system catalog * skip_build: true to skip the index_build() step for the moment; caller - * must do it later (typically via reindex_index()) + * must do it later (typically via reindex_index()) + * concurrent: if true, do not lock the table against writers. The index + * will be marked "invalid" and the caller must take additional steps + * to fix it up. * * Returns OID of the created index. */ @@ -443,7 +467,8 @@ index_create(Oid heapRelationId, bool isprimary, bool isconstraint, bool allow_system_table_mods, - bool skip_build) + bool skip_build, + bool concurrent) { Relation pg_class; Relation heapRelation; @@ -456,9 +481,12 @@ index_create(Oid heapRelationId, pg_class = heap_open(RelationRelationId, RowExclusiveLock); /* - * Only SELECT ... FOR UPDATE/SHARE are allowed while doing this + * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard + * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE + * (but not VACUUM). */ - heapRelation = heap_open(heapRelationId, ShareLock); + heapRelation = heap_open(heapRelationId, + (concurrent ? ShareUpdateExclusiveLock : ShareLock)); /* * The index will be in the same namespace as its parent table, and is @@ -480,6 +508,16 @@ index_create(Oid heapRelationId, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("user-defined indexes on system catalog tables are not supported"))); + /* + * concurrent index build on a system catalog is unsafe because we tend + * to release locks before committing in catalogs + */ + if (concurrent && + IsSystemRelation(heapRelation)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("concurrent index creation on system catalog tables is not supported"))); + /* * We cannot allow indexing a shared relation after initdb (because * there's no way to make the entry in other databases' pg_class). @@ -578,7 +616,7 @@ index_create(Oid heapRelationId, * ---------------- */ UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo, - classObjectId, isprimary); + classObjectId, isprimary, !concurrent); /* * Register constraint and dependencies for the index. @@ -745,9 +783,8 @@ index_create(Oid heapRelationId, } /* - * Close the heap and index; but we keep the ShareLock on the heap and - * the exclusive lock on the index that we acquired above, until end of - * transaction. + * Close the heap and index; but we keep the locks that we acquired above + * until end of transaction. */ index_close(indexRelation, NoLock); heap_close(heapRelation, NoLock); @@ -895,6 +932,7 @@ BuildIndexInfo(Relation index) /* other info */ ii->ii_Unique = indexStruct->indisunique; + ii->ii_Concurrent = false; /* assume normal case */ return ii; } @@ -1327,13 +1365,22 @@ IndexBuildHeapScan(Relation heapRelation, estate); /* - * Ok, begin our scan of the base relation. We use SnapshotAny because we - * must retrieve all tuples and do our own time qual checks. + * Prepare for scan of the base relation. In a normal index build, + * we use SnapshotAny because we must retrieve all tuples and do our own + * time qual checks (because we have to index RECENTLY_DEAD tuples). 
+ * In a concurrent build, we take a regular MVCC snapshot and index + * whatever's live according to that. During bootstrap we just use + * SnapshotNow. */ if (IsBootstrapProcessingMode()) { snapshot = SnapshotNow; - OldestXmin = InvalidTransactionId; + OldestXmin = InvalidTransactionId; /* not used */ + } + else if (indexInfo->ii_Concurrent) + { + snapshot = CopySnapshot(GetTransactionSnapshot()); + OldestXmin = InvalidTransactionId; /* not used */ } else { @@ -1344,8 +1391,8 @@ IndexBuildHeapScan(Relation heapRelation, scan = heap_beginscan(heapRelation, /* relation */ snapshot, /* seeself */ - 0, /* number of keys */ - NULL); /* scan key */ + 0, /* number of keys */ + NULL); /* scan key */ reltuples = 0; @@ -1374,10 +1421,12 @@ IndexBuildHeapScan(Relation heapRelation, scan->rs_cbuf)) { case HEAPTUPLE_DEAD: + /* Definitely dead, we can ignore it */ indexIt = false; tupleIsAlive = false; break; case HEAPTUPLE_LIVE: + /* Normal case, index and unique-check it */ indexIt = true; tupleIsAlive = true; break; @@ -1388,6 +1437,7 @@ IndexBuildHeapScan(Relation heapRelation, * anyway to preserve MVCC semantics. (Pre-existing * transactions could try to use the index after we * finish building it, and may need to see such tuples.) + * Exclude it from unique-checking, however. */ indexIt = true; tupleIsAlive = false; @@ -1499,6 +1549,309 @@ IndexBuildHeapScan(Relation heapRelation, } +/* + * validate_index - support code for concurrent index builds + * + * We do a concurrent index build by first building the index normally via + * index_create(), while holding a weak lock that allows concurrent + * insert/update/delete. Also, we index only tuples that are valid + * as of the start of the scan (see IndexBuildHeapScan), whereas a normal + * build takes care to include recently-dead tuples. This is OK because + * we won't mark the index valid until all transactions that might be able + * to see those tuples are gone. The reason for doing that is to avoid + * bogus unique-index failures due to concurrent UPDATEs (we might see + * different versions of the same row as being valid when we pass over them, + * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that + * does not contain any tuples added to the table while we built the index. + * + * Next, we commit the transaction so that the index becomes visible to other + * backends, but it is marked not "indisvalid" to prevent the planner from + * relying on it for indexscans. Then we wait for all transactions that + * could have been modifying the table to terminate. At this point we + * know that any subsequently-started transactions will see the index and + * insert their new tuples into it. We then take a new reference snapshot + * which is passed to validate_index(). Any tuples that are valid according + * to this snap, but are not in the index, must be added to the index. + * (Any tuples committed live after the snap will be inserted into the + * index by their originating transaction. Any tuples committed dead before + * the snap need not be indexed, because we will wait out all transactions + * that might care about them before we mark the index valid.) + * + * validate_index() works by first gathering all the TIDs currently in the + * index, using a bulkdelete callback that just stores the TIDs and doesn't + * ever say "delete it". (This should be faster than a plain indexscan; + * also, not all index AMs support full-index indexscan.) 
Then we sort the + * TIDs, and finally scan the table doing a "merge join" against the TID list + * to see which tuples are missing from the index. Thus we will ensure that + * all tuples valid according to the reference snapshot are in the index. + * + * Building a unique index this way is tricky: we might try to insert a + * tuple that is already dead or is in process of being deleted, and we + * mustn't have a uniqueness failure against an updated version of the same + * row. We can check the tuple to see if it's already dead and tell + * index_insert() not to do the uniqueness check, but that still leaves us + * with a race condition against an in-progress update. To handle that, + * we expect the index AM to recheck liveness of the to-be-inserted tuple + * before it declares a uniqueness error. + * + * After completing validate_index(), we wait until all transactions that + * were alive at the time of the reference snapshot are gone; this is + * necessary to be sure there are none left with a serializable snapshot + * older than the reference (and hence possibly able to see tuples we did + * not index). Then we mark the index valid and commit. + * + * Doing two full table scans is a brute-force strategy. We could try to be + * cleverer, eg storing new tuples in a special area of the table (perhaps + * making the table append-only by setting use_fsm). However that would + * add yet more locking issues. + */ +void +validate_index(Oid heapId, Oid indexId, Snapshot snapshot) +{ + Relation heapRelation, indexRelation; + IndexInfo *indexInfo; + IndexVacuumInfo ivinfo; + v_i_state state; + + /* Open and lock the parent heap relation */ + heapRelation = heap_open(heapId, ShareUpdateExclusiveLock); + /* And the target index relation */ + indexRelation = index_open(indexId, RowExclusiveLock); + + /* + * Fetch info needed for index_insert. (You might think this should + * be passed in from DefineIndex, but its copy is long gone due to + * having been built in a previous transaction.) + */ + indexInfo = BuildIndexInfo(indexRelation); + + /* mark build is concurrent just for consistency */ + indexInfo->ii_Concurrent = true; + + /* + * Scan the index and gather up all the TIDs into a tuplesort object. 
+ */ + ivinfo.index = indexRelation; + ivinfo.vacuum_full = false; + ivinfo.message_level = DEBUG2; + ivinfo.num_heap_tuples = -1; + + state.tuplesort = tuplesort_begin_datum(TIDOID, + TIDLessOperator, + maintenance_work_mem, + false); + state.htups = state.itups = state.tups_inserted = 0; + + (void) index_bulk_delete(&ivinfo, NULL, + validate_index_callback, (void *) &state); + + /* Execute the sort */ + tuplesort_performsort(state.tuplesort); + + /* + * Now scan the heap and "merge" it with the index + */ + validate_index_heapscan(heapRelation, + indexRelation, + indexInfo, + snapshot, + &state); + + /* Done with tuplesort object */ + tuplesort_end(state.tuplesort); + + elog(DEBUG2, + "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples", + state.htups, state.itups, state.tups_inserted); + + /* Close rels, but keep locks */ + index_close(indexRelation, NoLock); + heap_close(heapRelation, NoLock); +} + +/* + * validate_index_callback - bulkdelete callback to collect the index TIDs + */ +static bool +validate_index_callback(ItemPointer itemptr, void *opaque) +{ + v_i_state *state = (v_i_state *) opaque; + + tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false); + state->itups += 1; + return false; /* never actually delete anything */ +} + +/* + * validate_index_heapscan - second table scan for concurrent index build + * + * This has much code in common with IndexBuildHeapScan, but it's enough + * different that it seems cleaner to have two routines not one. + */ +static void +validate_index_heapscan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + Snapshot snapshot, + v_i_state *state) +{ + HeapScanDesc scan; + HeapTuple heapTuple; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + List *predicate; + TupleTableSlot *slot; + EState *estate; + ExprContext *econtext; + /* state variables for the merge */ + ItemPointer indexcursor = NULL; + bool tuplesort_empty = false; + + /* + * sanity checks + */ + Assert(OidIsValid(indexRelation->rd_rel->relam)); + + /* + * Need an EState for evaluation of index expressions and partial-index + * predicates. Also a slot to hold the current tuple. + */ + estate = CreateExecutorState(); + econtext = GetPerTupleExprContext(estate); + slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation)); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* Set up execution state for predicate, if any. */ + predicate = (List *) + ExecPrepareExpr((Expr *) indexInfo->ii_Predicate, + estate); + + /* + * Prepare for scan of the base relation. We need just those tuples + * satisfying the passed-in reference snapshot. + */ + scan = heap_beginscan(heapRelation, /* relation */ + snapshot, /* seeself */ + 0, /* number of keys */ + NULL); /* scan key */ + + /* + * Scan all tuples matching the snapshot. + */ + while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + { + ItemPointer heapcursor = &heapTuple->t_self; + + CHECK_FOR_INTERRUPTS(); + + state->htups += 1; + + /* + * "merge" by skipping through the index tuples until we find or + * pass the current heap tuple. 
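+		 * Both inputs are in physical (block, offset) order: the seqscan
+		 * returns heap TIDs in increasing order, and the index TIDs were
+		 * sorted into the same order by validate_index, so one forward
+		 * pass over each suffices.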
+ */ + while (!tuplesort_empty && + (!indexcursor || + ItemPointerCompare(indexcursor, heapcursor) < 0)) + { + Datum ts_val; + bool ts_isnull; + + if (indexcursor) + pfree(indexcursor); + tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true, + &ts_val, &ts_isnull); + Assert(tuplesort_empty || !ts_isnull); + indexcursor = (ItemPointer) DatumGetPointer(ts_val); + } + + if (tuplesort_empty || + ItemPointerCompare(indexcursor, heapcursor) > 0) + { + /* + * We've overshot which means this heap tuple is missing from the + * index, so insert it. + */ + bool check_unique; + + MemoryContextReset(econtext->ecxt_per_tuple_memory); + + /* Set up for predicate or expression evaluation */ + ExecStoreTuple(heapTuple, slot, InvalidBuffer, false); + + /* + * In a partial index, discard tuples that don't satisfy the + * predicate. + */ + if (predicate != NIL) + { + if (!ExecQual(predicate, econtext, false)) + continue; + } + + /* + * For the current heap tuple, extract all the attributes we use + * in this index, and note which are null. This also performs + * evaluation of any expressions needed. + */ + FormIndexDatum(indexInfo, + slot, + estate, + values, + isnull); + + /* + * If the tuple is already committed dead, we still have to + * put it in the index (because some xacts might be able to + * see it), but we might as well suppress uniqueness checking. + * This is just an optimization because the index AM is not + * supposed to raise a uniqueness failure anyway. + */ + if (indexInfo->ii_Unique) + { + /* must hold a buffer lock to call HeapTupleSatisfiesNow */ + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + + if (HeapTupleSatisfiesNow(heapTuple->t_data, scan->rs_cbuf)) + check_unique = true; + else + check_unique = false; + + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + } + else + check_unique = false; + + /* + * You'd think we should go ahead and build the index tuple here, + * but some index AMs want to do further processing on the data + * first. So pass the values[] and isnull[] arrays, instead. + */ + index_insert(indexRelation, + values, + isnull, + heapcursor, + heapRelation, + check_unique); + + state->tups_inserted += 1; + } + } + + heap_endscan(scan); + + ExecDropSingleTupleTableSlot(slot); + + FreeExecutorState(estate); + + /* These may have been pointing to the now-gone estate */ + indexInfo->ii_ExpressionsState = NIL; + indexInfo->ii_PredicateState = NIL; +} + + /* * IndexGetRelation: given an index's relation OID, get the OID of the * relation it is an index on. Uses the system cache. @@ -1530,9 +1883,12 @@ void reindex_index(Oid indexId) { Relation iRel, - heapRelation; + heapRelation, + pg_index; Oid heapId; bool inplace; + HeapTuple indexTuple; + Form_pg_index indexForm; /* * Open and lock the parent heap relation. ShareLock is sufficient since @@ -1600,6 +1956,28 @@ reindex_index(Oid indexId) PG_END_TRY(); ResetReindexProcessing(); + /* + * If the index is marked invalid (ie, it's from a failed CREATE INDEX + * CONCURRENTLY), we can now mark it valid. This allows REINDEX to be + * used to clean up in such cases. 
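+	 * (For a plain REINDEX of an index that is already valid, indisvalid
+	 * is already set and the pg_index row is left alone.)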
+ */ + pg_index = heap_open(IndexRelationId, RowExclusiveLock); + + indexTuple = SearchSysCacheCopy(INDEXRELID, + ObjectIdGetDatum(indexId), + 0, 0, 0); + if (!HeapTupleIsValid(indexTuple)) + elog(ERROR, "cache lookup failed for index %u", indexId); + indexForm = (Form_pg_index) GETSTRUCT(indexTuple); + + if (!indexForm->indisvalid) + { + indexForm->indisvalid = true; + simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); + CatalogUpdateIndexes(pg_index, indexTuple); + } + heap_close(pg_index, RowExclusiveLock); + /* Close rels, but keep locks */ index_close(iRel, NoLock); heap_close(heapRelation, NoLock); diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index eebe602ee0..aa3e7b8fef 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/toasting.c,v 1.1 2006/07/31 01:16:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/toasting.c,v 1.2 2006/08/25 04:06:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -218,6 +218,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid) indexInfo->ii_Predicate = NIL; indexInfo->ii_PredicateState = NIL; indexInfo->ii_Unique = true; + indexInfo->ii_Concurrent = false; classObjectId[0] = OID_BTREE_OPS_OID; classObjectId[1] = INT4_BTREE_OPS_OID; @@ -227,7 +228,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid) BTREE_AM_OID, rel->rd_rel->reltablespace, classObjectId, (Datum) 0, - true, false, true, false); + true, false, true, false, false); /* * Store the toast table's OID in the parent relation's pg_class row diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 97a5a72199..e96235c6f5 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.146 2006/07/31 01:16:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.147 2006/08/25 04:06:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,6 +18,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/reloptions.h" +#include "access/transam.h" #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/dependency.h" @@ -85,6 +86,7 @@ static bool relationHasPrimaryKey(Relation rel); * 'skip_build': make the catalog entries but leave the index file empty; * it will be filled later. * 'quiet': suppress the NOTICE chatter ordinarily provided for constraints. + * 'concurrent': avoid blocking writers to the table while building. */ void DefineIndex(RangeVar *heapRelation, @@ -102,7 +104,8 @@ DefineIndex(RangeVar *heapRelation, bool is_alter_table, bool check_rights, bool skip_build, - bool quiet) + bool quiet, + bool concurrent) { Oid *classObjectId; Oid accessMethodId; @@ -116,6 +119,12 @@ DefineIndex(RangeVar *heapRelation, Datum reloptions; IndexInfo *indexInfo; int numberOfAttributes; + uint32 ixcnt; + LockRelId heaprelid; + Snapshot snapshot; + Relation pg_index; + HeapTuple indexTuple; + Form_pg_index indexForm; /* * count attributes in index @@ -133,8 +142,16 @@ DefineIndex(RangeVar *heapRelation, /* * Open heap relation, acquire a suitable lock on it, remember its OID + * + * Only SELECT ... 
FOR UPDATE/SHARE are allowed while doing a standard + * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE + * (but not VACUUM). */ - rel = heap_openrv(heapRelation, ShareLock); + rel = heap_openrv(heapRelation, + (concurrent ? ShareUpdateExclusiveLock : ShareLock)); + + relationId = RelationGetRelid(rel); + namespaceId = RelationGetNamespace(rel); /* Note: during bootstrap may see uncataloged relation */ if (rel->rd_rel->relkind != RELKIND_RELATION && @@ -144,8 +161,13 @@ DefineIndex(RangeVar *heapRelation, errmsg("\"%s\" is not a table", heapRelation->relname))); - relationId = RelationGetRelid(rel); - namespaceId = RelationGetNamespace(rel); + /* + * Don't try to CREATE INDEX on temp tables of other backends. + */ + if (isOtherTempNamespace(namespaceId)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create indexes on temporary tables of other sessions"))); /* * Verify we (still) have CREATE rights in the rel's namespace. @@ -391,6 +413,7 @@ DefineIndex(RangeVar *heapRelation, indexInfo->ii_Predicate = make_ands_implicit(predicate); indexInfo->ii_PredicateState = NIL; indexInfo->ii_Unique = unique; + indexInfo->ii_Concurrent = concurrent; classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid)); ComputeIndexAttrs(indexInfo, classObjectId, attributeList, @@ -410,10 +433,122 @@ DefineIndex(RangeVar *heapRelation, primary ? "PRIMARY KEY" : "UNIQUE", indexRelationName, RelationGetRelationName(rel)))); - index_create(relationId, indexRelationName, indexRelationId, - indexInfo, accessMethodId, tablespaceId, classObjectId, - reloptions, primary, isconstraint, - allowSystemTableMods, skip_build); + indexRelationId = + index_create(relationId, indexRelationName, indexRelationId, + indexInfo, accessMethodId, tablespaceId, classObjectId, + reloptions, primary, isconstraint, + allowSystemTableMods, skip_build, concurrent); + + if (!concurrent) + return; /* We're done, in the standard case */ + + /* + * Phase 2 of concurrent index build (see comments for validate_index() + * for an overview of how this works) + * + * We must commit our current transaction so that the index becomes + * visible; then start another. Note that all the data structures + * we just built are lost in the commit. The only data we keep past + * here are the relation IDs. + * + * Before committing, get a session-level lock on the table, to ensure + * that neither it nor the index can be dropped before we finish. + * This cannot block, even if someone else is waiting for access, because + * we already have the same lock within our transaction. + * + * Note: we don't currently bother with a session lock on the index, + * because there are no operations that could change its state while + * we hold lock on the parent table. This might need to change later. + */ + heaprelid = rel->rd_lockInfo.lockRelId; + LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock); + + CommitTransactionCommand(); + StartTransactionCommand(); + + /* Establish transaction snapshot ... else GetLatestSnapshot complains */ + (void) GetTransactionSnapshot(); + + /* + * Now we must wait until no running transaction could have the table open + * with the old list of indexes. If we can take an exclusive lock then + * there are none now and anybody who opens it later will get the new + * index in their relcache entry. Alternatively, if our Xmin reaches our + * own (new) transaction then we know no transactions that started before + * the index was visible are left anyway. 
+ */ + for (;;) + { + CHECK_FOR_INTERRUPTS(); + + if (ConditionalLockRelationOid(relationId, ExclusiveLock)) + { + /* Release the lock right away to avoid blocking anyone */ + UnlockRelationOid(relationId, ExclusiveLock); + break; + } + + if (TransactionIdEquals(GetLatestSnapshot()->xmin, + GetTopTransactionId())) + break; + + pg_usleep(1000000L); /* 1 sec */ + } + + /* + * Now take the "reference snapshot" that will be used by validate_index() + * to filter candidate tuples. All other transactions running at this + * time will have to be out-waited before we can commit, because we can't + * guarantee that tuples deleted just before this will be in the index. + * + * We also set ActiveSnapshot to this snap, since functions in indexes + * may need a snapshot. + */ + snapshot = CopySnapshot(GetTransactionSnapshot()); + ActiveSnapshot = snapshot; + + /* + * Scan the index and the heap, insert any missing index entries. + */ + validate_index(relationId, indexRelationId, snapshot); + + /* + * The index is now valid in the sense that it contains all currently + * interesting tuples. But since it might not contain tuples deleted + * just before the reference snap was taken, we have to wait out any + * transactions older than the reference snap. We can do this by + * waiting for each xact explicitly listed in the snap. + * + * Note: GetSnapshotData() never stores our own xid into a snap, + * hence we need not check for that. + */ + for (ixcnt = 0; ixcnt < snapshot->xcnt; ixcnt++) + XactLockTableWait(snapshot->xip[ixcnt]); + + /* Index can now be marked valid -- update its pg_index entry */ + pg_index = heap_open(IndexRelationId, RowExclusiveLock); + + indexTuple = SearchSysCacheCopy(INDEXRELID, + ObjectIdGetDatum(indexRelationId), + 0, 0, 0); + if (!HeapTupleIsValid(indexTuple)) + elog(ERROR, "cache lookup failed for index %u", indexRelationId); + indexForm = (Form_pg_index) GETSTRUCT(indexTuple); + + Assert(indexForm->indexrelid = indexRelationId); + Assert(!indexForm->indisvalid); + + indexForm->indisvalid = true; + + simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); + CatalogUpdateIndexes(pg_index, indexTuple); + + heap_close(pg_index, RowExclusiveLock); + + /* + * Last thing to do is release the session-level lock on the parent table. 
+ */ + UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock); } diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index a1f7603337..7e8884496d 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.200 2006/08/21 00:57:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.201 2006/08/25 04:06:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3832,7 +3832,8 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel, true, /* is_alter_table */ check_rights, skip_build, - quiet); + quiet, + false); } /* diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 40e35a3796..391846bee2 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.348 2006/08/21 00:57:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.349 2006/08/25 04:06:49 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2049,6 +2049,7 @@ _copyIndexStmt(IndexStmt *from) COPY_SCALAR_FIELD(unique); COPY_SCALAR_FIELD(primary); COPY_SCALAR_FIELD(isconstraint); + COPY_SCALAR_FIELD(concurrent); return newnode; } diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index b4d0632c03..3cb4b8aee3 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.282 2006/08/21 00:57:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.283 2006/08/25 04:06:49 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -962,6 +962,7 @@ _equalIndexStmt(IndexStmt *a, IndexStmt *b) COMPARE_SCALAR_FIELD(unique); COMPARE_SCALAR_FIELD(primary); COMPARE_SCALAR_FIELD(isconstraint); + COMPARE_SCALAR_FIELD(concurrent); return true; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index a4b4044385..7b126b7f54 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.283 2006/08/21 00:57:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.284 2006/08/25 04:06:50 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1353,6 +1353,7 @@ _outIndexStmt(StringInfo str, IndexStmt *node) WRITE_BOOL_FIELD(unique); WRITE_BOOL_FIELD(primary); WRITE_BOOL_FIELD(isconstraint); + WRITE_BOOL_FIELD(concurrent); } static void diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 47e39c6a78..5e3c7d9857 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.124 2006/08/05 00:22:49 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.125 2006/08/25 04:06:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -138,6 +138,18 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel) 
indexRelation = index_open(indexoid, lmode); index = indexRelation->rd_index; + /* + * Ignore invalid indexes, since they can't safely be used for + * queries. Note that this is OK because the data structure + * we are constructing is only used by the planner --- the + * executor still needs to insert into "invalid" indexes! + */ + if (!index->indisvalid) + { + index_close(indexRelation, NoLock); + continue; + } + info = makeNode(IndexOptInfo); info->indexoid = index->indexrelid; diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 8eb50fb573..ae3469c86c 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.347 2006/08/21 00:57:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/analyze.c,v 1.348 2006/08/25 04:06:51 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1488,6 +1488,7 @@ transformIndexConstraints(ParseState *pstate, CreateStmtContext *cxt) index->tableSpace = constraint->indexspace; index->indexParams = NIL; index->whereClause = NULL; + index->concurrent = false; /* * Make sure referenced keys exist. If we are making a PRIMARY KEY diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 60761ae6bc..a77e73a43f 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.557 2006/08/21 00:57:25 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.558 2006/08/25 04:06:51 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -364,7 +364,8 @@ static void doNegateFloat(Value *v); CACHE CALLED CASCADE CASCADED CASE CAST CHAIN CHAR_P CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT - COMMITTED CONNECTION CONSTRAINT CONSTRAINTS CONVERSION_P CONVERT COPY CREATE CREATEDB + COMMITTED CONCURRENTLY CONNECTION CONSTRAINT CONSTRAINTS + CONVERSION_P CONVERT COPY CREATE CREATEDB CREATEROLE CREATEUSER CROSS CSV CURRENT_DATE CURRENT_ROLE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE @@ -3638,20 +3639,22 @@ opt_granted_by: GRANTED BY RoleId { $$ = $3; } /***************************************************************************** * - * QUERY: - * create index on - * [ using ] "(" ( [ using ] )+ ")" - * [ tablespace ] [ where ] + * QUERY: CREATE INDEX + * + * Note: we can't factor CONCURRENTLY into a separate production without + * making it a reserved word. * * Note: we cannot put TABLESPACE clause after WHERE clause unless we are * willing to make TABLESPACE a fully reserved word. 
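+ *
+ * Example of the full form accepted here (identifier names are only
+ * illustrative):
+ *		CREATE UNIQUE INDEX CONCURRENTLY idx ON tab USING btree (lower(col))
+ *			TABLESPACE ts WHERE col IS NOT NULL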
*****************************************************************************/ -IndexStmt: CREATE index_opt_unique INDEX index_name ON qualified_name - access_method_clause '(' index_params ')' opt_definition OptTableSpace where_clause +IndexStmt: CREATE index_opt_unique INDEX index_name + ON qualified_name access_method_clause '(' index_params ')' + opt_definition OptTableSpace where_clause { IndexStmt *n = makeNode(IndexStmt); n->unique = $2; + n->concurrent = false; n->idxname = $4; n->relation = $6; n->accessMethod = $7; @@ -3661,6 +3664,22 @@ IndexStmt: CREATE index_opt_unique INDEX index_name ON qualified_name n->whereClause = $13; $$ = (Node *)n; } + | CREATE index_opt_unique INDEX CONCURRENTLY index_name + ON qualified_name access_method_clause '(' index_params ')' + opt_definition OptTableSpace where_clause + { + IndexStmt *n = makeNode(IndexStmt); + n->unique = $2; + n->concurrent = true; + n->idxname = $5; + n->relation = $7; + n->accessMethod = $8; + n->indexParams = $10; + n->options = $12; + n->tableSpace = $13; + n->whereClause = $14; + $$ = (Node *)n; + } ; index_opt_unique: @@ -8491,6 +8510,7 @@ unreserved_keyword: | COMMENT | COMMIT | COMMITTED + | CONCURRENTLY | CONNECTION | CONSTRAINTS | CONVERSION_P diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c index e799d68ae6..9867982cdb 100644 --- a/src/backend/parser/keywords.c +++ b/src/backend/parser/keywords.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.175 2006/08/12 02:52:05 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.176 2006/08/25 04:06:52 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -84,6 +84,7 @@ static const ScanKeyword ScanKeywords[] = { {"comment", COMMENT}, {"commit", COMMIT}, {"committed", COMMITTED}, + {"concurrently", CONCURRENTLY}, {"connection", CONNECTION}, {"constraint", CONSTRAINT}, {"constraints", CONSTRAINTS}, diff --git a/src/backend/storage/page/itemptr.c b/src/backend/storage/page/itemptr.c index 1fcd6cb0ac..08f2273ab9 100644 --- a/src/backend/storage/page/itemptr.c +++ b/src/backend/storage/page/itemptr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/page/itemptr.c,v 1.17 2006/07/14 14:52:23 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/page/itemptr.c,v 1.18 2006/08/25 04:06:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,13 +16,14 @@ #include "storage/itemptr.h" + /* * ItemPointerEquals * Returns true if both item pointers point to the same item, * otherwise returns false. * * Note: - * Assumes that the disk item pointers are not NULL. + * Asserts that the disk item pointers are both valid! */ bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2) @@ -35,3 +36,30 @@ ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2) else return false; } + +/* + * ItemPointerCompare + * Generic btree-style comparison for item pointers. + */ +int32 +ItemPointerCompare(ItemPointer arg1, ItemPointer arg2) +{ + /* + * Don't use ItemPointerGetBlockNumber or ItemPointerGetOffsetNumber here, + * because they assert ip_posid != 0 which might not be true for a + * user-supplied TID. 
+ */ + BlockNumber b1 = BlockIdGetBlockNumber(&(arg1->ip_blkid)); + BlockNumber b2 = BlockIdGetBlockNumber(&(arg2->ip_blkid)); + + if (b1 < b2) + return -1; + else if (b1 > b2) + return 1; + else if (arg1->ip_posid < arg2->ip_posid) + return -1; + else if (arg1->ip_posid > arg2->ip_posid) + return 1; + else + return 0; +} diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index ce86a90ba3..781026b323 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.266 2006/08/15 18:26:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.267 2006/08/25 04:06:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -781,6 +781,9 @@ ProcessUtility(Node *parsetree, { IndexStmt *stmt = (IndexStmt *) parsetree; + if (stmt->concurrent) + PreventTransactionChain(stmt, "CREATE INDEX CONCURRENTLY"); + CheckRelationOwnership(stmt->relation, true); DefineIndex(stmt->relation, /* relation */ @@ -795,10 +798,11 @@ ProcessUtility(Node *parsetree, stmt->unique, stmt->primary, stmt->isconstraint, - false, /* is_alter_table */ - true, /* check_rights */ - false, /* skip_build */ - false); /* quiet */ + false, /* is_alter_table */ + true, /* check_rights */ + false, /* skip_build */ + false, /* quiet */ + stmt->concurrent); /* concurrent */ } break; diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c index 814670521e..1362d0a3f8 100644 --- a/src/backend/utils/adt/tid.c +++ b/src/backend/utils/adt/tid.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tid.c,v 1.54 2006/07/21 20:51:32 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tid.c,v 1.55 2006/08/25 04:06:53 tgl Exp $ * * NOTES * input routine largely stolen from boxin(). @@ -158,36 +158,13 @@ tidsend(PG_FUNCTION_ARGS) * PUBLIC ROUTINES * *****************************************************************************/ -static int32 -tid_cmp_internal(ItemPointer arg1, ItemPointer arg2) -{ - /* - * Don't use ItemPointerGetBlockNumber or ItemPointerGetOffsetNumber here, - * because they assert ip_posid != 0 which might not be true for a - * user-supplied TID. 
- */ - BlockNumber b1 = BlockIdGetBlockNumber(&(arg1->ip_blkid)); - BlockNumber b2 = BlockIdGetBlockNumber(&(arg2->ip_blkid)); - - if (b1 < b2) - return -1; - else if (b1 > b2) - return 1; - else if (arg1->ip_posid < arg2->ip_posid) - return -1; - else if (arg1->ip_posid > arg2->ip_posid) - return 1; - else - return 0; -} - Datum tideq(PG_FUNCTION_ARGS) { ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); - PG_RETURN_BOOL(tid_cmp_internal(arg1,arg2) == 0); + PG_RETURN_BOOL(ItemPointerCompare(arg1,arg2) == 0); } Datum @@ -196,7 +173,7 @@ tidne(PG_FUNCTION_ARGS) ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); - PG_RETURN_BOOL(tid_cmp_internal(arg1,arg2) != 0); + PG_RETURN_BOOL(ItemPointerCompare(arg1,arg2) != 0); } Datum @@ -205,7 +182,7 @@ tidlt(PG_FUNCTION_ARGS) ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); - PG_RETURN_BOOL(tid_cmp_internal(arg1,arg2) < 0); + PG_RETURN_BOOL(ItemPointerCompare(arg1,arg2) < 0); } Datum @@ -214,7 +191,7 @@ tidle(PG_FUNCTION_ARGS) ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); - PG_RETURN_BOOL(tid_cmp_internal(arg1,arg2) <= 0); + PG_RETURN_BOOL(ItemPointerCompare(arg1,arg2) <= 0); } Datum @@ -223,7 +200,7 @@ tidgt(PG_FUNCTION_ARGS) ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); - PG_RETURN_BOOL(tid_cmp_internal(arg1,arg2) > 0); + PG_RETURN_BOOL(ItemPointerCompare(arg1,arg2) > 0); } Datum @@ -232,7 +209,7 @@ tidge(PG_FUNCTION_ARGS) ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); - PG_RETURN_BOOL(tid_cmp_internal(arg1,arg2) >= 0); + PG_RETURN_BOOL(ItemPointerCompare(arg1,arg2) >= 0); } Datum @@ -241,7 +218,7 @@ bttidcmp(PG_FUNCTION_ARGS) ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); - PG_RETURN_INT32(tid_cmp_internal(arg1, arg2)); + PG_RETURN_INT32(ItemPointerCompare(arg1, arg2)); } Datum @@ -250,7 +227,7 @@ tidlarger(PG_FUNCTION_ARGS) ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); - PG_RETURN_ITEMPOINTER(tid_cmp_internal(arg1,arg2) >= 0 ? arg1 : arg2); + PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1,arg2) >= 0 ? arg1 : arg2); } Datum @@ -259,7 +236,7 @@ tidsmaller(PG_FUNCTION_ARGS) ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0); ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1); - PG_RETURN_ITEMPOINTER(tid_cmp_internal(arg1,arg2) <= 0 ? arg1 : arg2); + PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1,arg2) <= 0 ? arg1 : arg2); } diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c index 3233dfc2fa..7bdf6d15dc 100644 --- a/src/bin/psql/common.c +++ b/src/bin/psql/common.c @@ -3,7 +3,7 @@ * * Copyright (c) 2000-2006, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.124 2006/08/13 21:10:04 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.125 2006/08/25 04:06:54 tgl Exp $ */ #include "postgres_fe.h" #include "common.h" @@ -1075,22 +1075,71 @@ command_no_begin(const char *query) * Commands not allowed within transactions. The statements checked for * here should be exactly those that call PreventTransactionChain() in the * backend. - * - * Note: we are a bit sloppy about CLUSTER, which is transactional in some - * variants but not others. 
*/ if (wordlen == 6 && pg_strncasecmp(query, "vacuum", 6) == 0) return true; if (wordlen == 7 && pg_strncasecmp(query, "cluster", 7) == 0) - return true; + { + /* CLUSTER with any arguments is allowed in transactions */ + query += wordlen; + + query = skip_white_space(query); + + if (isalpha((unsigned char) query[0])) + return false; /* has additional words */ + return true; /* it's CLUSTER without arguments */ + } + + if (wordlen == 6 && pg_strncasecmp(query, "create", 6) == 0) + { + query += wordlen; + + query = skip_white_space(query); + + wordlen = 0; + while (isalpha((unsigned char) query[wordlen])) + wordlen += PQmblen(&query[wordlen], pset.encoding); + + if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0) + return true; + if (wordlen == 10 && pg_strncasecmp(query, "tablespace", 10) == 0) + return true; + + /* CREATE [UNIQUE] INDEX CONCURRENTLY isn't allowed in xacts */ + if (wordlen == 6 && pg_strncasecmp(query, "unique", 6) == 0) + { + query += wordlen; + + query = skip_white_space(query); + + wordlen = 0; + while (isalpha((unsigned char) query[wordlen])) + wordlen += PQmblen(&query[wordlen], pset.encoding); + } + + if (wordlen == 5 && pg_strncasecmp(query, "index", 5) == 0) + { + query += wordlen; + + query = skip_white_space(query); + + wordlen = 0; + while (isalpha((unsigned char) query[wordlen])) + wordlen += PQmblen(&query[wordlen], pset.encoding); + + if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0) + return true; + } + + return false; + } /* - * Note: these tests will match CREATE SYSTEM, DROP SYSTEM, and REINDEX - * TABLESPACE, which aren't really valid commands so we don't care much. - * The other six possible matches are correct. + * Note: these tests will match DROP SYSTEM and REINDEX TABLESPACE, + * which aren't really valid commands so we don't care much. + * The other four possible matches are correct. 
*/ - if ((wordlen == 6 && pg_strncasecmp(query, "create", 6) == 0) || - (wordlen == 4 && pg_strncasecmp(query, "drop", 4) == 0) || + if ((wordlen == 4 && pg_strncasecmp(query, "drop", 4) == 0) || (wordlen == 7 && pg_strncasecmp(query, "reindex", 7) == 0)) { query += wordlen; diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 4e4f2dd4b3..49db39074d 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -3,7 +3,7 @@ * * Copyright (c) 2000-2006, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/bin/psql/describe.c,v 1.142 2006/07/27 19:52:06 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/psql/describe.c,v 1.143 2006/08/25 04:06:54 tgl Exp $ */ #include "postgres_fe.h" #include "describe.h" @@ -942,7 +942,7 @@ describeOneTableDetails(const char *schemaname, PGresult *result; printfPQExpBuffer(&buf, - "SELECT i.indisunique, i.indisprimary, i.indisclustered, a.amname, c2.relname,\n" + "SELECT i.indisunique, i.indisprimary, i.indisclustered, i.indisvalid, a.amname, c2.relname,\n" " pg_catalog.pg_get_expr(i.indpred, i.indrelid, true)\n" "FROM pg_catalog.pg_index i, pg_catalog.pg_class c, pg_catalog.pg_class c2, pg_catalog.pg_am a\n" "WHERE i.indexrelid = c.oid AND c.oid = '%s' AND c.relam = a.oid\n" @@ -962,9 +962,10 @@ describeOneTableDetails(const char *schemaname, char *indisunique = PQgetvalue(result, 0, 0); char *indisprimary = PQgetvalue(result, 0, 1); char *indisclustered = PQgetvalue(result, 0, 2); - char *indamname = PQgetvalue(result, 0, 3); - char *indtable = PQgetvalue(result, 0, 4); - char *indpred = PQgetvalue(result, 0, 5); + char *indisvalid = PQgetvalue(result, 0, 3); + char *indamname = PQgetvalue(result, 0, 4); + char *indtable = PQgetvalue(result, 0, 5); + char *indpred = PQgetvalue(result, 0, 6); int count_footers = 0; if (strcmp(indisprimary, "t") == 0) @@ -985,6 +986,9 @@ describeOneTableDetails(const char *schemaname, if (strcmp(indisclustered, "t") == 0) appendPQExpBuffer(&tmpbuf, _(", clustered")); + if (strcmp(indisvalid, "t") != 0) + appendPQExpBuffer(&tmpbuf, _(", invalid")); + footers = pg_malloc_zero(4 * sizeof(*footers)); footers[count_footers++] = pg_strdup(tmpbuf.data); add_tablespace_footer(tableinfo.relkind, tableinfo.tablespace, @@ -1067,7 +1071,7 @@ describeOneTableDetails(const char *schemaname, if (tableinfo.hasindex) { printfPQExpBuffer(&buf, - "SELECT c2.relname, i.indisprimary, i.indisunique, i.indisclustered, " + "SELECT c2.relname, i.indisprimary, i.indisunique, i.indisclustered, i.indisvalid, " "pg_catalog.pg_get_indexdef(i.indexrelid, 0, true), c2.reltablespace\n" "FROM pg_catalog.pg_class c, pg_catalog.pg_class c2, pg_catalog.pg_index i\n" "WHERE c.oid = '%s' AND c.oid = i.indrelid AND i.indexrelid = c2.oid\n" @@ -1201,7 +1205,7 @@ describeOneTableDetails(const char *schemaname, ? 
" UNIQUE," : "")); /* Everything after "USING" is echoed verbatim */ - indexdef = PQgetvalue(result1, i, 4); + indexdef = PQgetvalue(result1, i, 5); usingpos = strstr(indexdef, " USING "); if (usingpos) indexdef = usingpos + 7; @@ -1211,11 +1215,14 @@ describeOneTableDetails(const char *schemaname, if (strcmp(PQgetvalue(result1, i, 3), "t") == 0) appendPQExpBuffer(&buf, " CLUSTER"); + if (strcmp(PQgetvalue(result1, i, 4), "t") != 0) + appendPQExpBuffer(&buf, " INVALID"); + /* Print tablespace of the index on the same line */ count_footers += 1; initPQExpBuffer(&tmpbuf); if (add_tablespace_footer('i', - atooid(PQgetvalue(result1, i, 5)), + atooid(PQgetvalue(result1, i, 6)), footers, &count_footers, tmpbuf, false)) { appendPQExpBuffer(&buf, ", "); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 448f9e4ecf..6266b591ca 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.352 2006/08/21 00:57:26 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.353 2006/08/25 04:06:54 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200608191 +#define CATALOG_VERSION_NO 200608211 #endif diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index f54508fb5b..881ef90279 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/index.h,v 1.70 2006/07/31 01:16:37 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/index.h,v 1.71 2006/08/25 04:06:55 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -39,7 +39,8 @@ extern Oid index_create(Oid heapRelationId, bool isprimary, bool isconstraint, bool allow_system_table_mods, - bool skip_build); + bool skip_build, + bool concurrent); extern void index_drop(Oid indexId); @@ -64,6 +65,8 @@ extern double IndexBuildHeapScan(Relation heapRelation, IndexBuildCallback callback, void *callback_state); +extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot); + extern void reindex_index(Oid indexId); extern bool reindex_relation(Oid relid, bool toast_too); diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h index e098079b93..68d7eae7ab 100644 --- a/src/include/catalog/pg_attribute.h +++ b/src/include/catalog/pg_attribute.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_attribute.h,v 1.123 2006/07/10 16:20:51 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_attribute.h,v 1.124 2006/08/25 04:06:55 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -460,9 +460,10 @@ DATA(insert ( 1259 tableoid 26 0 4 -7 0 -1 -1 t p i t f f t 0)); { 0, {"indisunique"}, 16, -1, 1, 4, 0, -1, -1, true, 'p', 'c', true, false, false, true, 0 }, \ { 0, {"indisprimary"}, 16, -1, 1, 5, 0, -1, -1, true, 'p', 'c', true, false, false, true, 0 }, \ { 0, {"indisclustered"}, 16, 
diff --git a/src/include/catalog/pg_index.h b/src/include/catalog/pg_index.h
index bc7026cc25..506fbc293d 100644
--- a/src/include/catalog/pg_index.h
+++ b/src/include/catalog/pg_index.h
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_index.h,v 1.39 2006/03/05 15:58:54 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_index.h,v 1.40 2006/08/25 04:06:55 tgl Exp $
  *
  * NOTES
  *	  the genbki.sh script reads this file and generates .bki
@@ -41,6 +41,7 @@ CATALOG(pg_index,2610) BKI_WITHOUT_OIDS
 	bool		indisunique;	/* is this a unique index? */
 	bool		indisprimary;	/* is this index for primary key? */
 	bool		indisclustered; /* is this the index last clustered by? */
+	bool		indisvalid;		/* is this index valid for use by queries? */
 
 	/* VARIABLE LENGTH FIELDS: */
 	int2vector	indkey;			/* column numbers of indexed cols, or 0 */
@@ -63,16 +64,17 @@ typedef FormData_pg_index *Form_pg_index;
  *		compiler constants for pg_index
  * ----------------
  */
-#define Natts_pg_index					10
+#define Natts_pg_index					11
 #define Anum_pg_index_indexrelid		1
 #define Anum_pg_index_indrelid			2
 #define Anum_pg_index_indnatts			3
 #define Anum_pg_index_indisunique		4
 #define Anum_pg_index_indisprimary		5
 #define Anum_pg_index_indisclustered	6
-#define Anum_pg_index_indkey			7
-#define Anum_pg_index_indclass			8
-#define Anum_pg_index_indexprs			9
-#define Anum_pg_index_indpred			10
+#define Anum_pg_index_indisvalid		7
+#define Anum_pg_index_indkey			8
+#define Anum_pg_index_indclass			9
+#define Anum_pg_index_indexprs			10
+#define Anum_pg_index_indpred			11
 
 #endif   /* PG_INDEX_H */
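pg_index.h gains the indisvalid column, an incremented Natts_pg_index, and renumbered Anum_* constants. With the flag now part of Form_pg_index, backend code can test it directly from a syscache copy of the pg_index row. A sketch of such a check, using the SearchSysCache() calling convention of this tree (the function name is invented; this is not code from the patch):

/*
 * Sketch only: report whether an index may be used for queries, based on
 * the new pg_index.indisvalid flag.
 */
#include "postgres.h"
#include "access/htup.h"
#include "catalog/pg_index.h"
#include "utils/syscache.h"

static bool
index_is_usable(Oid indexOid)
{
    HeapTuple   tuple;
    bool        result;

    tuple = SearchSysCache(INDEXRELID,
                           ObjectIdGetDatum(indexOid),
                           0, 0, 0);
    if (!HeapTupleIsValid(tuple))
        elog(ERROR, "cache lookup failed for index %u", indexOid);

    result = ((Form_pg_index) GETSTRUCT(tuple))->indisvalid;
    ReleaseSysCache(tuple);

    return result;
}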
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index 902931d02a..26983fc198 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/commands/defrem.h,v 1.75 2006/07/18 17:42:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/commands/defrem.h,v 1.76 2006/08/25 04:06:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -33,7 +33,8 @@ extern void DefineIndex(RangeVar *heapRelation,
 			bool is_alter_table,
 			bool check_rights,
 			bool skip_build,
-			bool quiet);
+			bool quiet,
+			bool concurrent);
 extern void RemoveIndex(RangeVar *relation, DropBehavior behavior);
 extern void ReindexIndex(RangeVar *indexRelation);
 extern void ReindexTable(RangeVar *relation);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 47b7c01f23..35ee8a20d0 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.159 2006/08/12 02:52:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.160 2006/08/25 04:06:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -37,6 +37,7 @@
  *		Predicate			partial-index predicate, or NIL if none
  *		PredicateState		exec state for predicate, or NIL if none
  *		Unique				is it a unique index?
+ *		Concurrent			are we doing a concurrent index build?
  * ----------------
  */
 typedef struct IndexInfo
@@ -49,6 +50,7 @@ typedef struct IndexInfo
 	List	   *ii_Predicate;	/* list of Expr */
 	List	   *ii_PredicateState;		/* list of ExprState */
 	bool		ii_Unique;
+	bool		ii_Concurrent;
 } IndexInfo;
 
 /* ----------------
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index a3ae58d17b..7aa7bfd38e 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.324 2006/08/21 00:57:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.325 2006/08/25 04:06:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1463,6 +1463,7 @@ typedef struct IndexStmt
 	bool		unique;			/* is index unique? */
 	bool		primary;		/* is index on primary key? */
 	bool		isconstraint;	/* is it from a CONSTRAINT clause? */
+	bool		concurrent;		/* should this be a concurrent index build? */
 } IndexStmt;
 
 /* ----------------------
diff --git a/src/include/storage/itemptr.h b/src/include/storage/itemptr.h
index c6eaaf1911..b558eb33eb 100644
--- a/src/include/storage/itemptr.h
+++ b/src/include/storage/itemptr.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/itemptr.h,v 1.28 2006/03/05 15:58:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/itemptr.h,v 1.29 2006/08/25 04:06:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -141,5 +141,6 @@ typedef ItemPointerData *ItemPointer;
  */
 
 extern bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2);
+extern int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2);
 
 #endif   /* ITEMPTR_H */
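itemptr.h only adds the ItemPointerCompare() prototype here; its definition lives elsewhere in the patch, presumably in support of the new validate_index() machinery, which needs an ordering on TIDs. A comparator consistent with that declaration would order item pointers by block number and then by offset, roughly as sketched below; this is an assumption about the implementation, not the actual itemptr.c code:

#include "postgres.h"
#include "storage/itemptr.h"

int32
ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
{
    /* Compare block numbers first, then offsets within the block. */
    BlockNumber b1 = ItemPointerGetBlockNumber(arg1);
    BlockNumber b2 = ItemPointerGetBlockNumber(arg2);

    if (b1 != b2)
        return (b1 < b2) ? -1 : 1;
    if (ItemPointerGetOffsetNumber(arg1) != ItemPointerGetOffsetNumber(arg2))
        return (ItemPointerGetOffsetNumber(arg1) <
                ItemPointerGetOffsetNumber(arg2)) ? -1 : 1;
    return 0;
}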
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 715462e3d5..1604d87377 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -360,3 +360,53 @@ INSERT INTO func_index_heap VALUES('QWERTY');
 create unique index hash_f8_index_1 on hash_f8_heap(abs(random));
 create unique index hash_f8_index_2 on hash_f8_heap((seqno + 1), random);
 create unique index hash_f8_index_3 on hash_f8_heap(random) where seqno > 1000;
+--
+-- Try some concurrent index builds
+--
+-- Unfortunately this only tests about half the code paths because there are
+-- no concurrent updates happening to the table at the same time.
+CREATE TABLE concur_heap (f1 text, f2 text);
+-- empty table
+CREATE INDEX CONCURRENTLY concur_index1 ON concur_heap(f2,f1);
+INSERT INTO concur_heap VALUES ('a','b');
+INSERT INTO concur_heap VALUES ('b','b');
+-- unique index
+CREATE UNIQUE INDEX CONCURRENTLY concur_index2 ON concur_heap(f1);
+-- check if constraint is set up properly to be enforced
+INSERT INTO concur_heap VALUES ('b','x');
+ERROR:  duplicate key violates unique constraint "concur_index2"
+-- check if constraint is enforced properly at build time
+CREATE UNIQUE INDEX CONCURRENTLY concur_index3 ON concur_heap(f2);
+ERROR:  could not create unique index
+DETAIL:  Table contains duplicated values.
+-- test that expression indexes and partial indexes work concurrently
+CREATE INDEX CONCURRENTLY concur_index4 on concur_heap(f2) WHERE f1='a';
+CREATE INDEX CONCURRENTLY concur_index5 on concur_heap(f2) WHERE f1='x';
+CREATE INDEX CONCURRENTLY concur_index6 on concur_heap((f2||f1));
+-- You can't do a concurrent index build in a transaction
+BEGIN;
+CREATE INDEX CONCURRENTLY concur_index7 ON concur_heap(f1);
+ERROR:  CREATE INDEX CONCURRENTLY cannot run inside a transaction block
+COMMIT;
+-- But you can do a regular index build in a transaction
+BEGIN;
+CREATE INDEX std_index on concur_heap(f2);
+COMMIT;
+-- check to make sure that the failed indexes were cleaned up properly and the
+-- successful indexes are created properly. Notably that they do NOT have the
+-- "invalid" flag set.
+\d concur_heap
+Table "public.concur_heap"
+ Column | Type | Modifiers 
+--------+------+-----------
+ f1     | text | 
+ f2     | text | 
+Indexes:
+    "concur_index2" UNIQUE, btree (f1)
+    "concur_index1" btree (f2, f1)
+    "concur_index4" btree (f2) WHERE f1 = 'a'::text
+    "concur_index5" btree (f2) WHERE f1 = 'x'::text
+    "concur_index6" btree ((f2 || f1))
+    "std_index" btree (f2)
+
+DROP TABLE concur_heap;
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 37df3ee2ca..33211e967b 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -219,3 +219,43 @@ INSERT INTO func_index_heap VALUES('QWERTY');
 create unique index hash_f8_index_1 on hash_f8_heap(abs(random));
 create unique index hash_f8_index_2 on hash_f8_heap((seqno + 1), random);
 create unique index hash_f8_index_3 on hash_f8_heap(random) where seqno > 1000;
+
+--
+-- Try some concurrent index builds
+--
+-- Unfortunately this only tests about half the code paths because there are
+-- no concurrent updates happening to the table at the same time.
+
+CREATE TABLE concur_heap (f1 text, f2 text);
+-- empty table
+CREATE INDEX CONCURRENTLY concur_index1 ON concur_heap(f2,f1);
+INSERT INTO concur_heap VALUES ('a','b');
+INSERT INTO concur_heap VALUES ('b','b');
+-- unique index
+CREATE UNIQUE INDEX CONCURRENTLY concur_index2 ON concur_heap(f1);
+-- check if constraint is set up properly to be enforced
+INSERT INTO concur_heap VALUES ('b','x');
+-- check if constraint is enforced properly at build time
+CREATE UNIQUE INDEX CONCURRENTLY concur_index3 ON concur_heap(f2);
+-- test that expression indexes and partial indexes work concurrently
+CREATE INDEX CONCURRENTLY concur_index4 on concur_heap(f2) WHERE f1='a';
+CREATE INDEX CONCURRENTLY concur_index5 on concur_heap(f2) WHERE f1='x';
+CREATE INDEX CONCURRENTLY concur_index6 on concur_heap((f2||f1));
+
+-- You can't do a concurrent index build in a transaction
+BEGIN;
+CREATE INDEX CONCURRENTLY concur_index7 ON concur_heap(f1);
+COMMIT;
+
+-- But you can do a regular index build in a transaction
+BEGIN;
+CREATE INDEX std_index on concur_heap(f2);
+COMMIT;
+
+-- check to make sure that the failed indexes were cleaned up properly and the
+-- successful indexes are created properly. Notably that they do NOT have the
+-- "invalid" flag set.
+
+\d concur_heap
+
+DROP TABLE concur_heap;
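The regression test relies on inspecting \d output to confirm that the failed builds (concur_index3, concur_index7) left nothing behind and that the surviving indexes are not flagged invalid. The same check can be scripted against the catalogs; a small libpq sketch, with the connection string, table name, and program structure chosen for illustration only:

/*
 * Sketch only: list any indexes on concur_heap that are flagged invalid
 * and exit nonzero if one is found.
 */
#include <stdio.h>
#include <libpq-fe.h>

int
main(void)
{
    const char *sql =
        "SELECT c2.relname "
        "FROM pg_catalog.pg_index i "
        "JOIN pg_catalog.pg_class c ON c.oid = i.indrelid "
        "JOIN pg_catalog.pg_class c2 ON c2.oid = i.indexrelid "
        "WHERE c.relname = 'concur_heap' AND NOT i.indisvalid";
    PGconn     *conn = PQconnectdb("dbname=regression");
    PGresult   *res;
    int         i,
                nbad;

    if (PQstatus(conn) != CONNECTION_OK)
    {
        fprintf(stderr, "connection failed: %s", PQerrorMessage(conn));
        PQfinish(conn);
        return 1;
    }

    res = PQexec(conn, sql);
    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        fprintf(stderr, "query failed: %s", PQerrorMessage(conn));
        PQclear(res);
        PQfinish(conn);
        return 1;
    }

    nbad = PQntuples(res);
    for (i = 0; i < nbad; i++)
        printf("invalid index left behind: %s\n", PQgetvalue(res, i, 0));

    PQclear(res);
    PQfinish(conn);
    return (nbad > 0) ? 1 : 0;
}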