diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 0535fd6278..bc3338b115 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -6,7 +6,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.45 1999/09/18 19:05:46 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.46 1999/09/24 00:23:42 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -141,7 +141,7 @@ gistbuild(Relation heap, tupleTable = ExecCreateTupleTable(1); slot = ExecAllocTableSlot(tupleTable); econtext = makeNode(ExprContext); - FillDummyExprContext(econtext, slot, hd, buffer); + FillDummyExprContext(econtext, slot, hd, InvalidBuffer); } else /* shut the compiler up */ diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 6e729008e8..ca7c14d30d 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.31 1999/09/18 19:05:52 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.32 1999/09/24 00:23:48 tgl Exp $ * * NOTES * This file contains only the public interface routines. 
@@ -58,7 +58,6 @@ hashbuild(Relation heap, nitups; int i; HashItem hitem; - Buffer buffer = InvalidBuffer; #ifndef OMIT_PARTIAL_INDEX ExprContext *econtext; @@ -101,7 +100,7 @@ hashbuild(Relation heap, tupleTable = ExecCreateTupleTable(1); slot = ExecAllocTableSlot(tupleTable); econtext = makeNode(ExprContext); - FillDummyExprContext(econtext, slot, htupdesc, buffer); + FillDummyExprContext(econtext, slot, htupdesc, InvalidBuffer); } else /* quiet the compiler */ diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 092a23cc0e..4c05492194 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.54 1999/09/18 19:05:58 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.55 1999/09/24 00:23:54 tgl Exp $ * * * INTERFACE ROUTINES @@ -160,7 +160,7 @@ unpinscan(HeapScanDesc scan) ReleaseBuffer(scan->rs_pbuf); /* ------------------------------------ - * Scan will pin buffer one for each non-NULL tuple pointer + * Scan will pin buffer once for each non-NULL tuple pointer * (ptup, ctup, ntup), so they have to be unpinned multiple * times. * ------------------------------------ @@ -170,6 +170,10 @@ unpinscan(HeapScanDesc scan) if (BufferIsValid(scan->rs_nbuf)) ReleaseBuffer(scan->rs_nbuf); + + /* we don't bother to clear rs_pbuf etc --- caller must + * reinitialize them if scan descriptor is not being deleted. 
+ */ } /* ------------------------------------------ @@ -826,6 +830,8 @@ heap_getnext(HeapScanDesc scandesc, int backw) { if (BufferIsValid(scan->rs_nbuf)) ReleaseBuffer(scan->rs_nbuf); + scan->rs_ntup.t_data = NULL; + scan->rs_nbuf = UnknownBuffer; return NULL; } @@ -906,6 +912,8 @@ heap_getnext(HeapScanDesc scandesc, int backw) { if (BufferIsValid(scan->rs_pbuf)) ReleaseBuffer(scan->rs_pbuf); + scan->rs_ptup.t_data = NULL; + scan->rs_pbuf = UnknownBuffer; HEAPDEBUG_3; /* heap_getnext returns NULL at end */ return NULL; } @@ -1014,8 +1022,6 @@ heap_fetch(Relation relation, ItemPointer tid = &(tuple->t_self); OffsetNumber offnum; - AssertMacro(PointerIsValid(userbuf)); /* see comments above */ - /* ---------------- * increment access statistics * ---------------- @@ -1067,21 +1073,17 @@ heap_fetch(Relation relation, if (tuple->t_data == NULL) { + /* Tuple failed time check, so we can release now. */ ReleaseBuffer(buffer); - return; + *userbuf = InvalidBuffer; + } + else + { + /* All checks passed, so return the tuple as valid. + * Caller is now responsible for releasing the buffer. + */ + *userbuf = buffer; } - - /* ---------------- - * all checks passed, now either return a copy of the tuple - * or pin the buffer page and return a pointer, depending on - * whether caller gave us a valid buf. 
- * ---------------- - */ - - *userbuf = buffer; /* user is required to ReleaseBuffer() - * this */ - - return; } /* ---------------- diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c index 133bbdbc03..ee36b41889 100644 --- a/src/backend/access/rtree/rtree.c +++ b/src/backend/access/rtree/rtree.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.37 1999/09/18 19:06:16 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.38 1999/09/24 00:23:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -135,7 +135,7 @@ rtbuild(Relation heap, tupleTable = ExecCreateTupleTable(1); slot = ExecAllocTableSlot(tupleTable); econtext = makeNode(ExprContext); - FillDummyExprContext(econtext, slot, hd, buffer); + FillDummyExprContext(econtext, slot, hd, InvalidBuffer); } else { diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 1c5dae27be..b3bf5cd973 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.52 1999/09/16 09:08:56 ishii Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.53 1999/09/24 00:24:05 tgl Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -151,6 +151,7 @@ #include "commands/vacuum.h" #include "libpq/be-fsstubs.h" #include "storage/proc.h" +#include "storage/sinval.h" #include "utils/temprel.h" #include "utils/inval.h" #include "utils/portal.h" @@ -749,8 +750,8 @@ RecordTransactionAbort() static void AtAbort_Cache() { - RegisterInvalid(false); RelationCacheAbort(); + RegisterInvalid(false); } /* -------------------------------- @@ -929,7 +930,7 @@ CommitTransaction() /* * Let others know about no transaction in progress by me. 
* Note that this must be done _before_ releasing locks we hold - * and SpinAcquire(ShmemIndexLock) is required: UPDATE with xid 0 is + * and SpinAcquire(SInvalLock) is required: UPDATE with xid 0 is * blocked by xid 1' UPDATE, xid 1 is doing commit while xid 2 * gets snapshot - if xid 2' GetSnapshotData sees xid 1 as running * then it must see xid 0 as running as well or it will see two @@ -937,10 +938,11 @@ CommitTransaction() */ if (MyProc != (PROC *) NULL) { - SpinAcquire(ShmemIndexLock); + /* Lock SInvalLock because that's what GetSnapshotData uses. */ + SpinAcquire(SInvalLock); MyProc->xid = InvalidTransactionId; MyProc->xmin = InvalidTransactionId; - SpinRelease(ShmemIndexLock); + SpinRelease(SInvalLock); } RelationPurgeLocalRelation(true); diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index e225a5834d..f1051cb784 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.97 1999/09/23 17:02:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.98 1999/09/24 00:24:11 tgl Exp $ * * * INTERFACE ROUTINES @@ -1065,224 +1065,189 @@ DeleteRelationTuple(Relation rel) * The routine will truncate and then reconstruct the indices on * the relation specified by the heapRelation parameter. 
* -------------------------------- -*/ + */ +static void +RelationTruncateIndexes(Relation heapRelation) +{ + Relation indexRelation, currentIndex; + ScanKeyData entry; + HeapScanDesc scan; + HeapTuple indexTuple, procTuple, classTuple; + Form_pg_index index; + Oid heapId, indexId, procId, accessMethodId; + Node *oldPred = NULL; + PredInfo *predInfo; + List *cnfPred = NULL; + AttrNumber *attributeNumberA; + FuncIndexInfo fInfo, *funcInfo = NULL; + int i, numberOfAttributes; + char *predString; -static void -RelationTruncateIndexes(Relation heapRelation) { + heapId = RelationGetRelid(heapRelation); - Relation indexRelation, currentIndex; - ScanKeyData entry; - HeapScanDesc scan; - HeapTuple indexTuple, procTuple, classTuple; - Form_pg_index index; - Oid heapId, indexId, procId, accessMethodId; - Node *oldPred = NULL; - PredInfo *predInfo; - List *cnfPred = NULL; - AttrNumber *attributeNumberA; - FuncIndexInfo fInfo, *funcInfo = NULL; - int i, numberOfAttributes; - char *predString; + /* Scan pg_index to find indexes on heapRelation */ - /*** Save the id of the heap relation ***/ + indexRelation = heap_openr(IndexRelationName, AccessShareLock); + ScanKeyEntryInitialize(&entry, 0, Anum_pg_index_indrelid, F_OIDEQ, + ObjectIdGetDatum(heapId)); + scan = heap_beginscan(indexRelation, false, SnapshotNow, 1, &entry); + while (HeapTupleIsValid(indexTuple = heap_getnext(scan, 0))) + { + /* + * For each index, fetch index attributes so we can apply index_build + */ + index = (Form_pg_index) GETSTRUCT(indexTuple); + indexId = index->indexrelid; + procId = index->indproc; - heapId = RelationGetRelid(heapRelation); - - /*** Open the System relation, pg_index ***/ + for (i = 0; i < INDEX_MAX_KEYS; i++) + { + if (index->indkey[i] == InvalidAttrNumber) + break; + } + numberOfAttributes = i; - indexRelation = heap_openr(IndexRelationName); - - /*** Scan pg_index For indexes related to heap relation ***/ + /* If a valid where predicate, compute predicate Node */ + if 
(VARSIZE(&index->indpred) != 0) + { + predString = fmgr(F_TEXTOUT, &index->indpred); + oldPred = stringToNode(predString); + pfree(predString); + } + predInfo = (PredInfo *) palloc(sizeof(PredInfo)); + predInfo->pred = (Node *) cnfPred; + predInfo->oldPred = oldPred; - ScanKeyEntryInitialize(&entry, 0x0, Anum_pg_index_indrelid, F_OIDEQ, - ObjectIdGetDatum(heapId)); + /* Assign Index keys to attributes array */ + attributeNumberA = (AttrNumber *) palloc(numberOfAttributes * + sizeof(AttrNumber)); + for (i = 0; i < numberOfAttributes; i++) + attributeNumberA[i] = index->indkey[i]; - scan = heap_beginscan(indexRelation, false, SnapshotNow, 1, &entry); - while (HeapTupleIsValid(indexTuple = heap_getnext(scan, 0))) { - - /*** For each index, fetch index attributes ***/ + /* If this is a procedural index, initialize our FuncIndexInfo */ + if (procId != InvalidOid) + { + funcInfo = &fInfo; + FIsetnArgs(funcInfo, numberOfAttributes); + procTuple = SearchSysCacheTuple(PROOID, ObjectIdGetDatum(procId), + 0, 0, 0); + if (!HeapTupleIsValid(procTuple)) + elog(ERROR, "RelationTruncateIndexes: index procedure not found"); + namecpy(&(funcInfo->funcName), + &(((Form_pg_proc) GETSTRUCT(procTuple))->proname)); + FIsetProcOid(funcInfo, procTuple->t_data->t_oid); + } - index = (Form_pg_index) GETSTRUCT(indexTuple); - indexId = index->indexrelid; - procId = index->indproc; - - for (i = 0; i < INDEX_MAX_KEYS; i++) { - if (index->indkey[i] == InvalidAttrNumber) break; - } - numberOfAttributes = i; - - /*** If a valid where predicate, compute predicate Node ***/ + /* Fetch the classTuple associated with this index */ + classTuple = SearchSysCacheTupleCopy(RELOID, ObjectIdGetDatum(indexId), + 0, 0, 0); + if (!HeapTupleIsValid(classTuple)) + elog(ERROR, "RelationTruncateIndexes: index access method not found"); + accessMethodId = ((Form_pg_class) GETSTRUCT(classTuple))->relam; - if (VARSIZE(&index->indpred) != 0) { - predString = fmgr(F_TEXTOUT, &index->indpred); - oldPred = 
stringToNode(predString); - pfree(predString); - } - - predInfo = (PredInfo *) palloc(sizeof(PredInfo)); - predInfo->pred = (Node *) cnfPred; - /* predInfo->pred = (Node *) oldPred; */ - predInfo->oldPred = oldPred; + /* Open our index relation */ + currentIndex = index_open(indexId); + if (currentIndex == NULL) + elog(ERROR, "RelationTruncateIndexes: can't open index relation"); - /*** Assign Index keys to attributes array ***/ + /* Obtain exclusive lock on it, just to be sure */ + LockRelation(currentIndex, AccessExclusiveLock); - attributeNumberA = (AttrNumber *) palloc(numberOfAttributes * - sizeof(attributeNumberA[0])); - for (i = 0; i < numberOfAttributes; i++) { - attributeNumberA[i] = index->indkey[i]; - } - - /*** If this is a procedural index, initialize our FuncIndexInfo ***/ + /* + * Release any buffers associated with this index. If they're dirty, + * they're just dropped without bothering to flush to disk. + */ + ReleaseRelationBuffers(currentIndex); + if (FlushRelationBuffers(currentIndex, (BlockNumber) 0, false) < 0) + elog(ERROR, "RelationTruncateIndexes: unable to flush index from buffer pool"); - if (procId != InvalidOid) { - funcInfo = &fInfo; - FIsetnArgs(funcInfo, numberOfAttributes); - procTuple = SearchSysCacheTuple(PROOID, ObjectIdGetDatum(procId), - 0, 0, 0); - if (!HeapTupleIsValid(procTuple)) { - elog(ERROR, "RelationTruncateIndexes: index procedure not found"); - } - namecpy(&(funcInfo->funcName), - &(((Form_pg_proc) GETSTRUCT(procTuple))->proname)); - FIsetProcOid(funcInfo, procTuple->t_data->t_oid); - } + /* Now truncate the actual data and set blocks to zero */ + smgrtruncate(DEFAULT_SMGR, currentIndex, 0); + currentIndex->rd_nblocks = 0; - /*** Fetch the classTuple associated with this index ***/ - - classTuple = SearchSysCacheTupleCopy(RELOID, ObjectIdGetDatum(indexId), - 0, 0, 0); - if (!HeapTupleIsValid(classTuple)) { - elog(ERROR, "RelationTruncateIndexes: index access method not found"); - } - accessMethodId = ((Form_pg_class) 
GETSTRUCT(classTuple))->relam; + /* Initialize the index and rebuild */ + InitIndexStrategy(numberOfAttributes, currentIndex, accessMethodId); + index_build(heapRelation, currentIndex, numberOfAttributes, + attributeNumberA, 0, NULL, funcInfo, predInfo); - /*** Open our index relation ***/ - - currentIndex = index_open(indexId); - if (currentIndex == NULL) { - elog(ERROR, "RelationTruncateIndexes: can't open index relation"); - } - - /*** Truncate the index before building ***/ - - smgrtruncate(DEFAULT_SMGR, currentIndex, 0); - currentIndex->rd_nblocks = 0; - - /*** Initialize the index and rebuild ***/ - - InitIndexStrategy(numberOfAttributes, currentIndex, accessMethodId); - index_build(heapRelation, currentIndex, numberOfAttributes, - attributeNumberA, 0, NULL, funcInfo, predInfo); - - /*** Re-open our heap relation and re-lock, since index_build ***/ - /*** will close and unlock the relation ***/ - - heapRelation = heap_open(heapId); - LockRelation(heapRelation, AccessExclusiveLock); - - /*** RelationUnsetLockForWrite(currentIndex); ***/ - - } - - /*** Complete the scan and close the Catalogueindex Relation ***/ - - heap_endscan(scan); - heap_close(indexRelation); + /* + * index_build will close both the heap and index relations + * (but not give up the locks we hold on them). That's fine + * for the index, but we need to open the heap again. We need + * no new lock, since this backend still has the exclusive lock + * grabbed by heap_truncate. + */ + heapRelation = heap_open(heapId, NoLock); + Assert(heapRelation != NULL); + } + /* Complete the scan and close pg_index */ + heap_endscan(scan); + heap_close(indexRelation, AccessShareLock); } /* ---------------------------- * heap_truncate - * - * This routine is used to truncate the data from the - * storange manager of any data within the relation handed - * to this routine. The routine assumes that the relation - * handed to this routine is an open relation. 
* + * This routine is used to truncate the data from the + * storage manager of any data within the relation handed + * to this routine. * ---------------------------- */ -void -heap_truncate(char *relname) { - - Relation rel; - Oid rid; - Portal portal; - char *pname; - MemoryContext old; - PortalVariableMemory pmem; - NameData truncRel; +void +heap_truncate(char *relname) +{ + Relation rel; + Oid rid; - /* - * Create a portal for safe memory across transctions. We need to - * palloc the name space for it because our hash function expects the - * name to be on a longword boundary. CreatePortal copies the name to - * safe storage for us. - */ - - pname = (char *) palloc(strlen(TRUNCPNAME) + 1); - strcpy(pname, TRUNCPNAME); - portal = CreatePortal(pname); - pfree(pname); + /* Open relation for processing, and grab exclusive access on it. */ - /* relname gets de-allocated on transaction commit */ - - strcpy(truncRel.data, relname); - - pmem = PortalGetVariableMemory(portal); - old = MemoryContextSwitchTo((MemoryContext) pmem); - MemoryContextSwitchTo(old); - - /* Commit the current transaction */ - - CommitTransactionCommand(); - StartTransactionCommand(); - - /* Open relation for processing */ + rel = heap_openr(relname, AccessExclusiveLock); + rid = rel->rd_id; - rel = heap_openr(truncRel.data); - if (rel == NULL) - elog(ERROR, "Relation %s Does Not Exist!", truncRel.data); - rid = rel->rd_id; + /* ---------------- + * TRUNCATE TABLE within a transaction block is dangerous, because + * if the transaction is later rolled back we have no way to + * undo truncation of the relation's physical file. For now, allow it + * but emit a warning message. + * Someday we might want to consider postponing the physical truncate + * until transaction commit, but that's a lot of work... + * The only case that actually works right is for relations created + * in the current transaction, since the post-abort state would be that + * they don't exist anyway. 
So, no warning in that case. + * ---------------- + */ + if (IsTransactionBlock() && ! rel->rd_myxactonly) + elog(NOTICE, "Caution: TRUNCATE TABLE cannot be rolled back, so don't abort now"); - LockRelation(rel, AccessExclusiveLock); + /* + * Release any buffers associated with this relation. If they're dirty, + * they're just dropped without bothering to flush to disk. + */ - /* Release any buffers associated with this relation */ + ReleaseRelationBuffers(rel); + if (FlushRelationBuffers(rel, (BlockNumber) 0, false) < 0) + elog(ERROR, "heap_truncate: unable to flush relation from buffer pool"); - ReleaseRelationBuffers(rel); - BlowawayRelationBuffers(rel, 0); + /* Now truncate the actual data and set blocks to zero */ - /* Now truncate the actual data and set blocks to zero */ - - smgrtruncate(DEFAULT_SMGR, rel, 0); - rel->rd_nblocks = 0; + smgrtruncate(DEFAULT_SMGR, rel, 0); + rel->rd_nblocks = 0; - /* If this relation has indexes, truncate the indexes, which */ - /* will unlock the relation as a result. Otherwise, unlock */ - /* the relation ourselves. */ - - if (rel->rd_rel->relhasindex) { - RelationTruncateIndexes(rel); - } else { - UnlockRelation(rel, AccessExclusiveLock); - } + /* If this relation has indexes, truncate the indexes too */ + if (rel->rd_rel->relhasindex) + RelationTruncateIndexes(rel); - /* Close our relation */ - - heap_close(rel); - RelationForgetRelation(rid); - - /* Destoy cross-transaction memory */ - - PortalDestroy(&portal); - - /* Start new transaction */ - - CommitTransactionCommand(); - StartTransactionCommand(); - - return; + /* + * Close the relation, but keep exclusive lock on it until commit. + */ + heap_close(rel, NoLock); + /* + * Is this really necessary? 
+ */ + RelationForgetRelation(rid); } @@ -1468,15 +1433,19 @@ heap_destroy_with_catalog(char *relname) &rel->rd_rel->relname); /* ---------------- - * We do not allow DROP TABLE within a transaction block, because - * if the transaction is later rolled back there would be no way to - * undo the unlink of the relation's physical file. The sole exception - * is for relations created in the current transaction, since the post- - * abort state would be that they don't exist anyway. + * DROP TABLE within a transaction block is dangerous, because + * if the transaction is later rolled back there will be no way to + * undo the unlink of the relation's physical file. For now, allow it + * but emit a warning message. + * Someday we might want to consider postponing the physical unlink + * until transaction commit, but that's a lot of work... + * The only case that actually works right is for relations created + * in the current transaction, since the post-abort state would be that + * they don't exist anyway. So, no warning in that case. * ---------------- */ if (IsTransactionBlock() && ! 
rel->rd_myxactonly) - elog(ERROR, "Cannot destroy relation within a transaction block"); + elog(NOTICE, "Caution: DROP TABLE cannot be rolled back, so don't abort now"); /* ---------------- * remove inheritance information diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 912996fb1f..f8c4dac95e 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.90 1999/09/18 19:06:33 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.91 1999/09/24 00:24:11 tgl Exp $ * * * INTERFACE ROUTINES @@ -1113,15 +1113,19 @@ index_destroy(Oid indexId) LockRelation(userindexRelation, AccessExclusiveLock); /* ---------------- - * We do not allow DROP INDEX within a transaction block, because - * if the transaction is later rolled back there would be no way to - * undo the unlink of the relation's physical file. The sole exception - * is for relations created in the current transaction, since the post- - * abort state would be that they don't exist anyway. + * DROP INDEX within a transaction block is dangerous, because + * if the transaction is later rolled back there will be no way to + * undo the unlink of the relation's physical file. For now, allow it + * but emit a warning message. + * Someday we might want to consider postponing the physical unlink + * until transaction commit, but that's a lot of work... + * The only case that actually works right is for relations created + * in the current transaction, since the post-abort state would be that + * they don't exist anyway. So, no warning in that case. * ---------------- */ if (IsTransactionBlock() && ! 
userindexRelation->rd_myxactonly) - elog(ERROR, "Cannot destroy index within a transaction block"); + elog(NOTICE, "Caution: DROP INDEX cannot be rolled back, so don't abort now"); /* ---------------- * fix RELATION relation @@ -1370,7 +1374,7 @@ UpdateStats(Oid relid, long reltuples, bool hasindex) rd_rel->relpages = relpages; rd_rel->reltuples = reltuples; rd_rel->relhasindex = hasindex; - WriteBuffer(pg_class_scan->rs_cbuf); + WriteNoReleaseBuffer(pg_class_scan->rs_cbuf); } else { @@ -1413,6 +1417,9 @@ UpdateStats(Oid relid, long reltuples, bool hasindex) * FillDummyExprContext * Sets up dummy ExprContext and TupleTableSlot objects for use * with ExecQual. + * + * NOTE: buffer is passed for historical reasons; it should + * almost certainly always be InvalidBuffer. * ------------------------- */ void @@ -1508,7 +1515,6 @@ DefaultBuild(Relation heapRelation, tupleTable = ExecCreateTupleTable(1); slot = ExecAllocTableSlot(tupleTable); econtext = makeNode(ExprContext); - /* last parameter was junk being sent bjm 1998/08/17 */ FillDummyExprContext(econtext, slot, heapDescriptor, InvalidBuffer); } else @@ -1605,7 +1611,8 @@ DefaultBuild(Relation heapRelation, #ifndef OMIT_PARTIAL_INDEX if (predicate != NULL || oldPred != NULL) { - ExecDestroyTupleTable(tupleTable, false); + /* parameter was 'false', almost certainly wrong --- tgl 9/21/99 */ + ExecDestroyTupleTable(tupleTable, true); } #endif /* OMIT_PARTIAL_INDEX */ diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 24eb5b531d..b243dd173b 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/dbcommands.c,v 1.40 1999/09/18 19:06:40 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/dbcommands.c,v 1.41 1999/09/24 00:24:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,6 +22,7 @@ #include 
"catalog/pg_shadow.h" #include "commands/dbcommands.h" #include "miscadmin.h" +#include "storage/sinval.h" #include "tcop/tcopprot.h" #include "utils/syscache.h" @@ -89,7 +90,11 @@ destroydb(char *dbname, CommandDest dest) Oid db_id; char *path, dbpath[MAXPGPATH + 1], - buf[512]; + buf[MAXPGPATH + 50]; + Relation pgdbrel; + HeapScanDesc pgdbscan; + ScanKeyData key; + HeapTuple tup; /* * If this call returns, the database exists and we're allowed to @@ -97,36 +102,79 @@ destroydb(char *dbname, CommandDest dest) */ check_permissions("destroydb", dbpath, dbname, &db_id, &user_id); + /* do as much checking as we can... */ if (!OidIsValid(db_id)) elog(FATAL, "pg_database instance has an invalid OID"); - /* stop the vacuum daemon */ - stop_vacuum(dbpath, dbname); - - /* XXX what about stopping backends connected to the target database? */ - path = ExpandDatabasePath(dbpath); if (path == NULL) elog(ERROR, "Unable to locate path '%s'" "\n\tThis may be due to a missing environment variable" " in the server", dbpath); - /* - * remove the pg_database tuple FIRST, this may fail due to - * permissions problems - */ - snprintf(buf, 512, - "delete from pg_database where pg_database.oid = \'%u\'::oid", db_id); - pg_exec_query_dest(buf, dest, false); + /* stop the vacuum daemon (dead code...) */ + stop_vacuum(dbpath, dbname); - /* drop pages for this database that are in the shared buffer cache */ + /* + * Obtain exclusive lock on pg_database. We need this to ensure + * that no new backend starts up in the target database while we + * are deleting it. (Actually, a new backend might still manage to + * start up, because it will read pg_database without any locking + * to discover the database's OID. But it will detect its error + * in ReverifyMyDatabase and shut down before any serious damage + * is done. See postinit.c.) + */ + pgdbrel = heap_openr(DatabaseRelationName, AccessExclusiveLock); + + /* + * Check for active backends in the target database. 
+ */ + if (DatabaseHasActiveBackends(db_id)) + elog(ERROR, "Database '%s' has running backends, can't destroy it", + dbname); + + /* + * Find the database's tuple by OID (should be unique, we trust). + */ + ScanKeyEntryInitialize(&key, 0, ObjectIdAttributeNumber, + F_OIDEQ, ObjectIdGetDatum(db_id)); + + pgdbscan = heap_beginscan(pgdbrel, 0, SnapshotNow, 1, &key); + + tup = heap_getnext(pgdbscan, 0); + if (!HeapTupleIsValid(tup)) + { + heap_close(pgdbrel, AccessExclusiveLock); + elog(ERROR, "Database '%s', OID %u, not found in pg_database", + dbname, db_id); + } + + /* + * Houston, we have launch commit... + * + * Remove the database's tuple from pg_database. + */ + heap_delete(pgdbrel, &tup->t_self, NULL); + + heap_endscan(pgdbscan); + + /* + * Close pg_database, but keep exclusive lock till commit to ensure + * that any new backend scanning pg_database will see the tuple dead. + */ + heap_close(pgdbrel, NoLock); + + /* + * Drop pages for this database that are in the shared buffer cache. + * This is important to ensure that no remaining backend tries to + * write out a dirty buffer to the dead database later... + */ DropBuffers(db_id); /* - * remove the data directory. If the DELETE above failed, this will - * not be reached + * Remove the database's subdirectory and everything in it. */ - snprintf(buf, 512, "rm -r %s", path); + snprintf(buf, sizeof(buf), "rm -r '%s'", path); system(buf); } @@ -274,22 +322,28 @@ check_permissions(char *command, } /* check_permissions() */ /* - * stop_vacuum() -- stop the vacuum daemon on the database, if one is running. + * stop_vacuum -- stop the vacuum daemon on the database, if one is running. + * + * This is currently dead code, since we don't *have* vacuum daemons. + * If you want to re-enable it, think about the interlock against deleting + * a database out from under running backends, in destroydb() above. 
*/ static void stop_vacuum(char *dbpath, char *dbname) { - char filename[256]; +#ifdef NOT_USED + char filename[MAXPGPATH + 1]; FILE *fp; int pid; if (strchr(dbpath, SEP_CHAR) != 0) { - snprintf(filename, 256, "%s%cbase%c%s%c%s.vacuum", + snprintf(filename, sizeof(filename), "%s%cbase%c%s%c%s.vacuum", DataDir, SEP_CHAR, SEP_CHAR, dbname, SEP_CHAR, dbname); } else - snprintf(filename, 256, "%s%c%s.vacuum", dbpath, SEP_CHAR, dbname); + snprintf(filename, sizeof(filename), "%s%c%s.vacuum", + dbpath, SEP_CHAR, dbname); #ifndef __CYGWIN32__ if ((fp = AllocateFile(filename, "r")) != NULL) @@ -305,4 +359,5 @@ stop_vacuum(char *dbpath, char *dbname) pid, dbname); } } +#endif } diff --git a/src/backend/commands/rename.c b/src/backend/commands/rename.c index 3a822bd4e4..0a72ba497e 100644 --- a/src/backend/commands/rename.c +++ b/src/backend/commands/rename.c @@ -7,12 +7,14 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/Attic/rename.c,v 1.33 1999/09/18 19:06:40 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/Attic/rename.c,v 1.34 1999/09/24 00:24:17 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include + #include "access/heapam.h" #include "catalog/catname.h" #include "utils/syscache.h" @@ -21,6 +23,7 @@ #include "catalog/catalog.h" #include "commands/rename.h" #include "miscadmin.h" +#include "storage/smgr.h" #include "optimizer/prep.h" #include "utils/acl.h" @@ -166,19 +169,6 @@ renameatt(char *relname, /* * renamerel - change the name of a relation - * - * Relname attribute is changed in relation catalog. - * No record of the previous relname is kept (correct?). 
- * - * scan relation catalog - * for name conflict - * for original relation (if not arg) - * modify relname in relation tuple - * insert modified relation in relation catalog - * delete original relation from relation catalog - * - * XXX Will currently lose track of a relation if it is unable to - * properly replace the new relation tuple. */ void renamerel(char *oldrelname, char *newrelname) @@ -206,8 +196,55 @@ renamerel(char *oldrelname, char *newrelname) * until end of transaction. */ targetrelation = heap_openr(oldrelname, AccessExclusiveLock); - heap_close(targetrelation, NoLock); /* close rel but keep lock! */ + /* ---------------- + * RENAME TABLE within a transaction block is dangerous, because + * if the transaction is later rolled back we have no way to + * undo the rename of the relation's physical file. For now, allow it + * but emit a warning message. + * Someday we might want to consider postponing the physical rename + * until transaction commit, but that's a lot of work... + * The only case that actually works right is for relations created + * in the current transaction, since the post-abort state would be that + * they don't exist anyway. So, no warning in that case. + * ---------------- + */ + if (IsTransactionBlock() && ! targetrelation->rd_myxactonly) + elog(NOTICE, "Caution: RENAME TABLE cannot be rolled back, so don't abort now"); + + /* + * Flush all blocks of the relation out of the buffer pool. We need this + * because the blocks are marked with the relation's name as well as OID. + * If some backend tries to write a dirty buffer with mdblindwrt after + * we've renamed the physical file, we'll be in big trouble. + * + * Since we hold the exclusive lock on the relation, we don't have to + * worry about more blocks being read in while we finish the rename. 
+ */ + if (FlushRelationBuffers(targetrelation, (BlockNumber) 0, true) < 0) + elog(ERROR, "renamerel: unable to flush relation from buffer pool"); + + /* + * Make sure smgr and lower levels close the relation's files. + * (Next access to rel will reopen them.) + * + * Note: we rely on shared cache invalidation message to make other + * backends close and re-open the files. + */ + smgrclose(DEFAULT_SMGR, targetrelation); + + /* + * Close rel, but keep exclusive lock! + * + * Note: we don't do anything about updating the relcache entry; + * we assume it will be flushed by shared cache invalidate. + * XXX is this good enough? What if relation is myxactonly? + */ + heap_close(targetrelation, NoLock); + + /* + * Find relation's pg_class tuple, and make sure newrelname isn't in use. + */ relrelation = heap_openr(RelationRelationName, RowExclusiveLock); oldreltup = SearchSysCacheTupleCopy(RELNAME, @@ -220,14 +257,17 @@ renamerel(char *oldrelname, char *newrelname) elog(ERROR, "renamerel: relation \"%s\" exists", newrelname); /* - * XXX need to close relation and flush dirty buffers here! + * Perform physical rename of files. If this fails, we haven't yet + * done anything irreversible. + * + * XXX smgr.c ought to provide an interface for this; doing it + * directly is bletcherous. 
*/ - - /* rename the path first, so if this fails the rename's not done */ strcpy(oldpath, relpath(oldrelname)); strcpy(newpath, relpath(newrelname)); if (rename(oldpath, newpath) < 0) - elog(ERROR, "renamerel: unable to rename file: %s", oldpath); + elog(ERROR, "renamerel: unable to rename %s to %s: %m", + oldpath, newpath); /* rename additional segments of relation, too */ for (i = 1;; i++) @@ -235,13 +275,22 @@ renamerel(char *oldrelname, char *newrelname) sprintf(toldpath, "%s.%d", oldpath, i); sprintf(tnewpath, "%s.%d", newpath, i); if (rename(toldpath, tnewpath) < 0) - break; + { + /* expected case is that there's not another segment file */ + if (errno == ENOENT) + break; + /* otherwise we're up the creek... */ + elog(ERROR, "renamerel: unable to rename %s to %s: %m", + toldpath, tnewpath); + } } + /* + * Update pg_class tuple with new relname. + */ StrNCpy((((Form_pg_class) GETSTRUCT(oldreltup))->relname.data), newrelname, NAMEDATALEN); - /* insert fixed rel tuple */ heap_replace(relrelation, &oldreltup->t_self, oldreltup, NULL); /* keep the system catalog indices current */ diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 3027763b46..e5cf7b0c88 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.120 1999/09/18 19:06:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.121 1999/09/24 00:24:17 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,6 +30,7 @@ #include "commands/vacuum.h" #include "miscadmin.h" #include "parser/parse_oper.h" +#include "storage/sinval.h" #include "storage/smgr.h" #include "utils/builtins.h" #include "utils/inval.h" @@ -46,8 +47,6 @@ /* #include *//* Why? 
*/ -extern int BlowawayRelationBuffers(Relation rel, BlockNumber block); - bool VacuumRunning = false; static Portal vc_portal; @@ -1838,9 +1837,9 @@ Elapsed %u/%u sec.", /* truncate relation */ if (blkno < nblocks) { - i = BlowawayRelationBuffers(onerel, blkno); + i = FlushRelationBuffers(onerel, blkno, false); if (i < 0) - elog(FATAL, "VACUUM (vc_rpfheap): BlowawayRelationBuffers returned %d", i); + elog(FATAL, "VACUUM (vc_rpfheap): FlushRelationBuffers returned %d", i); blkno = smgrtruncate(DEFAULT_SMGR, onerel, blkno); Assert(blkno >= 0); vacrelstats->num_pages = blkno; /* set new number of blocks */ @@ -1902,12 +1901,14 @@ vc_vacheap(VRelStats *vacrelstats, Relation onerel, VPageList vacuum_pages) /* * we have to flush "empty" end-pages (if changed, but who knows * it) before truncation + * + * XXX wouldn't passing 'true' to FlushRelationBuffers do the job? */ FlushBufferPool(!TransactionFlushEnabled()); - i = BlowawayRelationBuffers(onerel, nblocks); + i = FlushRelationBuffers(onerel, nblocks, false); if (i < 0) - elog(FATAL, "VACUUM (vc_vacheap): BlowawayRelationBuffers returned %d", i); + elog(FATAL, "VACUUM (vc_vacheap): FlushRelationBuffers returned %d", i); nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks); Assert(nblocks >= 0); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 97dffe548f..f07f8777a2 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -26,7 +26,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execMain.c,v 1.94 1999/09/18 19:06:47 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execMain.c,v 1.95 1999/09/24 00:24:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -130,16 +130,6 @@ ExecutorStart(QueryDesc *queryDesc, EState *estate) queryDesc->plantree, estate); - /* - * reset buffer refcount. 
the current refcounts are saved and will be - * restored when ExecutorEnd is called - * - * this makes sure that when ExecutorRun's are called recursively as for - * postquel functions, the buffers pinned by one ExecutorRun will not - * be unpinned by another ExecutorRun. - */ - BufferRefCountReset(estate->es_refcount); - return result; } @@ -385,10 +375,6 @@ ExecutorEnd(QueryDesc *queryDesc, EState *estate) pfree(estate->es_param_exec_vals); estate->es_param_exec_vals = NULL; } - - /* restore saved refcounts. */ - BufferRefCountRestore(estate->es_refcount); - } void @@ -802,7 +788,7 @@ EndPlan(Plan *plan, EState *estate) { TupleTable tupleTable = (TupleTable) estate->es_tupleTable; - ExecDestroyTupleTable(tupleTable, true); /* was missing last arg */ + ExecDestroyTupleTable(tupleTable, true); estate->es_tupleTable = NULL; } @@ -1678,7 +1664,6 @@ EvalPlanQual(EState *estate, Index rti, ItemPointer tid) sizeof(ParamExecData)); epqstate->es_tupleTable = ExecCreateTupleTable(estate->es_tupleTable->size); - epqstate->es_refcount = estate->es_refcount; /* ... 
rest */ newepq->plan = copyObject(estate->es_origPlan); newepq->free = NULL; diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index a44030aa40..2886cab725 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.59 1999/09/18 23:26:37 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execQual.c,v 1.60 1999/09/24 00:24:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -637,7 +637,8 @@ ExecEvalFuncArgs(FunctionCachePtr fcache, if (!(*argIsDone)) { - Assert(i == 0); + if (i != 0) + elog(ERROR, "functions can only take sets in their first argument"); fcache->setArg = (char *) argV[0]; fcache->hasSetArg = true; } @@ -758,35 +759,48 @@ ExecMakeFunctionResult(Node *node, if (fcache->language == SQLlanguageId) { Datum result; + bool argDone; Assert(funcNode); - result = postquel_function(funcNode, (char **) argV, isNull, isDone); - /* - * finagle the situation where we are iterating through all - * results in a nested dot function (whose argument function + /*-------------------- + * This loop handles the situation where we are iterating through + * all results in a nested dot function (whose argument function * returns a set of tuples) and the current function finally - * finishes. We need to get the next argument in the set and run - * the function all over again. This is getting unclean. + * finishes. We need to get the next argument in the set and start + * the function all over again. We might have to do it more than + * once, if the function produces no results for a particular argument. + * This is getting unclean. + *-------------------- */ - if ((*isDone) && (fcache->hasSetArg)) + for (;;) { - bool argDone; + result = postquel_function(funcNode, (char **) argV, + isNull, isDone); + if (! *isDone) + break; /* got a result from current argument */ + if (! 
fcache->hasSetArg) + break; /* input not a set, so done */ + + /* OK, get the next argument... */ ExecEvalFuncArgs(fcache, econtext, arguments, argV, &argDone); if (argDone) { + /* End of arguments, so reset the setArg flag and say "Done" */ fcache->setArg = (char *) NULL; + fcache->hasSetArg = false; *isDone = true; result = (Datum) NULL; + break; } - else - result = postquel_function(funcNode, - (char **) argV, - isNull, - isDone); + + /* If we reach here, loop around to run the function on the + * new argument. + */ } + if (funcisset) { @@ -805,6 +819,7 @@ ExecMakeFunctionResult(Node *node, if (*isDone) ((Func *) node)->func_fcache = NULL; } + return result; } else @@ -1424,8 +1439,10 @@ ExecTargetList(List *targetlist, { char nulls_array[64]; bool fjNullArray[64]; - bool *fjIsNull; + bool itemIsDoneArray[64]; char *null_head; + bool *fjIsNull; + bool *itemIsDone; List *tl; TargetEntry *tle; Node *expr; @@ -1434,6 +1451,7 @@ ExecTargetList(List *targetlist, Datum constvalue; HeapTuple newTuple; bool isNull; + bool haveDoneIters; static struct tupleDesc NullTupleDesc; /* we assume this inits to zeroes */ /* @@ -1457,24 +1475,30 @@ ExecTargetList(List *targetlist, /* * allocate an array of char's to hold the "null" information only if * we have a really large targetlist. otherwise we use the stack. + * + * We also allocate a bool array that is used to hold fjoin result state, + * and another that holds the isDone status for each targetlist item. */ if (nodomains > 64) { null_head = (char *) palloc(nodomains + 1); fjIsNull = (bool *) palloc(nodomains + 1); + itemIsDone = (bool *) palloc(nodomains + 1); } else { null_head = &nulls_array[0]; fjIsNull = &fjNullArray[0]; + itemIsDone = &itemIsDoneArray[0]; } /* * evaluate all the expressions in the target list */ - EV_printf("ExecTargetList: setting target list values\n"); - *isDone = true; + *isDone = true; /* until proven otherwise */ + haveDoneIters = false; /* any isDone Iter exprs in tlist? 
*/ + foreach(tl, targetlist) { @@ -1493,13 +1517,11 @@ ExecTargetList(List *targetlist, expr = tle->expr; resdom = tle->resdom; resind = resdom->resno - 1; + constvalue = (Datum) ExecEvalExpr(expr, econtext, &isNull, - isDone); - - if ((IsA(expr, Iter)) && (*isDone)) - return (HeapTuple) NULL; + &itemIsDone[resind]); values[resind] = constvalue; @@ -1507,6 +1529,14 @@ ExecTargetList(List *targetlist, null_head[resind] = ' '; else null_head[resind] = 'n'; + + if (IsA(expr, Iter)) + { + if (itemIsDone[resind]) + haveDoneIters = true; + else + *isDone = false; /* we have undone Iters in the list */ + } } else { @@ -1518,6 +1548,8 @@ ExecTargetList(List *targetlist, DatumPtr results = fjNode->fj_results; ExecEvalFjoin(tle, econtext, fjIsNull, isDone); + + /* this is probably wrong: */ if (*isDone) return (HeapTuple) NULL; @@ -1558,18 +1590,86 @@ ExecTargetList(List *targetlist, } } + if (haveDoneIters) + { + if (*isDone) + { + /* all Iters are done, so return a null indicating tlist set + * expansion is complete. + */ + newTuple = NULL; + goto exit; + } + else + { + /* We have some done and some undone Iters. Restart the done ones + * so that we can deliver a tuple (if possible). + * + * XXX this code is a crock, because it only works for Iters at + * the top level of tlist expressions, and doesn't even work right + * for them: you should get all possible combinations of Iter + * results, but you won't unless the numbers of values returned by + * each are relatively prime. Should have a mechanism more like + * aggregate functions, where we make a list of all Iters + * contained in the tlist and cycle through their values in a + * methodical fashion. To do someday; can't get excited about + * fixing a Berkeley feature that's not in SQL92. (The only + * reason we're doing this much is that we have to be sure all + * the Iters are run to completion, or their subplan executors + * will have unreleased resources, e.g. pinned buffers...) 
+ */ + foreach(tl, targetlist) + { + tle = lfirst(tl); + + if (tle->resdom != NULL) + { + expr = tle->expr; + resdom = tle->resdom; + resind = resdom->resno - 1; + + if (IsA(expr, Iter) && itemIsDone[resind]) + { + constvalue = (Datum) ExecEvalExpr(expr, + econtext, + &isNull, + &itemIsDone[resind]); + if (itemIsDone[resind]) + { + /* Oh dear, this Iter is returning an empty set. + * Guess we can't make a tuple after all. + */ + *isDone = true; + newTuple = NULL; + goto exit; + } + + values[resind] = constvalue; + + if (!isNull) + null_head[resind] = ' '; + else + null_head[resind] = 'n'; + } + } + } + } + } + /* * form the new result tuple (in the "normal" context) */ newTuple = (HeapTuple) heap_formtuple(targettype, values, null_head); +exit: /* - * free the nulls array if we allocated one.. + * free the status arrays if we palloc'd them */ if (nodomains > 64) { pfree(null_head); pfree(fjIsNull); + pfree(itemIsDone); } return newTuple; diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index 6e2e249c9a..835dba7c5c 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -14,7 +14,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execTuples.c,v 1.29 1999/07/17 20:16:57 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execTuples.c,v 1.30 1999/09/24 00:24:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,9 +38,6 @@ * ExecSetSlotDescriptor - set a slot's tuple descriptor * ExecSetSlotDescriptorIsNew - diddle the slot-desc-is-new flag * ExecSetNewSlotDescriptor - set a desc and the is-new-flag all at once - * ExecSlotBuffer - return buffer of tuple in slot - * ExecSetSlotBuffer - set the buffer for tuple in slot - * ExecIncrSlotBufferRefcnt - bump the refcnt of the slot buffer(Macro) * * SLOT STATUS PREDICATES * TupIsNull - true when slot contains no tuple(Macro) @@ -193,7 +190,7 @@ ExecDestroyTupleTable(TupleTable 
table, /* tuple table */ bool shouldFree) /* true if we should free slot * contents */ { - int next; /* next avaliable slot */ + int next; /* next available slot */ TupleTableSlot *array; /* start of table array */ int i; /* counter */ @@ -212,38 +209,27 @@ ExecDestroyTupleTable(TupleTable table, /* tuple table */ /* ---------------- * first free all the valid pointers in the tuple array - * if that's what the caller wants.. + * and drop refcounts of any referenced buffers, + * if that's what the caller wants. (There is probably + * no good reason for the caller ever not to want it!) * - * Note: we do nothing about the Buffer and Tuple Descriptor's + * Note: we do nothing about the Tuple Descriptor's * we store in the slots. This may have to change (ex: we should * probably worry about pfreeing tuple descs too) -cim 3/14/91 + * + * Right now, the handling of tuple pointers and buffer refcounts + * is clean, but the handling of tuple descriptors is NOT; they + * are copied around with wild abandon. It would take some work + * to make tuple descs pfree'able. Fortunately, since they're + * normally only made once per scan, it's probably not worth + * worrying about... tgl 9/21/99 * ---------------- */ if (shouldFree) + { for (i = 0; i < next; i++) - { - TupleTableSlot slot; - HeapTuple tuple; - - slot = array[i]; - tuple = slot.val; - - if (tuple != NULL) - { - slot.val = (HeapTuple) NULL; - if (slot.ttc_shouldFree) - { - /* ---------------- - * since a tuple may contain a pointer to - * lock information allocated along with the - * tuple, we have to be careful to free any - * rule locks also -cim 1/17/90 - * ---------------- - */ - pfree(tuple); - } - } - } + ExecClearTuple(&array[i]); + } /* ---------------- * finally free the tuple array and the table itself. 
@@ -274,6 +260,7 @@ TupleTableSlot * /* return: the slot allocated in the tuple ExecAllocTableSlot(TupleTable table) { int slotnum; /* new slot number */ + TupleTableSlot* slot; /* ---------------- * sanity checks @@ -319,9 +306,18 @@ ExecAllocTableSlot(TupleTable table) slotnum = table->next; table->next++; - table->array[slotnum].type = T_TupleTableSlot; + slot = &(table->array[slotnum]); - return &(table->array[slotnum]); + /* Make sure the allocated slot is valid (and empty) */ + slot->type = T_TupleTableSlot; + slot->val = (HeapTuple) NULL; + slot->ttc_shouldFree = true; + slot->ttc_descIsNew = true; + slot->ttc_tupleDescriptor = (TupleDesc) NULL; + slot->ttc_buffer = InvalidBuffer; + slot->ttc_whichplan = -1; + + return slot; } /* ---------------------------------------------------------------- @@ -333,26 +329,49 @@ ExecAllocTableSlot(TupleTable table) * ExecStoreTuple * * This function is used to store a tuple into a specified - * slot in the tuple table. Note: the only slots which should - * be called with shouldFree == false are those slots used to - * store tuples not allocated with pfree(). Currently the - * seqscan and indexscan nodes use this for the tuples returned - * by amgetattr, which are actually pointers onto disk pages. + * slot in the tuple table. + * + * tuple: tuple to store + * slot: slot to store it in + * buffer: disk buffer if tuple is in a disk page, else InvalidBuffer + * shouldFree: true if ExecClearTuple should pfree() the tuple + * when done with it + * + * If 'buffer' is not InvalidBuffer, the tuple table code acquires a pin + * on the buffer which is held until the slot is cleared, so that the tuple + * won't go away on us. + * + * shouldFree is normally set 'true' for tuples constructed on-the-fly. + * It must always be 'false' for tuples that are stored in disk pages, + * since we don't want to try to pfree those. 
+ * + * Another case where it is 'false' is when the referenced tuple is held + * in a tuple table slot belonging to a lower-level executor Proc node. + * In this case the lower-level slot retains ownership and responsibility + * for eventually releasing the tuple. When this method is used, we must + * be certain that the upper-level Proc node will lose interest in the tuple + * sooner than the lower-level one does! If you're not certain, copy the + * lower-level tuple with heap_copytuple and let the upper-level table + * slot assume ownership of the copy! + * + * Return value is just the passed-in slot pointer. * -------------------------------- */ -TupleTableSlot * /* return: slot passed */ -ExecStoreTuple(HeapTuple tuple, /* tuple to store */ - TupleTableSlot *slot, /* slot in which to store tuple */ - Buffer buffer, /* buffer associated with tuple */ - bool shouldFree) /* true if we call pfree() when we gc. */ +TupleTableSlot * +ExecStoreTuple(HeapTuple tuple, + TupleTableSlot *slot, + Buffer buffer, + bool shouldFree) { /* ---------------- * sanity checks * ---------------- */ Assert(slot != NULL); + /* passing shouldFree=true for a tuple on a disk page is not sane */ + Assert(BufferIsValid(buffer) ? (!shouldFree) : true); - /* clear out the slot first */ + /* clear out any old contents of the slot */ ExecClearTuple(slot); /* ---------------- @@ -364,6 +383,12 @@ ExecStoreTuple(HeapTuple tuple, /* tuple to store */ slot->ttc_buffer = buffer; slot->ttc_shouldFree = shouldFree; + /* If tuple is on a disk page, keep the page pinned as long as we hold + * a pointer into it. 
+ */ + if (BufferIsValid(buffer)) + IncrBufferRefCount(buffer); + return slot; } @@ -395,29 +420,20 @@ ExecClearTuple(TupleTableSlot *slot) /* slot in which to store tuple */ * ---------------- */ if (slot->ttc_shouldFree && oldtuple != NULL) - { - /* ---------------- - * since a tuple may contain a pointer to - * lock information allocated along with the - * tuple, we have to be careful to free any - * rule locks also -cim 1/17/90 - * ---------------- - */ pfree(oldtuple); - } - /* ---------------- - * store NULL into the specified slot and return the slot. - * - also set buffer to InvalidBuffer -cim 3/14/91 - * ---------------- - */ slot->val = (HeapTuple) NULL; + slot->ttc_shouldFree = true; /* probably useless code... */ + + /* ---------------- + * Drop the pin on the referenced buffer, if there is one. + * ---------------- + */ if (BufferIsValid(slot->ttc_buffer)) ReleaseBuffer(slot->ttc_buffer); slot->ttc_buffer = InvalidBuffer; - slot->ttc_shouldFree = true; return slot; } @@ -525,41 +541,6 @@ ExecSetNewSlotDescriptor(TupleTableSlot *slot, /* slot to change */ #endif -/* -------------------------------- - * ExecSlotBuffer - * - * This function is used to get the tuple descriptor associated - * with the slot's tuple. Be very careful with this as it does not - * balance the reference counts. If the buffer returned is stored - * someplace else, then also use ExecIncrSlotBufferRefcnt(). - * - * Now a macro in tuptable.h - * -------------------------------- - */ - -/* -------------------------------- - * ExecSetSlotBuffer - * - * This function is used to set the tuple descriptor associated - * with the slot's tuple. Be very careful with this as it does not - * balance the reference counts. If we're using this then we should - * also use ExecIncrSlotBufferRefcnt(). 
- * -------------------------------- - */ -#ifdef NOT_USED -Buffer /* return: old slot buffer */ -ExecSetSlotBuffer(TupleTableSlot *slot, /* slot to change */ - Buffer b) /* tuple descriptor */ -{ - Buffer oldb = slot->ttc_buffer; - - slot->ttc_buffer = b; - - return oldb; -} - -#endif - /* ---------------------------------------------------------------- * tuple table slot status predicates * ---------------------------------------------------------------- @@ -601,12 +582,7 @@ ExecSlotDescriptorIsNew(TupleTableSlot *slot) /* slot to inspect */ #define INIT_SLOT_ALLOC \ tupleTable = (TupleTable) estate->es_tupleTable; \ - slot = ExecAllocTableSlot(tupleTable); \ - slot->val = (HeapTuple)NULL; \ - slot->ttc_shouldFree = true; \ - slot->ttc_tupleDescriptor = (TupleDesc)NULL; \ - slot->ttc_whichplan = -1;\ - slot->ttc_descIsNew = true; + slot = ExecAllocTableSlot(tupleTable); /* ---------------- * ExecInitResultTupleSlot diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c index bd515d51f9..f20d9c56bc 100644 --- a/src/backend/executor/nodeAppend.c +++ b/src/backend/executor/nodeAppend.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeAppend.c,v 1.25 1999/09/18 19:06:48 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeAppend.c,v 1.26 1999/09/24 00:24:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -399,12 +399,13 @@ ExecProcAppend(Append *node) { /* ---------------- * if the subplan gave us something then place a copy of - * whatever we get into our result slot and return it, else.. + * whatever we get into our result slot and return it. + * + * Note we rely on the subplan to retain ownership of the + * tuple for as long as we need it --- we don't copy it. 
* ---------------- */ - return ExecStoreTuple(result->val, - result_slot, result->ttc_buffer, false); - + return ExecStoreTuple(result->val, result_slot, InvalidBuffer, false); } else { diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index f856d4780b..38f7a0365d 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -13,7 +13,7 @@ * columns. (ie. tuples from the same group are consecutive) * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.29 1999/07/17 20:16:58 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.30 1999/09/24 00:24:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -91,10 +91,12 @@ ExecGroupEveryTuple(Group *node) { grpstate->grp_useFirstTuple = FALSE; + /* note we rely on subplan to hold ownership of the tuple + * for as long as we need it; we don't copy it. + */ ExecStoreTuple(grpstate->grp_firstTuple, grpstate->csstate.css_ScanTupleSlot, - InvalidBuffer, - false); + InvalidBuffer, false); } else { @@ -129,10 +131,12 @@ ExecGroupEveryTuple(Group *node) } } + /* note we rely on subplan to hold ownership of the tuple + * for as long as we need it; we don't copy it. + */ ExecStoreTuple(outerTuple, grpstate->csstate.css_ScanTupleSlot, - outerslot->ttc_buffer, - false); + InvalidBuffer, false); } /* ---------------- @@ -226,10 +230,12 @@ ExecGroupOneTuple(Group *node) */ projInfo = grpstate->csstate.cstate.cs_ProjInfo; + /* note we rely on subplan to hold ownership of the tuple + * for as long as we need it; we don't copy it. 
+ */ ExecStoreTuple(firsttuple, grpstate->csstate.css_ScanTupleSlot, - InvalidBuffer, - false); + InvalidBuffer, false); econtext->ecxt_scantuple = grpstate->csstate.css_ScanTupleSlot; resultSlot = ExecProject(projInfo, &isDone); diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 362851a425..b9e3cf5863 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.42 1999/08/12 00:42:43 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeIndexscan.c,v 1.43 1999/09/24 00:24:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -125,14 +125,14 @@ IndexNext(IndexScan *node) { int iptr; - slot->ttc_buffer = InvalidBuffer; - slot->ttc_shouldFree = false; + ExecClearTuple(slot); if (estate->es_evTupleNull[node->scan.scanrelid - 1]) - { - slot->val = NULL; /* must not free tuple! */ - return (slot); - } + return slot; /* return empty slot */ + + /* probably ought to use ExecStoreTuple here... */ slot->val = estate->es_evTuple[node->scan.scanrelid - 1]; + slot->ttc_shouldFree = false; + for (iptr = 0; iptr < numIndices; iptr++) { scanstate->cstate.cs_ExprContext->ecxt_scantuple = slot; @@ -142,6 +142,7 @@ IndexNext(IndexScan *node) } if (iptr == numIndices) /* would not be returned by indices */ slot->val = NULL; + /* Flag for the next call that no more tuples */ estate->es_evTupleNull[node->scan.scanrelid - 1] = true; return (slot); @@ -192,7 +193,7 @@ IndexNext(IndexScan *node) * the scan state. Eventually we will only do this and not * return a tuple. Note: we pass 'false' because tuples * returned by amgetnext are pointers onto disk pages and - * were not created with palloc() and so should not be pfree()'d. + * must not be pfree()'d. 
* ---------------- */ ExecStoreTuple(tuple, /* tuple to store */ @@ -200,6 +201,13 @@ IndexNext(IndexScan *node) buffer, /* buffer associated with tuple */ false); /* don't pfree */ + /* + * At this point we have an extra pin on the buffer, + * because ExecStoreTuple incremented the pin count. + * Drop our local pin. + */ + ReleaseBuffer(buffer); + /* * We must check to see if the current tuple would have * been matched by an earlier index, so we don't double @@ -223,8 +231,6 @@ IndexNext(IndexScan *node) else ExecClearTuple(slot); } - if (BufferIsValid(buffer)) - ReleaseBuffer(buffer); } if (indexNumber < numIndices) { diff --git a/src/backend/executor/nodeMaterial.c b/src/backend/executor/nodeMaterial.c index 783dbc7b32..24232617cf 100644 --- a/src/backend/executor/nodeMaterial.c +++ b/src/backend/executor/nodeMaterial.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeMaterial.c,v 1.25 1999/07/16 04:58:50 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeMaterial.c,v 1.26 1999/09/24 00:24:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,7 +31,7 @@ * ExecMaterial * * The first time this is called, ExecMaterial retrieves tuples - * this node's outer subplan and inserts them into a temporary + * from this node's outer subplan and inserts them into a temporary * relation. After this is done, a flag is set indicating that * the subplan has been materialized. Once the relation is * materialized, the first tuple is then returned. Successive @@ -41,7 +41,7 @@ * Initial State: * * ExecMaterial assumes the temporary relation has been - * created and openend by ExecInitMaterial during the prior + * created and opened by ExecInitMaterial during the prior * InitPlan() phase. * * ---------------------------------------------------------------- @@ -116,18 +116,7 @@ ExecMaterial(Material *node) if (TupIsNull(slot)) break; - /* - * heap_insert changes something... 
- */ - if (slot->ttc_buffer != InvalidBuffer) - heapTuple = heap_copytuple(slot->val); - else - heapTuple = slot->val; - - heap_insert(tempRelation, heapTuple); - - if (slot->ttc_buffer != InvalidBuffer) - pfree(heapTuple); + heap_insert(tempRelation, slot->val); ExecClearTuple(slot); } @@ -164,7 +153,7 @@ ExecMaterial(Material *node) /* ---------------- * at this point we know we have a sorted relation so - * we preform a simple scan on it with amgetnext().. + * we perform a simple scan on it with amgetnext().. * ---------------- */ currentScanDesc = matstate->csstate.css_currentScanDesc; diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 62b53af3c6..4b3f021fe0 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.28 1999/07/16 04:58:50 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.29 1999/09/24 00:24:23 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1153,15 +1153,18 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, Plan *parent) #define MERGEJOIN_NSLOTS 2 /* ---------------- * tuple table initialization + * + * XXX why aren't we getting a tuple table slot in the normal way? 
* ---------------- */ ExecInitResultTupleSlot(estate, &mergestate->jstate); - mjSlot = (TupleTableSlot *) palloc(sizeof(TupleTableSlot)); + mjSlot = makeNode(TupleTableSlot); mjSlot->val = NULL; mjSlot->ttc_shouldFree = true; - mjSlot->ttc_tupleDescriptor = NULL; - mjSlot->ttc_whichplan = -1; mjSlot->ttc_descIsNew = true; + mjSlot->ttc_tupleDescriptor = NULL; + mjSlot->ttc_buffer = InvalidBuffer; + mjSlot->ttc_whichplan = -1; mergestate->mj_MarkedTupleSlot = mjSlot; /* ---------------- @@ -1278,11 +1281,9 @@ ExecReScanMergeJoin(MergeJoin *node, ExprContext *exprCtxt, Plan *parent) TupleTableSlot *mjSlot = mergestate->mj_MarkedTupleSlot; ExecClearTuple(mjSlot); - mjSlot->val = NULL; - mjSlot->ttc_shouldFree = true; mjSlot->ttc_tupleDescriptor = NULL; - mjSlot->ttc_whichplan = -1; mjSlot->ttc_descIsNew = true; + mjSlot->ttc_whichplan = -1; mergestate->mj_JoinState = EXEC_MJ_INITIALIZE; diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index c83aa725a7..eb73733b58 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeSeqscan.c,v 1.20 1999/07/16 04:58:52 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeSeqscan.c,v 1.21 1999/09/24 00:24:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -74,20 +74,20 @@ SeqNext(SeqScan *node) if (estate->es_evTuple != NULL && estate->es_evTuple[node->scanrelid - 1] != NULL) { - slot->ttc_buffer = InvalidBuffer; - slot->ttc_shouldFree = false; + ExecClearTuple(slot); if (estate->es_evTupleNull[node->scanrelid - 1]) - { - slot->val = NULL; /* must not free tuple! */ - return (slot); - } + return slot; /* return empty slot */ + + /* probably ought to use ExecStoreTuple here... 
*/ slot->val = estate->es_evTuple[node->scanrelid - 1]; + slot->ttc_shouldFree = false; /* * Note that unlike IndexScan, SeqScan never use keys in - * heap_beginscan (and this is very bad) - so, here we have not + * heap_beginscan (and this is very bad) - so, here we do not * check are keys ok or not. */ + /* Flag for the next call that no more tuples */ estate->es_evTupleNull[node->scanrelid - 1] = true; return (slot); @@ -104,7 +104,9 @@ SeqNext(SeqScan *node) * in our scan tuple slot and return the slot. Note: we pass 'false' * because tuples returned by heap_getnext() are pointers onto * disk pages and were not created with palloc() and so should not - * be pfree()'d. + * be pfree()'d. Note also that ExecStoreTuple will increment the + * refcount of the buffer; the refcount will not be dropped until + * the tuple table slot is cleared. * ---------------- */ @@ -114,17 +116,6 @@ SeqNext(SeqScan *node) * this tuple */ false); /* don't pfree this pointer */ - /* ---------------- - * XXX -- mao says: The sequential scan for heap relations will - * automatically unpin the buffer this tuple is on when we cross - * a page boundary. The clearslot code also does this. We bump - * the pin count on the page here, since we actually have two - * pointers to it -- one in the scan desc and one in the tuple - * table slot. 
--mar 20 91 - * ---------------- - */ - ExecIncrSlotBufferRefcnt(slot); - return slot; } diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 4bd0eb2ff3..32a39ee18d 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -165,8 +165,6 @@ ExecInitSubPlan(SubPlan *node, EState *estate, Plan *parent) sp_estate->es_param_exec_vals = estate->es_param_exec_vals; sp_estate->es_tupleTable = ExecCreateTupleTable(ExecCountSlotsNode(node->plan) + 10); - pfree(sp_estate->es_refcount); - sp_estate->es_refcount = estate->es_refcount; sp_estate->es_snapshot = estate->es_snapshot; if (!ExecInitNode(node->plan, sp_estate, NULL)) diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index 88f6416e84..bfd0561705 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.29 1999/07/17 20:17:40 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.30 1999/09/24 00:24:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -64,7 +64,6 @@ long *NWaitIOBackendP; extern IpcSemaphoreId WaitIOSemId; long *PrivateRefCount; /* also used in freelist.c */ -long *LastRefCount; /* refcounts of last ExecMain level */ bits8 *BufferLocks; /* */ long *CommitInfoNeedsSave;/* to write buffers where we have filled * in t_infomask */ @@ -244,7 +243,6 @@ InitBufferPool(IPCKey key) } #endif PrivateRefCount = (long *) calloc(NBuffers, sizeof(long)); - LastRefCount = (long *) calloc(NBuffers, sizeof(long)); BufferLocks = (bits8 *) calloc(NBuffers, sizeof(bits8)); CommitInfoNeedsSave = (long *) calloc(NBuffers, sizeof(long)); } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index b435dd53ca..e0327c678f 100644 --- a/src/backend/storage/buffer/bufmgr.c 
+++ b/src/backend/storage/buffer/bufmgr.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.62 1999/09/18 19:07:26 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.63 1999/09/24 00:24:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -89,9 +89,6 @@ static void BufferSync(void); static int BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld); void PrintBufferDescs(void); -/* not static but used by vacuum only ... */ -int BlowawayRelationBuffers(Relation rel, BlockNumber block); - /* --------------------------------------------------- * RelationGetBufferWithBuffer * see if the given buffer is what we want @@ -146,9 +143,6 @@ RelationGetBufferWithBuffer(Relation relation, * opened already. */ -extern int ShowPinTrace; - - #undef ReadBuffer /* conflicts with macro when BUFMGR_DEBUG * defined */ @@ -499,6 +493,7 @@ BufferAlloc(Relation reln, SignalIO(buf); #endif /* !HAS_TEST_AND_SET */ PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; + Assert(buf->refcount > 0); buf->refcount--; if (buf->refcount == 0) { @@ -575,10 +570,14 @@ BufferAlloc(Relation reln, SignalIO(buf); #endif /* !HAS_TEST_AND_SET */ /* give up the buffer since we don't need it any more */ - buf->refcount--; PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; - AddBufferToFreelist(buf); - buf->flags |= BM_FREE; + Assert(buf->refcount > 0); + buf->refcount--; + if (buf->refcount == 0) + { + AddBufferToFreelist(buf); + buf->flags |= BM_FREE; + } buf->flags &= ~BM_IO_IN_PROGRESS; } @@ -791,7 +790,7 @@ FlushBuffer(Buffer buffer, bool release) int status; if (BufferIsLocal(buffer)) - return FlushLocalBuffer(buffer, release); + return FlushLocalBuffer(buffer, release) ? 
STATUS_OK : STATUS_ERROR; if (BAD_BUFFER_ID(buffer)) return STATUS_ERROR; @@ -813,7 +812,7 @@ FlushBuffer(Buffer buffer, bool release) status = smgrflush(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum, (char *) MAKE_PTR(bufHdr->data)); - /* drop relcache refcount incremented by RelationIdCacheGetRelation */ + /* drop relcache refcnt incremented by RelationIdCacheGetRelation */ RelationDecrementReferenceCount(bufrel); if (status == SM_FAIL) @@ -908,15 +907,10 @@ ReleaseAndReadBuffer(Buffer buffer, bufHdr = &BufferDescriptors[buffer - 1]; Assert(PrivateRefCount[buffer - 1] > 0); PrivateRefCount[buffer - 1]--; - if (PrivateRefCount[buffer - 1] == 0 && - LastRefCount[buffer - 1] == 0) + if (PrivateRefCount[buffer - 1] == 0) { - - /* - * only release buffer if it is not pinned in previous - * ExecMain level - */ SpinAcquire(BufMgrLock); + Assert(bufHdr->refcount > 0); bufHdr->refcount--; if (bufHdr->refcount == 0) { @@ -994,7 +988,7 @@ BufferSync() elog(ERROR, "BufferSync: write error %u for %s", bufHdr->tag.blockNum, bufHdr->sb_relname); } - /* drop refcount from RelationIdCacheGetRelation */ + /* drop refcnt from RelationIdCacheGetRelation */ if (reln != (Relation) NULL) RelationDecrementReferenceCount(reln); continue; @@ -1049,7 +1043,7 @@ BufferSync() */ if (!(bufHdr->flags & BM_JUST_DIRTIED)) bufHdr->flags &= ~BM_DIRTY; - /* drop refcount from RelationIdCacheGetRelation */ + /* drop refcnt from RelationIdCacheGetRelation */ if (reln != (Relation) NULL) RelationDecrementReferenceCount(reln); } @@ -1175,7 +1169,7 @@ ResetBufferUsage() * ResetBufferPool * * this routine is supposed to be called when a transaction aborts. - * it will release all the buffer pins held by the transaciton. + * it will release all the buffer pins held by the transaction. 
* * ---------------------------------------------- */ @@ -1184,15 +1178,24 @@ ResetBufferPool() { int i; - for (i = 1; i <= NBuffers; i++) + for (i = 0; i < NBuffers; i++) { - CommitInfoNeedsSave[i - 1] = 0; - if (BufferIsValid(i)) + if (PrivateRefCount[i] != 0) { - while (PrivateRefCount[i - 1] > 0) - ReleaseBuffer(i); + BufferDesc *buf = &BufferDescriptors[i]; + + SpinAcquire(BufMgrLock); + Assert(buf->refcount > 0); + buf->refcount--; + if (buf->refcount == 0) + { + AddBufferToFreelist(buf); + buf->flags |= BM_FREE; + } + SpinRelease(BufMgrLock); } - LastRefCount[i - 1] = 0; + PrivateRefCount[i] = 0; + CommitInfoNeedsSave[i] = 0; } ResetLocalBufferPool(); @@ -1213,7 +1216,7 @@ BufferPoolCheckLeak() for (i = 1; i <= NBuffers; i++) { - if (BufferIsValid(i)) + if (PrivateRefCount[i - 1] != 0) { BufferDesc *buf = &(BufferDescriptors[i - 1]); @@ -1226,7 +1229,7 @@ relname=%s, blockNum=%d, flags=0x%x, refcount=%d %d)", result = 1; } } - return (result); + return result; } /* ------------------------------------------------ @@ -1287,7 +1290,7 @@ BufferGetRelation(Buffer buffer) relation = RelationIdGetRelation(relid); Assert(relation); - /* drop relcache refcount incremented by RelationIdGetRelation */ + /* drop relcache refcnt incremented by RelationIdGetRelation */ RelationDecrementReferenceCount(relation); if (RelationHasReferenceCountZero(relation)) @@ -1354,7 +1357,7 @@ BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld) (char *) MAKE_PTR(bufHdr->data)); } - /* drop relcache refcount incremented by RelationIdCacheGetRelation */ + /* drop relcache refcnt incremented by RelationIdCacheGetRelation */ if (reln != (Relation) NULL) RelationDecrementReferenceCount(reln); @@ -1549,10 +1552,27 @@ BufferPoolBlowaway() #endif /* --------------------------------------------------------------------- - * BlowawayRelationBuffers + * FlushRelationBuffers * - * This function blowaway all the pages with blocknumber >= passed - * of a relation in the buffer pool. 
Used by vacuum before truncation... + * This function removes from the buffer pool all pages of a relation + * that have blocknumber >= specified block. If doFlush is true, + * dirty buffers are written out --- otherwise it's an error for any + * of the buffers to be dirty. + * + * This is used by VACUUM before truncating the relation to the given + * number of blocks. For VACUUM, we pass doFlush = false since it would + * mean a bug in VACUUM if any of the unwanted pages were still dirty. + * (TRUNCATE TABLE also uses it in the same way.) + * + * This is also used by RENAME TABLE (with block = 0 and doFlush = true) + * to clear out the buffer cache before renaming the physical files of + * a relation. Without that, some other backend might try to do a + * blind write of a buffer page (relying on the sb_relname of the buffer) + * and fail because it's not got the right filename anymore. + * + * In both cases, the caller should be holding AccessExclusiveLock on + * the target relation to ensure that no other backend is busy reading + * more blocks of the relation... 
* * Returns: 0 - Ok, -1 - DIRTY, -2 - PINNED * @@ -1561,7 +1581,7 @@ BufferPoolBlowaway() * -------------------------------------------------------------------- */ int -BlowawayRelationBuffers(Relation rel, BlockNumber block) +FlushRelationBuffers(Relation rel, BlockNumber block, bool doFlush) { int i; BufferDesc *buf; @@ -1576,13 +1596,25 @@ BlowawayRelationBuffers(Relation rel, BlockNumber block) { if (buf->flags & BM_DIRTY) { - elog(NOTICE, "BlowawayRelationBuffers(%s (local), %u): block %u is dirty", - rel->rd_rel->relname.data, block, buf->tag.blockNum); - return -1; + if (doFlush) + { + if (FlushBuffer(-i-1, false) != STATUS_OK) + { + elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty, could not flush it", + rel->rd_rel->relname.data, block, buf->tag.blockNum); + return -1; + } + } + else + { + elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty", + rel->rd_rel->relname.data, block, buf->tag.blockNum); + return -1; + } } if (LocalRefCount[i] > 0) { - elog(NOTICE, "BlowawayRelationBuffers(%s (local), %u): block %u is referenced (%d)", + elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is referenced (%d)", rel->rd_rel->relname.data, block, buf->tag.blockNum, LocalRefCount[i]); return -2; @@ -1603,18 +1635,33 @@ BlowawayRelationBuffers(Relation rel, BlockNumber block) { if (buf->flags & BM_DIRTY) { - elog(NOTICE, "BlowawayRelationBuffers(%s, %u): block %u is dirty (private %d, last %d, global %d)", - buf->sb_relname, block, buf->tag.blockNum, - PrivateRefCount[i], LastRefCount[i], buf->refcount); - SpinRelease(BufMgrLock); - return -1; + if (doFlush) + { + SpinRelease(BufMgrLock); + if (FlushBuffer(i+1, false) != STATUS_OK) + { + elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is dirty (private %d, global %d), could not flush it", + buf->sb_relname, block, buf->tag.blockNum, + PrivateRefCount[i], buf->refcount); + return -1; + } + SpinAcquire(BufMgrLock); + } + else + { + SpinRelease(BufMgrLock); + 
elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is dirty (private %d, global %d)", + buf->sb_relname, block, buf->tag.blockNum, + PrivateRefCount[i], buf->refcount); + return -1; + } } if (!(buf->flags & BM_FREE)) { - elog(NOTICE, "BlowawayRelationBuffers(%s, %u): block %u is referenced (private %d, last %d, global %d)", - buf->sb_relname, block, buf->tag.blockNum, - PrivateRefCount[i], LastRefCount[i], buf->refcount); SpinRelease(BufMgrLock); + elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is referenced (private %d, global %d)", + buf->sb_relname, block, buf->tag.blockNum, + PrivateRefCount[i], buf->refcount); return -2; } BufTableDelete(buf); @@ -1650,14 +1697,10 @@ ReleaseBuffer(Buffer buffer) Assert(PrivateRefCount[buffer - 1] > 0); PrivateRefCount[buffer - 1]--; - if (PrivateRefCount[buffer - 1] == 0 && LastRefCount[buffer - 1] == 0) + if (PrivateRefCount[buffer - 1] == 0) { - - /* - * only release buffer if it is not pinned in previous ExecMain - * levels - */ SpinAcquire(BufMgrLock); + Assert(bufHdr->refcount > 0); bufHdr->refcount--; if (bufHdr->refcount == 0) { @@ -1892,32 +1935,6 @@ _bm_die(Oid dbId, Oid relId, int blkNo, int bufNo, #endif /* BMTRACE */ -void -BufferRefCountReset(int *refcountsave) -{ - int i; - - for (i = 0; i < NBuffers; i++) - { - refcountsave[i] = PrivateRefCount[i]; - LastRefCount[i] += PrivateRefCount[i]; - PrivateRefCount[i] = 0; - } -} - -void -BufferRefCountRestore(int *refcountsave) -{ - int i; - - for (i = 0; i < NBuffers; i++) - { - PrivateRefCount[i] = refcountsave[i]; - LastRefCount[i] -= refcountsave[i]; - refcountsave[i] = 0; - } -} - int SetBufferWriteMode(int mode) { diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index d8f70a3287..f59a2cc81e 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.18 1999/07/17 20:17:41 
momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.19 1999/09/24 00:24:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -105,7 +105,7 @@ PinBuffer(BufferDesc *buf) b = BufferDescriptorGetBuffer(buf) - 1; Assert(PrivateRefCount[b] >= 0); - if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0) + if (PrivateRefCount[b] == 0) buf->refcount++; PrivateRefCount[b]++; } @@ -138,10 +138,10 @@ UnpinBuffer(BufferDesc *buf) { long b = BufferDescriptorGetBuffer(buf) - 1; - Assert(buf->refcount); + Assert(buf->refcount > 0); Assert(PrivateRefCount[b] > 0); PrivateRefCount[b]--; - if (PrivateRefCount[b] == 0 && LastRefCount[b] == 0) + if (PrivateRefCount[b] == 0) buf->refcount--; NotInQueue(buf); diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index e003595bed..6c0d1431e5 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.27 1999/09/18 19:07:26 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/localbuf.c,v 1.28 1999/09/24 00:24:29 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -193,9 +193,11 @@ FlushLocalBuffer(Buffer buffer, bool release) /* drop relcache refcount incremented by RelationIdCacheGetRelation */ RelationDecrementReferenceCount(bufrel); - Assert(LocalRefCount[bufid] > 0); if (release) + { + Assert(LocalRefCount[bufid] > 0); LocalRefCount[bufid]--; + } return true; } diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index b057aaa578..c23952c191 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.45 1999/07/17 20:17:44 momjian Exp $ + * $Header: 
/cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.46 1999/09/24 00:24:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -579,174 +579,3 @@ ShmemInitStruct(char *name, unsigned long size, bool *foundPtr) SpinRelease(ShmemIndexLock); return structPtr; } - -/* - * TransactionIdIsInProgress -- is given transaction running by some backend - * - * Strange place for this func, but we have to lookup process data structures - * for all running backends. - vadim 11/26/96 - * - * We should keep all PROC structs not in ShmemIndex - this is too - * general hash table... - * - */ -bool -TransactionIdIsInProgress(TransactionId xid) -{ - ShmemIndexEnt *result; - PROC *proc; - - Assert(ShmemIndex); - - SpinAcquire(ShmemIndexLock); - - hash_seq((HTAB *) NULL); - while ((result = (ShmemIndexEnt *) hash_seq(ShmemIndex)) != NULL) - { - if (result == (ShmemIndexEnt *) TRUE) - { - SpinRelease(ShmemIndexLock); - return false; - } - if (result->location == INVALID_OFFSET || - strncmp(result->key, "PID ", 4) != 0) - continue; - proc = (PROC *) MAKE_PTR(result->location); - if (proc->xid == xid) - { - SpinRelease(ShmemIndexLock); - return true; - } - } - - SpinRelease(ShmemIndexLock); - elog(ERROR, "TransactionIdIsInProgress: ShmemIndex corrupted"); - return false; -} - -/* - * GetSnapshotData -- returns information about running transactions. - * - * Yet another strange func for this place... - vadim 07/21/98 - */ -Snapshot -GetSnapshotData(bool serializable) -{ - Snapshot snapshot = (Snapshot) malloc(sizeof(SnapshotData)); - ShmemIndexEnt *result; - PROC *proc; - TransactionId cid = GetCurrentTransactionId(); - TransactionId xid; - uint32 count = 0; - uint32 have = 32; - - Assert(ShmemIndex); - - snapshot->xip = (TransactionId *) malloc(have * sizeof(TransactionId)); - snapshot->xmin = cid; - - SpinAcquire(ShmemIndexLock); - /* - * Unfortunately, we have to call ReadNewTransactionId() - * after acquiring ShmemIndexLock above. 
It's not good because of - * ReadNewTransactionId() does SpinAcquire(OidGenLockId) but - * _necessary_. - */ - ReadNewTransactionId(&(snapshot->xmax)); - - hash_seq((HTAB *) NULL); - while ((result = (ShmemIndexEnt *) hash_seq(ShmemIndex)) != NULL) - { - if (result == (ShmemIndexEnt *) TRUE) - { - if (serializable) - MyProc->xmin = snapshot->xmin; - /* Serializable snapshot must be computed before any other... */ - Assert(MyProc->xmin != InvalidTransactionId); - SpinRelease(ShmemIndexLock); - snapshot->xcnt = count; - return snapshot; - } - if (result->location == INVALID_OFFSET || - strncmp(result->key, "PID ", 4) != 0) - continue; - proc = (PROC *) MAKE_PTR(result->location); - /* - * We don't use spin-locking when changing proc->xid - * in GetNewTransactionId() and in AbortTransaction() !.. - */ - xid = proc->xid; - if (proc == MyProc || - xid < FirstTransactionId || xid >= snapshot->xmax) - { - /* - * Seems that there is no sense to store xid >= snapshot->xmax - * (what we got from ReadNewTransactionId above) in snapshot->xip - * - we just assume that all xacts with such xid-s are running - * and may be ignored. - */ - continue; - } - if (xid < snapshot->xmin) - snapshot->xmin = xid; - if (have == 0) - { - snapshot->xip = (TransactionId *) realloc(snapshot->xip, - (count + 32) * sizeof(TransactionId)); - have = 32; - } - snapshot->xip[count] = xid; - have--; - count++; - } - - SpinRelease(ShmemIndexLock); - free(snapshot->xip); - free(snapshot); - elog(ERROR, "GetSnapshotData: ShmemIndex corrupted"); - return NULL; -} - -/* - * GetXmaxRecent -- returns oldest transaction that was running - * when all current transaction was started. - * It's used by vacuum to decide what deleted - * tuples must be preserved in a table. - * - * And yet another strange func for this place... 
- vadim 03/18/99 - */ -void -GetXmaxRecent(TransactionId *XmaxRecent) -{ - ShmemIndexEnt *result; - PROC *proc; - TransactionId xmin; - - Assert(ShmemIndex); - - *XmaxRecent = GetCurrentTransactionId(); - - SpinAcquire(ShmemIndexLock); - - hash_seq((HTAB *) NULL); - while ((result = (ShmemIndexEnt *) hash_seq(ShmemIndex)) != NULL) - { - if (result == (ShmemIndexEnt *) TRUE) - { - SpinRelease(ShmemIndexLock); - return; - } - if (result->location == INVALID_OFFSET || - strncmp(result->key, "PID ", 4) != 0) - continue; - proc = (PROC *) MAKE_PTR(result->location); - xmin = proc->xmin; /* we don't use spin-locking in AbortTransaction() ! */ - if (proc == MyProc || xmin < FirstTransactionId) - continue; - if (xmin < *XmaxRecent) - *XmaxRecent = xmin; - } - - SpinRelease(ShmemIndexLock); - elog(ERROR, "GetXmaxRecent: ShmemIndex corrupted"); -} diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c index c1a557033b..42c22faa2f 100644 --- a/src/backend/storage/ipc/sinval.c +++ b/src/backend/storage/ipc/sinval.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.18 1999/09/06 19:37:38 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.19 1999/09/24 00:24:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,8 +18,10 @@ #include "postgres.h" #include "storage/backendid.h" +#include "storage/proc.h" #include "storage/sinval.h" #include "storage/sinvaladt.h" +#include "utils/tqual.h" SPINLOCK SInvalLock = (SPINLOCK) NULL; @@ -165,3 +167,201 @@ InvalidateSharedInvalid(void (*invalFunction) (), SpinRelease(SInvalLock); } } + + +/****************************************************************************/ +/* Functions that need to scan the PROC structures of all running backends. */ +/* It's a bit strange to keep these in sinval.c, since they don't have any */ +/* direct relationship to shared-cache invalidation. 
But the procState */ +/* array in the SI segment is the only place in the system where we have */ +/* an array of per-backend data, so it is the most convenient place to keep */ +/* pointers to the backends' PROC structures. We used to implement these */ +/* functions with a slow, ugly search through the ShmemIndex hash table --- */ +/* now they are simple loops over the SI ProcState array. */ +/****************************************************************************/ + + +/* + * DatabaseHasActiveBackends -- are there any backends running in the given DB + * + * This function is used to interlock DROP DATABASE against there being + * any active backends in the target DB --- dropping the DB while active + * backends remain would be a Bad Thing. Note that we cannot detect here + * the possibility of a newly-started backend that is trying to connect + * to the doomed database, so additional interlocking is needed during + * backend startup. + */ + +bool +DatabaseHasActiveBackends(Oid databaseId) +{ + bool result = false; + SISeg *segP = shmInvalBuffer; + ProcState *stateP = segP->procState; + int index; + + SpinAcquire(SInvalLock); + + for (index = 0; index < segP->maxBackends; index++) + { + SHMEM_OFFSET pOffset = stateP[index].procStruct; + + if (pOffset != INVALID_OFFSET) + { + PROC *proc = (PROC *) MAKE_PTR(pOffset); + + if (proc->databaseId == databaseId) + { + result = true; + break; + } + } + } + + SpinRelease(SInvalLock); + + return result; +} + +/* + * TransactionIdIsInProgress -- is given transaction running by some backend + */ +bool +TransactionIdIsInProgress(TransactionId xid) +{ + bool result = false; + SISeg *segP = shmInvalBuffer; + ProcState *stateP = segP->procState; + int index; + + SpinAcquire(SInvalLock); + + for (index = 0; index < segP->maxBackends; index++) + { + SHMEM_OFFSET pOffset = stateP[index].procStruct; + + if (pOffset != INVALID_OFFSET) + { + PROC *proc = (PROC *) MAKE_PTR(pOffset); + + if (proc->xid == xid) + { + result = true; + 
break; + } + } + } + + SpinRelease(SInvalLock); + + return result; +} + +/* + * GetXmaxRecent -- returns oldest transaction that was running + * when all current transaction was started. + * It's used by vacuum to decide what deleted + * tuples must be preserved in a table. + */ +void +GetXmaxRecent(TransactionId *XmaxRecent) +{ + SISeg *segP = shmInvalBuffer; + ProcState *stateP = segP->procState; + int index; + + *XmaxRecent = GetCurrentTransactionId(); + + SpinAcquire(SInvalLock); + + for (index = 0; index < segP->maxBackends; index++) + { + SHMEM_OFFSET pOffset = stateP[index].procStruct; + + if (pOffset != INVALID_OFFSET) + { + PROC *proc = (PROC *) MAKE_PTR(pOffset); + TransactionId xmin; + + xmin = proc->xmin; /* we don't use spin-locking in AbortTransaction() ! */ + if (proc == MyProc || xmin < FirstTransactionId) + continue; + if (xmin < *XmaxRecent) + *XmaxRecent = xmin; + } + } + + SpinRelease(SInvalLock); +} + +/* + * GetSnapshotData -- returns information about running transactions. + */ +Snapshot +GetSnapshotData(bool serializable) +{ + Snapshot snapshot = (Snapshot) malloc(sizeof(SnapshotData)); + SISeg *segP = shmInvalBuffer; + ProcState *stateP = segP->procState; + int index; + int count = 0; + + /* There can be no more than maxBackends active transactions, + * so this is enough space: + */ + snapshot->xip = (TransactionId *) + malloc(segP->maxBackends * sizeof(TransactionId)); + snapshot->xmin = GetCurrentTransactionId(); + + SpinAcquire(SInvalLock); + + /* + * Unfortunately, we have to call ReadNewTransactionId() + * after acquiring SInvalLock above. It's not good because + * ReadNewTransactionId() does SpinAcquire(OidGenLockId) but + * _necessary_. 
+ */ + ReadNewTransactionId(&(snapshot->xmax)); + + for (index = 0; index < segP->maxBackends; index++) + { + SHMEM_OFFSET pOffset = stateP[index].procStruct; + + if (pOffset != INVALID_OFFSET) + { + PROC *proc = (PROC *) MAKE_PTR(pOffset); + TransactionId xid; + + /* + * We don't use spin-locking when changing proc->xid + * in GetNewTransactionId() and in AbortTransaction() !.. + */ + xid = proc->xid; + if (proc == MyProc || + xid < FirstTransactionId || xid >= snapshot->xmax) + { + /* + * Seems that there is no sense to store xid >= snapshot->xmax + * (what we got from ReadNewTransactionId above) in + * snapshot->xip - we just assume that all xacts with such + * xid-s are running and may be ignored. + */ + continue; + } + if (xid < snapshot->xmin) + snapshot->xmin = xid; + snapshot->xip[count] = xid; + count++; + } + } + + if (serializable) + MyProc->xmin = snapshot->xmin; + /* Serializable snapshot must be computed before any other... */ + Assert(MyProc->xmin != InvalidTransactionId); + + SpinRelease(SInvalLock); + + snapshot->xcnt = count; + return snapshot; +} diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c index 1a91dde9b1..f2e531be44 100644 --- a/src/backend/storage/ipc/sinvaladt.c +++ b/src/backend/storage/ipc/sinvaladt.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.26 1999/09/09 14:56:06 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.27 1999/09/24 00:24:35 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,8 @@ #include "miscadmin.h" #include "storage/backendid.h" #include "storage/lmgr.h" +#include "storage/proc.h" +#include "storage/sinval.h" #include "storage/sinvaladt.h" #include "utils/trace.h" @@ -125,6 +127,7 @@ SISegInit(SISeg *segP, int maxBackends) segP->procState[i].nextMsgNum = -1; /* inactive */ segP->procState[i].resetState = false; segP->procState[i].tag = 
InvalidBackendTag; + segP->procState[i].procStruct = INVALID_OFFSET; } } @@ -161,8 +164,8 @@ SIBackendInit(SISeg *segP) } } - /* elog() with spinlock held is probably not too cool, but these - * conditions should never happen anyway. + /* elog() with spinlock held is probably not too cool, but this + * condition should never happen anyway. */ if (stateP == NULL) { @@ -179,9 +182,10 @@ SIBackendInit(SISeg *segP) #endif /* INVALIDDEBUG */ /* mark myself active, with all extant messages already read */ - stateP->tag = MyBackendTag; - stateP->resetState = false; stateP->nextMsgNum = segP->maxMsgNum; + stateP->resetState = false; + stateP->tag = MyBackendTag; + stateP->procStruct = MAKE_OFFSET(MyProc); /* register exit routine to mark my entry inactive at exit */ on_shmem_exit(CleanupInvalidationState, (caddr_t) segP); @@ -193,7 +197,8 @@ SIBackendInit(SISeg *segP) * CleanupInvalidationState * Mark the current backend as no longer active. * - * This function is called via on_shmem_exit() during backend shutdown. + * This function is called via on_shmem_exit() during backend shutdown, + * so the caller has NOT acquired the lock for us. */ static void CleanupInvalidationState(int status, @@ -201,13 +206,14 @@ CleanupInvalidationState(int status, { Assert(PointerIsValid(segP)); - /* XXX we probably oughta grab the SInval spinlock for this... - * but I think it is safe not to. 
- */ + SpinAcquire(SInvalLock); segP->procState[MyBackendId - 1].nextMsgNum = -1; segP->procState[MyBackendId - 1].resetState = false; segP->procState[MyBackendId - 1].tag = InvalidBackendTag; + segP->procState[MyBackendId - 1].procStruct = INVALID_OFFSET; + + SpinRelease(SInvalLock); } /* diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 6186904ad4..159edf0549 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.60 1999/07/17 20:17:47 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.61 1999/09/24 00:24:41 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -46,7 +46,7 @@ * This is so that we can support more backends. (system-wide semaphore * sets run out pretty fast.) -ay 4/95 * - * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.60 1999/07/17 20:17:47 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.61 1999/09/24 00:24:41 tgl Exp $ */ #include #include @@ -296,6 +296,7 @@ InitProcess(IPCKey key) SpinRelease(ProcStructLock); MyProc->pid = MyProcPid; + MyProc->databaseId = MyDatabaseId; MyProc->xid = InvalidTransactionId; MyProc->xmin = InvalidTransactionId; diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 304dc786f2..444181a938 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.53 1999/09/05 23:24:53 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.54 1999/09/24 00:24:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -576,12 +576,10 @@ mdblindwrt(char *dbstr, /* this is work arround only !!! 
*/ { char dbpath[MAXPGPATH + 1]; - int4 owner; Oid id; char *tmpPath; - int tmpEncoding; - GetRawDatabaseInfo(dbstr, &owner, &id, dbpath, &tmpEncoding); + GetRawDatabaseInfo(dbstr, &id, dbpath); if (id != dbid) elog(FATAL, "mdblindwrt: oid of db %s is not %u", dbstr, dbid); @@ -615,12 +613,10 @@ mdblindwrt(char *dbstr, /* this is work arround only !!! */ { char dbpath[MAXPGPATH + 1]; - int4 owner; Oid id; char *tmpPath; - int tmpEncoding; - GetRawDatabaseInfo(dbstr, &owner, &id, dbpath, &tmpEncoding); + GetRawDatabaseInfo(dbstr, &id, dbpath); if (id != dbid) elog(FATAL, "mdblindwrt: oid of db %s is not %u", dbstr, dbid); diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 3986bee47b..4947b29137 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.128 1999/08/31 04:26:40 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.129 1999/09/24 00:24:52 tgl Exp $ * * NOTES * this is the "main" module of the postgres backend and @@ -113,7 +113,22 @@ char relname[80]; /* current relation name */ /* note: these declarations had better match tcopprot.h */ DLLIMPORT sigjmp_buf Warn_restart; -bool InError; + +bool InError = true; + +/* + * Note: InError is a flag to elog() telling whether it is safe to longjmp + * back to PostgresMain. It is "false", allowing an error longjmp, during + * normal processing. It is "true" during startup, when we have not yet + * set the Warn_restart jmp_buf, and also "true" in the interval when we + * have executed a longjmp back to PostgresMain and not yet finished cleaning + * up after the error. In either case, elog(ERROR) should be treated as a + * fatal exit condition rather than attempting to recover --- since there is + * noplace to recover to in the first case, and we don't want to risk an + * infinite loop of "error recoveries" in the second case. 
+ * + * Therefore, InError starts out "true" at program load time, as shown above. + */ extern int NBuffers; @@ -1469,7 +1484,7 @@ PostgresMain(int argc, char *argv[], int real_argc, char *real_argv[]) if (!IsUnderPostmaster) { puts("\nPOSTGRES backend interactive interface "); - puts("$Revision: 1.128 $ $Date: 1999/08/31 04:26:40 $\n"); + puts("$Revision: 1.129 $ $Date: 1999/09/24 00:24:52 $\n"); } /* ---------------- @@ -1479,6 +1494,7 @@ PostgresMain(int argc, char *argv[], int real_argc, char *real_argv[]) * so we abort the current transaction and start a new one. * * Note: elog(ERROR) does a siglongjmp() to transfer control here. + * See comments with the declaration of InError, above. * ---------------- */ diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 6307f0cd6a..cbbe82eb3b 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/tcop/pquery.c,v 1.27 1999/07/17 20:17:51 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/tcop/pquery.c,v 1.28 1999/09/24 00:24:53 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -55,7 +55,6 @@ CreateExecutorState(void) { EState *state; extern int NBuffers; - long *refcount; /* ---------------- * create a new executor state @@ -81,10 +80,6 @@ CreateExecutorState(void) state->es_junkFilter = NULL; - refcount = (long *) palloc(NBuffers * sizeof(long)); - MemSet((char *) refcount, 0, NBuffers * sizeof(long)); - state->es_refcount = (int *) refcount; - /* ---------------- * return the executor state structure * ---------------- diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 4118fe16a1..51d4727fe3 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.48 1999/07/17 20:18:08 momjian Exp $ + * $Header: 
/cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.49 1999/09/24 00:24:58 tgl Exp $ * * NOTES * InitPostgres() is the function called from PostgresMain @@ -36,6 +36,7 @@ #include "access/heapam.h" #include "catalog/catname.h" +#include "catalog/pg_database.h" #include "libpq/libpq.h" #include "miscadmin.h" #include "storage/backendid.h" @@ -54,13 +55,12 @@ static void VerifySystemDatabase(void); static void VerifyMyDatabase(void); +static void ReverifyMyDatabase(char *name); static void InitCommunication(void); static void InitMyDatabaseInfo(char *name); static void InitStdio(void); static void InitUserid(void); -extern char *ExpandDatabasePath(char *name); -extern void GetRawDatabaseInfo(char *name, int4 *owner, Oid *db_id, char *path, int *encoding); static IPCKey PostgresIpcKey; @@ -98,13 +98,11 @@ static IPCKey PostgresIpcKey; static void InitMyDatabaseInfo(char *name) { - int4 owner; char *path, myPath[MAXPGPATH + 1]; - int encoding; SetDatabaseName(name); - GetRawDatabaseInfo(name, &owner, &MyDatabaseId, myPath, &encoding); + GetRawDatabaseInfo(name, &MyDatabaseId, myPath); if (!OidIsValid(MyDatabaseId)) elog(FATAL, @@ -114,11 +112,6 @@ InitMyDatabaseInfo(char *name) path = ExpandDatabasePath(myPath); SetDatabasePath(path); -#ifdef MULTIBYTE - SetDatabaseEncoding(encoding); -#endif - - return; } /* InitMyDatabaseInfo() */ @@ -249,6 +242,86 @@ VerifyMyDatabase() /* Above does not return */ } /* VerifyMyDatabase() */ +/* -------------------------------- + * ReverifyMyDatabase + * + * Since we are forced to fetch the database OID out of pg_database without + * benefit of locking or transaction ID checking (see utils/misc/database.c), + * we might have gotten a wrong answer. Or, we might have attached to a + * database that's in process of being destroyed by destroydb(). This + * routine is called after we have all the locking and other infrastructure + * running --- now we can check that we are really attached to a valid + * database. 
+ * + * In reality, if destroydb() is running in parallel with our startup, + * it's pretty likely that we will have failed before now, due to being + * unable to read some of the system tables within the doomed database. + * This routine just exists to make *sure* we have not started up in an + * invalid database. If we quit now, we should have managed to avoid + * creating any serious problems. + * + * This is also a handy place to fetch the database encoding info out + * of pg_database, if we are in MULTIBYTE mode. + * -------------------------------- + */ +static void +ReverifyMyDatabase(char *name) +{ + Relation pgdbrel; + HeapScanDesc pgdbscan; + ScanKeyData key; + HeapTuple tup; + + /* + * Because we grab AccessShareLock here, we can be sure that + * destroydb is not running in parallel with us (any more). + */ + pgdbrel = heap_openr(DatabaseRelationName, AccessShareLock); + + ScanKeyEntryInitialize(&key, 0, Anum_pg_database_datname, + F_NAMEEQ, NameGetDatum(name)); + + pgdbscan = heap_beginscan(pgdbrel, 0, SnapshotNow, 1, &key); + + tup = heap_getnext(pgdbscan, 0); + if (!HeapTupleIsValid(tup) || + tup->t_data->t_oid != MyDatabaseId) + { + /* OOPS */ + heap_close(pgdbrel, AccessShareLock); + /* + * The only real problem I could have created is to load dirty + * buffers for the dead database into shared buffer cache; + * if I did, some other backend will eventually try to write + * them and die in mdblindwrt. Flush any such pages to forestall + * trouble. + */ + DropBuffers(MyDatabaseId); + /* Now I can commit hara-kiri with a clear conscience... */ + elog(FATAL, "Database '%s', OID %u, has disappeared from pg_database", + name, MyDatabaseId); + } + + /* + * OK, we're golden. Only other to-do item is to save the MULTIBYTE + * encoding info out of the pg_database tuple. 
Note we also set the + * "template encoding", which is the default encoding for any + * CREATE DATABASE commands executed in this backend; essentially, + * the encoding of the database you connected to becomes + * the default. (This replaces code that unreliably grabbed + * template1's encoding out of pg_database. We could do an extra + * scan to find template1's tuple, but for 99.99% of all backend + * startups it'd be wasted cycles --- and the 'createdb' script + * connects to template1 anyway, so there's no difference.) + */ +#ifdef MULTIBYTE + SetDatabaseEncoding(((Form_pg_database) GETSTRUCT(tup))->encoding); + SetTemplateEncoding(((Form_pg_database) GETSTRUCT(tup))->encoding); +#endif + + heap_endscan(pgdbscan); + heap_close(pgdbrel, AccessShareLock); +} /* -------------------------------- * InitUserid @@ -402,17 +475,11 @@ InitStdio() * Be very careful with the order of calls in the InitPostgres function. * -------------------------------- */ -bool PostgresIsInitialized = false; extern int NBuffers; -/* - * this global is used by wei for testing his code, but must be declared - * here rather than in postgres.c so that it's defined for cinterface.a - * applications. - */ +bool PostgresIsInitialized = false; -/*int testFlag = 0;*/ -int lockingOff = 0; +int lockingOff = 0; /* backend -L switch */ /* */ @@ -530,22 +597,22 @@ InitPostgres(char *name) /* database name */ LockDisable(false); /* ---------------- - * anyone knows what this does? something having to do with - * system catalog cache invalidation in the case of multiple - * backends, I think -cim 10/3/90 - * Sets up MyBackendId a unique backend identifier. - * ---------------- - */ - InitSharedInvalidationState(); - - /* ---------------- - * Set up a per backend process in shared memory. Must be done after - * InitSharedInvalidationState() as it relies on MyBackendId being - * initialized already. XXX -mer 11 Aug 1991 + * Set up my per-backend PROC struct in shared memory.
* ---------------- */ InitProcess(PostgresIpcKey); + /* ---------------- + * Initialize my entry in the shared-invalidation manager's + * array of per-backend data. (Formerly this came before + * InitProcess, but now it must happen after, because it uses + * MyProc.) Once I have done this, I am visible to other backends! + * + * Sets up MyBackendId, a unique backend identifier. + * ---------------- + */ + InitSharedInvalidationState(); + if (MyBackendId > MAXBACKENDS || MyBackendId <= 0) { elog(FATAL, "cinit2: bad backend id %d (%d)", @@ -592,7 +659,6 @@ InitPostgres(char *name) /* database name */ * ---------------- */ PostgresIsInitialized = true; -/* on_shmem_exit(DestroyLocalRelList, (caddr_t) NULL); */ /* ---------------- * Done with "InitPostgres", now change to NormalProcessing unless @@ -601,7 +667,14 @@ InitPostgres(char *name) /* database name */ */ if (!bootstrap) SetProcessingMode(NormalProcessing); -/* if (testFlag || lockingOff) */ if (lockingOff) LockDisable(true); + + /* + * Unless we are bootstrapping, double-check that InitMyDatabaseInfo() + * got a correct result. We can't do this until essentially all the + * infrastructure is up, so just do it at the end. 
+ */ + if (!bootstrap) + ReverifyMyDatabase(name); } diff --git a/src/backend/utils/misc/database.c b/src/backend/utils/misc/database.c index 321ab943ae..f5ff732b8f 100644 --- a/src/backend/utils/misc/database.c +++ b/src/backend/utils/misc/database.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * database.c - * miscellanious initialization support stuff + * miscellaneous initialization support stuff * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/misc/Attic/database.c,v 1.29 1999/09/18 19:08:07 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/misc/Attic/database.c,v 1.30 1999/09/24 00:25:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,10 +21,6 @@ #include "miscadmin.h" #include "utils/syscache.h" -#ifdef MULTIBYTE -#include "mb/pg_wchar.h" -#endif - #ifdef NOT_USED /* GetDatabaseInfo() * Pull database information from pg_database. @@ -153,24 +149,13 @@ ExpandDatabasePath(char *dbpath) * cache. To get around this problem, this code opens and scans the * pg_database relation by hand. * - * This algorithm relies on the fact that first attribute in the - * pg_database relation schema is the database name. It also knows - * about the internal format of tuples on disk and the length of - * the datname attribute. It knows the location of the pg_database - * file. - * Actually, the code looks as though it is using the pg_database - * tuple definition to locate the database name, so the above statement - * seems to be no longer correct. - thomas 1997-11-01 - * - * This code is called from InitPostgres(), before we chdir() to the - * local database directory and before we open any relations. - * Used to be called after the chdir(), but we now want to confirm - * the location of the target database using pg_database info. 
- * - thomas 1997-11-01 + * This code knows way more than it should about the layout of + * tuples on disk, but there seems to be no help for that. + * We're pulling ourselves up by the bootstraps here... * -------------------------------- */ void -GetRawDatabaseInfo(char *name, int4 *owner, Oid *db_id, char *path, int *encoding) +GetRawDatabaseInfo(char *name, Oid *db_id, char *path) { int dbfd; int fileflags; @@ -238,48 +223,38 @@ GetRawDatabaseInfo(char *name, int4 *owner, Oid *db_id, char *path, int *encodin * skip this tuple. XXX warning, will robinson: violation of * transaction semantics happens right here. we should check * to be sure that the xact that deleted this tuple actually - * committed. only way to do this at init time is to paw over - * the log relation by hand, too. let's be optimistic. + * committed. Only way to do that at init time is to paw over + * the log relation by hand, too. Instead we take the + * conservative assumption that if someone tried to delete it, + * it's gone. The other side of the coin is that we might + * accept a tuple that was stored and never committed. All in + * all, this code is pretty shaky. We will cross-check our + * result in ReverifyMyDatabase() in postinit.c. * - * XXX This is an evil type cast. tup->t_xmax is char[5] while - * TransactionId is struct * { char data[5] }. It works but - * if data is ever moved and no longer the first field this - * will be broken!! -mer 11 Nov 1991. + * NOTE: if a bogus tuple in pg_database prevents connection + * to a valid database, a fix is to connect to another database + * and do "select * from pg_database". That should cause + * committed and dead tuples to be marked with correct states. + * + * XXX wouldn't it be better to let new backends read the + * database OID from a flat file, handled the same way + * we handle the password relation? */ if (TransactionIdIsValid((TransactionId) tup.t_data->t_xmax)) continue; /* - * Okay, see if this is the one we want. 
XXX 1 july 91: mao - * and mer discover that tuples now squash t_bits. Why is - * this? - * - * 24 july 92: mer realizes that the t_bits field is only used - * in the event of null values. If no fields are null we - * reduce the header size by doing the squash. t_hoff tells - * you exactly how big the header actually is. use the PC - * means of getting at sys cat attrs. + * Okay, see if this is the one we want. */ tup_db = (Form_pg_database) GETSTRUCT(&tup); -#ifdef MULTIBYTE - /* - * get encoding from template database. This is the "default - * for default" for create database command. - */ - if (strcmp("template1", tup_db->datname.data) == 0) - SetTemplateEncoding(tup_db->encoding); -#endif if (strcmp(name, tup_db->datname.data) == 0) { + /* Found it; extract the OID and the database path. */ *db_id = tup.t_data->t_oid; strncpy(path, VARDATA(&(tup_db->datpath)), (VARSIZE(&(tup_db->datpath)) - VARHDRSZ)); *(path + VARSIZE(&(tup_db->datpath)) - VARHDRSZ) = '\0'; -#ifdef MULTIBYTE - *encoding = tup_db->encoding; -#endif - goto done; } } diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index a23c81227e..662358f459 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: executor.h,v 1.37 1999/07/17 20:18:26 momjian Exp $ + * $Id: executor.h,v 1.38 1999/09/24 00:25:10 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -36,22 +36,6 @@ ) \ ) -/* -------------------------------- - * ExecIncrSlotBufferRefcnt - * - * When we pass around buffers in the tuple table, we have to - * be careful to increment reference counts appropriately. - * This is used mainly in the mergejoin code. - * -------------------------------- - */ -#define ExecIncrSlotBufferRefcnt(slot) \ -( \ - BufferIsValid((slot)->ttc_buffer) ? 
\ - IncrBufferRefCount((slot)->ttc_buffer) \ - : (void)NULL \ -) - - /* * prototypes from functions in execAmi.c */ diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index a9e603053f..ecd567c364 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -11,7 +11,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: miscadmin.h,v 1.40 1999/06/12 22:17:23 tgl Exp $ + * $Id: miscadmin.h,v 1.41 1999/09/24 00:25:16 tgl Exp $ * * NOTES * some of the information in this file will be moved to @@ -111,7 +111,7 @@ extern char *DatabaseName; extern char *DatabasePath; /* in utils/misc/database.c */ -extern void GetRawDatabaseInfo(char *name, int4 *owner, Oid *db_id, char *path, int *encoding); +extern void GetRawDatabaseInfo(char *name, Oid *db_id, char *path); extern int GetDatabaseInfo(char *name, int4 *owner, char *path); extern char *ExpandDatabasePath(char *path); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 132f052667..3263e500f2 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: execnodes.h,v 1.34 1999/08/21 03:49:08 tgl Exp $ + * $Id: execnodes.h,v 1.35 1999/09/24 00:25:22 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -205,7 +205,6 @@ typedef struct EState int es_BaseId; TupleTable es_tupleTable; JunkFilter *es_junkFilter; - int *es_refcount; uint32 es_processed; /* # of tuples processed */ Oid es_lastoid; /* last oid processed (by INSERT) */ List *es_rowMark; /* not good place, but there is no other */ diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index b98f0fb820..b38d3ff5c8 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: buf_internals.h,v 1.32 
1999/09/18 19:08:18 tgl Exp $ + * $Id: buf_internals.h,v 1.33 1999/09/24 00:25:27 tgl Exp $ * * NOTE * If BUFFERPAGE0 is defined, then 0 will be used as a @@ -68,7 +68,7 @@ struct buftag (a)->relId = (xx_reln)->rd_lockInfo.lockRelId \ ) -#define BAD_BUFFER_ID(bid) ((bid<1) || (bid>(NBuffers))) +#define BAD_BUFFER_ID(bid) ((bid) < 1 || (bid) > NBuffers) #define INVALID_DESCRIPTOR (-3) /* @@ -168,7 +168,6 @@ extern bool BufTableInsert(BufferDesc *buf); extern BufferDesc *BufferDescriptors; extern BufferBlock BufferBlocks; extern long *PrivateRefCount; -extern long *LastRefCount; extern bits8 *BufferLocks; extern long *CommitInfoNeedsSave; extern SPINLOCK BufMgrLock; diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index b55230e136..fb901b8f44 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: bufmgr.h,v 1.30 1999/09/23 17:03:27 momjian Exp $ + * $Id: bufmgr.h,v 1.31 1999/09/24 00:25:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -71,12 +71,35 @@ extern int ShowPinTrace; /* * BufferIsValid - * True iff the refcnt of the local buffer is > 0 + * True iff the given buffer number is valid (either as a shared + * or local buffer). + * * Note: * BufferIsValid(InvalidBuffer) is False. * BufferIsValid(UnknownBuffer) is False. + * + * Note: For a long time this was defined the same as BufferIsPinned, + * that is it would say False if you didn't hold a pin on the buffer. + * I believe this was bogus and served only to mask logic errors. + * Code should always know whether it has a buffer reference, + * independently of the pin state. */ #define BufferIsValid(bufnum) \ +( \ + BufferIsLocal(bufnum) ? \ + ((bufnum) >= -NLocBuffer) \ + : \ + (! BAD_BUFFER_ID(bufnum)) \ +) + +/* + * BufferIsPinned + * True iff the buffer is pinned (also checks for valid buffer number). 
+ * + * NOTE: what we check here is that *this* backend holds a pin on + * the buffer. We do not care whether some other backend does. + */ +#define BufferIsPinned(bufnum) \ ( \ BufferIsLocal(bufnum) ? \ ((bufnum) >= -NLocBuffer && LocalRefCount[-(bufnum) - 1] > 0) \ @@ -90,28 +113,27 @@ extern int ShowPinTrace; ) /* - * BufferIsPinned - * True iff the buffer is pinned (and therefore valid) + * IncrBufferRefCount + * Increment the pin count on a buffer that we have *already* pinned + * at least once. * - * Note: - * Smenatics are identical to BufferIsValid - * XXX - need to remove either one eventually. + * This macro cannot be used on a buffer we do not have pinned, + * because it doesn't change the shared buffer state. Therefore the + * Assert checks are for refcount > 0. Someone got this wrong once... */ -#define BufferIsPinned BufferIsValid - - #define IncrBufferRefCount(buffer) \ ( \ BufferIsLocal(buffer) ? \ ( \ - (void)AssertMacro(LocalRefCount[-(buffer) - 1] >= 0), \ - (void)LocalRefCount[-(buffer) - 1]++ \ + (void) AssertMacro((buffer) >= -NLocBuffer), \ + (void) AssertMacro(LocalRefCount[-(buffer) - 1] > 0), \ + (void) LocalRefCount[-(buffer) - 1]++ \ ) \ : \ ( \ - (void)AssertMacro(!BAD_BUFFER_ID(buffer)), \ - (void)AssertMacro(PrivateRefCount[(buffer) - 1] >= 0), \ - (void)PrivateRefCount[(buffer) - 1]++ \ + (void) AssertMacro(!BAD_BUFFER_ID(buffer)), \ + (void) AssertMacro(PrivateRefCount[(buffer) - 1] > 0), \ + (void) PrivateRefCount[(buffer) - 1]++ \ ) \ ) @@ -151,19 +173,18 @@ extern int BufferPoolCheckLeak(void); extern void FlushBufferPool(int StableMainMemoryFlag); extern BlockNumber BufferGetBlockNumber(Buffer buffer); extern BlockNumber RelationGetNumberOfBlocks(Relation relation); +extern int FlushRelationBuffers(Relation rel, BlockNumber block, + bool doFlush); extern void ReleaseRelationBuffers(Relation rel); extern void DropBuffers(Oid dbid); extern void PrintPinnedBufs(void); extern int BufferShmemSize(void); extern int 
ReleaseBuffer(Buffer buffer); -extern void BufferRefCountReset(int *refcountsave); -extern void BufferRefCountRestore(int *refcountsave); extern int SetBufferWriteMode(int mode); extern void SetBufferCommitInfoNeedsSave(Buffer buffer); -extern int BlowawayRelationBuffers(Relation rel, BlockNumber block); extern void UnlockBuffers(void); extern void LockBuffer(Buffer buffer, int mode); -#endif /* !defined(BufMgrIncluded) */ +#endif diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 4a5eb5533a..d28e936b33 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: proc.h,v 1.25 1999/07/15 23:04:13 momjian Exp $ + * $Id: proc.h,v 1.26 1999/09/24 00:25:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -51,7 +51,8 @@ typedef struct proc LOCK *waitLock; /* Lock we're sleeping on ... */ int token; /* type of lock we sleeping for */ int holdLock; /* while holding these locks */ - int pid; /* This procs process id */ + int pid; /* This backend's process id */ + Oid databaseId; /* OID of database this backend is using */ short sLocks[MAX_SPINS]; /* Spin lock stats */ SHM_QUEUE lockQueue; /* locks associated with current * transaction */ @@ -64,6 +65,7 @@ typedef struct proc * on your machine), or our free-semaphores bitmap won't work. You also must * not set it higher than your kernel's SEMMSL (max semaphores per set) * parameter, which is often around 25. + * * MAX_PROC_SEMS is the maximum number of per-process semaphores (those used * by the lock mgr) we can keep track of. It must be a multiple of * PROC_NSEMS_PER_SET. @@ -78,9 +80,9 @@ typedef struct procglobal int32 freeSemMap[MAX_PROC_SEMS / PROC_NSEMS_PER_SET]; /* - * In each freeSemMap entry, the PROC_NSEMS_PER_SET lsbs flag whether - * individual semaphores are in use, and the next higher bit is set to - * show that the entire set is allocated. 
+ * In each freeSemMap entry, the PROC_NSEMS_PER_SET least-significant bits + * flag whether individual semaphores are in use, and the next higher bit + * is set to show that the entire set is allocated. */ } PROC_HDR; diff --git a/src/include/storage/shmem.h b/src/include/storage/shmem.h index d73404d154..c10aec4c91 100644 --- a/src/include/storage/shmem.h +++ b/src/include/storage/shmem.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: shmem.h,v 1.20 1999/07/16 17:07:38 momjian Exp $ + * $Id: shmem.h,v 1.21 1999/09/24 00:25:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -70,8 +70,6 @@ extern bool ShmemPIDLookup(int pid, SHMEM_OFFSET *locationPtr); extern SHMEM_OFFSET ShmemPIDDestroy(int pid); extern long *ShmemInitStruct(char *name, unsigned long size, bool *foundPtr); -extern bool TransactionIdIsInProgress(TransactionId xid); -extern void GetXmaxRecent(TransactionId *XmaxRecent); typedef int TableID; diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index 8d0f35a43c..19ce6375f0 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: sinval.h,v 1.12 1999/07/15 23:04:14 momjian Exp $ + * $Id: sinval.h,v 1.13 1999/09/24 00:25:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,5 +26,9 @@ extern void RegisterSharedInvalid(int cacheId, Index hashIndex, extern void InvalidateSharedInvalid(void (*invalFunction) (), void (*resetFunction) ()); +extern bool DatabaseHasActiveBackends(Oid databaseId); +extern bool TransactionIdIsInProgress(TransactionId xid); +extern void GetXmaxRecent(TransactionId *XmaxRecent); + #endif /* SINVAL_H */ diff --git a/src/include/storage/sinvaladt.h b/src/include/storage/sinvaladt.h index b9d349a4c5..7944f21a64 100644 --- a/src/include/storage/sinvaladt.h +++ 
b/src/include/storage/sinvaladt.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: sinvaladt.h,v 1.18 1999/09/06 19:37:37 tgl Exp $ + * $Id: sinvaladt.h,v 1.19 1999/09/24 00:25:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -15,6 +15,7 @@ #include "storage/ipc.h" #include "storage/itemptr.h" +#include "storage/shmem.h" /* * The shared cache invalidation manager is responsible for transmitting @@ -77,6 +78,7 @@ typedef struct ProcState int nextMsgNum; /* next message number to read, or -1 */ bool resetState; /* true, if backend has to reset its state */ int tag; /* backend tag received from postmaster */ + SHMEM_OFFSET procStruct; /* location of backend's PROC struct */ } ProcState; /* Shared cache invalidation memory segment */ diff --git a/src/test/regress/output/misc.source b/src/test/regress/output/misc.source index ff6b77291d..4eb0dafb1e 100644 --- a/src/test/regress/output/misc.source +++ b/src/test/regress/output/misc.source @@ -404,52 +404,61 @@ QUERY: SELECT p.name, p.hobbies.name, p.hobbies.equipment.name FROM person p; name |name |name -----+-----------+------------- mike |posthacking|advil -joe |basketball |peet's coffee +mike |posthacking|peet's coffee +joe |basketball |hightops sally|basketball |hightops -(3 rows) +(4 rows) QUERY: SELECT p.name, p.hobbies.name, p.hobbies.equipment.name FROM person* p; name |name |name -----+-----------+------------- mike |posthacking|advil -joe |basketball |peet's coffee +mike |posthacking|peet's coffee +joe |basketball |hightops sally|basketball |hightops jeff |posthacking|advil -(4 rows) +jeff |posthacking|peet's coffee +(6 rows) QUERY: SELECT p.hobbies.equipment.name, p.name, p.hobbies.name FROM person p; -name |name |name ---------+-----+----------- -advil |mike |posthacking -hightops|joe |basketball -hightops|sally|basketball -(3 rows) +name |name |name +-------------+-----+----------- +advil |mike |posthacking +peet's 
coffee|mike |posthacking +hightops |joe |basketball +hightops |sally|basketball +(4 rows) QUERY: SELECT p.hobbies.equipment.name, p.name, p.hobbies.name FROM person* p; -name |name |name ---------+-----+----------- -advil |mike |posthacking -hightops|joe |basketball -hightops|sally|basketball -advil |jeff |posthacking -(4 rows) +name |name |name +-------------+-----+----------- +advil |mike |posthacking +peet's coffee|mike |posthacking +hightops |joe |basketball +hightops |sally|basketball +advil |jeff |posthacking +peet's coffee|jeff |posthacking +(6 rows) QUERY: SELECT p.hobbies.equipment.name, p.hobbies.name, p.name FROM person p; name |name |name -------------+-----------+----- advil |posthacking|mike -peet's coffee|basketball |joe +peet's coffee|posthacking|mike +hightops |basketball |joe hightops |basketball |sally -(3 rows) +(4 rows) QUERY: SELECT p.hobbies.equipment.name, p.hobbies.name, p.name FROM person* p; name |name |name -------------+-----------+----- advil |posthacking|mike -peet's coffee|basketball |joe +peet's coffee|posthacking|mike +hightops |basketball |joe hightops |basketball |sally advil |posthacking|jeff -(4 rows) +peet's coffee|posthacking|jeff +(6 rows) QUERY: SELECT user_relns() AS user_relns ORDER BY user_relns;