diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 95eda7af47..c43bebce85 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -312,6 +312,10 @@ smgrDoPendingDeletes(bool isCommit) PendingRelDelete *pending; PendingRelDelete *prev; PendingRelDelete *next; + int nrels = 0, + i = 0, + maxrels = 8; + SMgrRelation *srels = palloc(maxrels * sizeof(SMgrRelation)); prev = NULL; for (pending = pendingDeletes; pending != NULL; pending = next) @@ -335,14 +339,32 @@ smgrDoPendingDeletes(bool isCommit) SMgrRelation srel; srel = smgropen(pending->relnode, pending->backend); - smgrdounlink(srel, false); - smgrclose(srel); + + /* extend the array if needed (double the size) */ + if (maxrels <= nrels) + { + maxrels *= 2; + srels = repalloc(srels, sizeof(SMgrRelation) * maxrels); + } + + srels[nrels++] = srel; } /* must explicitly free the list entry */ pfree(pending); /* prev does not change */ } } + + if (nrels > 0) + { + smgrdounlinkall(srels, nrels, false); + + for (i = 0; i < nrels; i++) + smgrclose(srels[i]); + } + + pfree(srels); + } /* diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 03ed41dc15..13b80aefc5 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -62,6 +62,7 @@ #define BUF_WRITTEN 0x01 #define BUF_REUSABLE 0x02 +#define DROP_RELS_BSEARCH_THRESHOLD 20 /* DropRelFileNodesAllBuffers switches from a linear scan to sort and bsearch above this many relations */ /* GUC variables */ bool zero_damaged_pages = false; @@ -107,6 +108,7 @@ static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, bool *foundPtr); static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln); static void AtProcExit_Buffers(int code, Datum arg); +static int rnode_comparator(const void *p1, const void *p2); /* @@ -2086,43 +2088,103 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum, } /* --------------------------------------------------------------------- - * DropRelFileNodeAllBuffers + * DropRelFileNodesAllBuffers * * This 
function removes from the buffer pool all the pages of all - * forks of the specified relation. It's equivalent to calling - * DropRelFileNodeBuffers once per fork with firstDelBlock = 0. + * forks of the specified relations. It's equivalent to calling + * DropRelFileNodeBuffers once per fork per relation with + * firstDelBlock = 0. * -------------------------------------------------------------------- */ void -DropRelFileNodeAllBuffers(RelFileNodeBackend rnode) +DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes) { - int i; + int i, + n = 0; + RelFileNode *nodes; + bool use_bsearch; + + if (nnodes == 0) + return; + + nodes = palloc(sizeof(RelFileNode) * nnodes); /* non-local relations */ /* If it's a local relation, it's localbuf.c's problem. */ - if (RelFileNodeBackendIsTemp(rnode)) + for (i = 0; i < nnodes; i++) { - if (rnode.backend == MyBackendId) - DropRelFileNodeAllLocalBuffers(rnode.node); + if (RelFileNodeBackendIsTemp(rnodes[i])) + { + if (rnodes[i].backend == MyBackendId) + DropRelFileNodeAllLocalBuffers(rnodes[i].node); + } + else + nodes[n++] = rnodes[i].node; + } + + /* + * If there are no non-local relations, then we're done. Release the memory + * and return. + */ + if (n == 0) + { + pfree(nodes); return; } + /* + * For a low number of relations to drop, just use a simple walk-through, to + * save the bsearch overhead. The threshold to use is more of a guess than an + * exactly determined value, as it depends on many factors (CPU and RAM + * speeds, amount of shared buffers etc.). + */ + use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD; + + /* sort the list of rnodes if necessary (bsearch below needs sorted input) */ + if (use_bsearch) + pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator); + for (i = 0; i < NBuffers; i++) { + RelFileNode *rnode = NULL; volatile BufferDesc *bufHdr = &BufferDescriptors[i]; /* * As in DropRelFileNodeBuffers, an unlocked precheck should be safe * and saves some cycles. 
*/ - if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node)) + + if (!use_bsearch) + { + int j; + + for (j = 0; j < n; j++) + { + if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j])) + { + rnode = &nodes[j]; + break; + } + } + } + else + { + rnode = bsearch((const void *) &(bufHdr->tag.rnode), + nodes, n, sizeof(RelFileNode), + rnode_comparator); + } + + /* buffer doesn't belong to any of the given relfilenodes; skip it */ + if (rnode == NULL) continue; LockBufHdr(bufHdr); - if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node)) + if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode))) InvalidateBuffer(bufHdr); /* releases spinlock */ else UnlockBufHdr(bufHdr); } + + pfree(nodes); } /* --------------------------------------------------------------------- @@ -2953,3 +3015,30 @@ local_buffer_write_error_callback(void *arg) pfree(path); } } + +/* + * RelFileNode qsort/bsearch comparator (orders by relNode, then dbNode, then spcNode); see RelFileNodeEquals. + */ +static int +rnode_comparator(const void *p1, const void *p2) +{ + RelFileNode n1 = *(RelFileNode *) p1; + RelFileNode n2 = *(RelFileNode *) p2; + + if (n1.relNode < n2.relNode) + return -1; + else if (n1.relNode > n2.relNode) + return 1; + + if (n1.dbNode < n2.dbNode) + return -1; + else if (n1.dbNode > n2.dbNode) + return 1; + + if (n1.spcNode < n2.spcNode) + return -1; + else if (n1.spcNode > n2.spcNode) + return 1; + else + return 0; +} diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 925238cd89..3aa6325481 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -390,7 +390,7 @@ smgrdounlink(SMgrRelation reln, bool isRedo) * Get rid of any remaining buffers for the relation. bufmgr will just * drop them without bothering to write the contents. */ - DropRelFileNodeAllBuffers(rnode); + DropRelFileNodesAllBuffers(&rnode, 1); /* * It'd be nice to tell the stats collector to forget it immediately, too. 
@@ -419,6 +419,86 @@ smgrdounlink(SMgrRelation reln, bool isRedo) (*(smgrsw[which].smgr_unlink)) (rnode, InvalidForkNumber, isRedo); } +/* + * smgrdounlinkall() -- Immediately unlink all forks of all given relations + * + * All forks of all given relations are removed from the store. This + * should not be used during transactional operations, since it can't be + * undone. + * + * If isRedo is true, it is okay for the underlying file(s) to be gone + * already. Nothing is done when nrels is zero. + * + * This is equivalent to calling smgrdounlink for each relation, but it's + * significantly quicker so should be preferred when possible. + */ +void +smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) +{ + int i = 0; + RelFileNodeBackend *rnodes; + ForkNumber forknum; + + if (nrels == 0) + return; + + /* + * Collect the RelFileNodeBackend of every relation to be dropped, and + * close each relation's forks at the smgr level while at it. + */ + rnodes = palloc(sizeof(RelFileNodeBackend) * nrels); + for (i = 0; i < nrels; i++) + { + RelFileNodeBackend rnode = rels[i]->smgr_rnode; + int which = rels[i]->smgr_which; + + rnodes[i] = rnode; + + /* Close the forks at smgr level */ + for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) + (*(smgrsw[which].smgr_close)) (rels[i], forknum); + } + + /* + * Get rid of any remaining buffers for the relations. bufmgr will just + * drop them without bothering to write the contents. + */ + DropRelFileNodesAllBuffers(rnodes, nrels); + + /* + * It'd be nice to tell the stats collector to forget them immediately, too. + * But we can't because we don't know the OIDs. + */ + + /* + * Send one shared-inval message per rel to force other backends to close any + * dangling smgr references they may have for these rels. We should do + * this before starting the actual unlinking, in case we fail partway + * through that step. Note that the sinval messages will eventually come + * back to this backend, too, and thereby provide a backstop that we closed + * our own smgr rel. 
+ */ + for (i = 0; i < nrels; i++) + CacheInvalidateSmgr(rnodes[i]); + + /* + * Delete the physical file(s): every fork of every relation. + * + * Note: smgr_unlink must treat deletion failure as a WARNING, not an + * ERROR, because we've already decided to commit or abort the current + * xact. + */ + + for (i = 0; i < nrels; i++) + { + int which = rels[i]->smgr_which; + for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) + (*(smgrsw[which].smgr_unlink)) (rnodes[i], forknum, isRedo); + } + + pfree(rnodes); +} + /* * smgrdounlinkfork() -- Immediately unlink one fork of a relation. * diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index d34034bcb7..2ad536b745 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -188,7 +188,7 @@ extern void FlushRelationBuffers(Relation rel); extern void FlushDatabaseBuffers(Oid dbid); extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum, BlockNumber firstDelBlock); -extern void DropRelFileNodeAllBuffers(RelFileNodeBackend rnode); +extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes); extern void DropDatabaseBuffers(Oid dbid); #define RelationGetNumberOfBlocks(reln) \ diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 4547a0f518..98b6f13137 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -85,6 +85,7 @@ extern void smgrcloseall(void); extern void smgrclosenode(RelFileNodeBackend rnode); extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); extern void smgrdounlink(SMgrRelation reln, bool isRedo); +extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo); extern void smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo); extern void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync);