Avoid WAL-logging individual tuple insertions during CREATE TABLE AS

(a/k/a SELECT INTO).  Instead, flush and fsync the whole relation before
committing.  We do still need the WAL log when PITR is active, however.
Simon Riggs and Tom Lane.
This commit is contained in:
Tom Lane 2005-06-20 18:37:02 +00:00
parent 1bfdd1a893
commit b95ae32b41
9 changed files with 108 additions and 27 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.194 2005/06/08 15:50:21 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.195 2005/06/20 18:37:01 tgl Exp $
*
*
* INTERFACE ROUTINES
@ -1034,9 +1034,20 @@ heap_get_latest_tid(Relation relation,
*
* The new tuple is stamped with current transaction ID and the specified
* command ID.
*
* If use_wal is false, the new tuple is not logged in WAL, even for a
* non-temp relation. Safe usage of this behavior requires that we arrange
* that all new tuples go into new pages not containing any tuples from other
* transactions, that the relation gets fsync'd before commit, and that the
* transaction emits at least one WAL record to ensure RecordTransactionCommit
* will decide to WAL-log the commit.
*
* use_fsm is passed directly to RelationGetBufferForTuple, which see for
* more info.
*/
Oid
heap_insert(Relation relation, HeapTuple tup, CommandId cid)
heap_insert(Relation relation, HeapTuple tup, CommandId cid,
bool use_wal, bool use_fsm)
{
TransactionId xid = GetCurrentTransactionId();
Buffer buffer;
@ -1086,7 +1097,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
heap_tuple_toast_attrs(relation, tup, NULL);
/* Find buffer to insert this tuple into */
buffer = RelationGetBufferForTuple(relation, tup->t_len, InvalidBuffer);
buffer = RelationGetBufferForTuple(relation, tup->t_len,
InvalidBuffer, use_fsm);
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@ -1096,7 +1108,12 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
pgstat_count_heap_insert(&relation->pgstat_info);
/* XLOG stuff */
if (!relation->rd_istemp)
if (relation->rd_istemp)
{
/* No XLOG record, but still need to flag that XID exists on disk */
MyXactMadeTempRelUpdate = true;
}
else if (use_wal)
{
xl_heap_insert xlrec;
xl_heap_header xlhdr;
@ -1151,11 +1168,6 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
else
{
/* No XLOG record, but still need to flag that XID exists on disk */
MyXactMadeTempRelUpdate = true;
}
END_CRIT_SECTION();
@ -1183,7 +1195,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
Oid
simple_heap_insert(Relation relation, HeapTuple tup)
{
return heap_insert(relation, tup, GetCurrentCommandId());
return heap_insert(relation, tup, GetCurrentCommandId(), true, true);
}
/*
@ -1743,7 +1755,7 @@ l2:
{
/* Assume there's no chance to put newtup on same page. */
newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
buffer);
buffer, true);
}
else
{
@ -1760,7 +1772,7 @@ l2:
*/
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
buffer);
buffer, true);
}
else
{

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.56 2005/05/07 21:32:23 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.57 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -79,12 +79,26 @@ RelationPutHeapTuple(Relation relation,
* happen if space is freed in that page after heap_update finds there's not
* enough there). In that case, the page will be pinned and locked only once.
*
* If use_fsm is true (the normal case), we use FSM to help us find free
* space. If use_fsm is false, we always append a new empty page to the
* end of the relation if the tuple won't fit on the current target page.
* This can save some cycles when we know the relation is new and doesn't
* contain useful amounts of free space.
*
* The use_fsm = false case is also useful for non-WAL-logged additions to a
* relation, if the caller holds exclusive lock and is careful to invalidate
* relation->rd_targblock before the first insertion --- that ensures that
* all insertions will occur into newly added pages and not be intermixed
* with tuples from other transactions. That way, a crash can't risk losing
* any committed data of other transactions. (See heap_insert's comments
* for additional constraints needed for safe usage of this behavior.)
*
* ereport(ERROR) is allowed here, so this routine *must* be called
* before any (unlogged) changes are made in buffer pool.
*/
Buffer
RelationGetBufferForTuple(Relation relation, Size len,
Buffer otherBuffer)
Buffer otherBuffer, bool use_fsm)
{
Buffer buffer = InvalidBuffer;
Page pageHeader;
@ -121,11 +135,14 @@ RelationGetBufferForTuple(Relation relation, Size len,
* on each page that proves not to be suitable.) If the FSM has no
* record of a page with enough free space, we give up and extend the
* relation.
*
* When use_fsm is false, we either put the tuple onto the existing
* target page or extend the relation.
*/
targetBlock = relation->rd_targblock;
if (targetBlock == InvalidBlockNumber)
if (targetBlock == InvalidBlockNumber && use_fsm)
{
/*
* We have no cached target page, so ask the FSM for an initial
@ -209,6 +226,10 @@ RelationGetBufferForTuple(Relation relation, Size len,
ReleaseBuffer(buffer);
}
/* Without FSM, always fall out of the loop and extend */
if (!use_fsm)
break;
/*
* Update FSM as to condition of this page, and ask for another
* page to try.

View File

@ -26,13 +26,14 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.249 2005/05/22 22:30:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.250 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/heapam.h"
#include "access/xlog.h"
#include "catalog/heap.h"
#include "catalog/namespace.h"
#include "commands/tablecmds.h"
@ -44,6 +45,7 @@
#include "optimizer/clauses.h"
#include "optimizer/var.h"
#include "parser/parsetree.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
@ -784,6 +786,20 @@ InitPlan(QueryDesc *queryDesc, bool explainOnly)
* And open the constructed table for writing.
*/
intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);
/* use_wal off requires rd_targblock be initially invalid */
Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);
/*
* We can skip WAL-logging the insertions, unless PITR is in use.
*
* Note that for a non-temp INTO table, this is safe only because
* we know that the catalog changes above will have been WAL-logged,
* and so RecordTransactionCommit will think it needs to WAL-log the
* eventual transaction commit. Else the commit might be lost, even
* though all the data is safely fsync'd ...
*/
estate->es_into_relation_use_wal = XLogArchivingActive();
}
estate->es_into_relation_descriptor = intoRelationDesc;
@ -979,7 +995,22 @@ ExecEndPlan(PlanState *planstate, EState *estate)
* close the "into" relation if necessary, again keeping lock
*/
if (estate->es_into_relation_descriptor != NULL)
{
/*
* If we skipped using WAL, and it's not a temp relation,
* we must force the relation down to disk before it's
* safe to commit the transaction. This requires forcing
* out any dirty buffers and then doing a forced fsync.
*/
if (!estate->es_into_relation_use_wal &&
!estate->es_into_relation_descriptor->rd_istemp)
{
FlushRelationBuffers(estate->es_into_relation_descriptor);
smgrimmedsync(estate->es_into_relation_descriptor->rd_smgr);
}
heap_close(estate->es_into_relation_descriptor, NoLock);
}
/*
* close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
@ -1307,7 +1338,9 @@ ExecSelect(TupleTableSlot *slot,
tuple = ExecCopySlotTuple(slot);
heap_insert(estate->es_into_relation_descriptor, tuple,
estate->es_snapshot->curcid);
estate->es_snapshot->curcid,
estate->es_into_relation_use_wal,
false); /* never any point in using FSM */
/* we know there are no indexes to update */
heap_freetuple(tuple);
IncrAppended();
@ -1386,7 +1419,8 @@ ExecInsert(TupleTableSlot *slot,
* insert the tuple
*/
newId = heap_insert(resultRelationDesc, tuple,
estate->es_snapshot->curcid);
estate->es_snapshot->curcid,
true, true);
IncrAppended();
(estate->es_processed)++;
@ -2089,6 +2123,7 @@ EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq)
epqstate->es_result_relation_info = estate->es_result_relation_info;
epqstate->es_junkFilter = estate->es_junkFilter;
epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor;
epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal;
epqstate->es_param_list_info = estate->es_param_list_info;
if (estate->es_topPlan->nParamExec > 0)
epqstate->es_param_exec_vals = (ParamExecData *)

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.123 2005/04/28 21:47:12 tgl Exp $
* $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.124 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -186,7 +186,9 @@ CreateExecutorState(void)
estate->es_result_relation_info = NULL;
estate->es_junkFilter = NULL;
estate->es_into_relation_descriptor = NULL;
estate->es_into_relation_use_wal = false;
estate->es_param_list_info = NULL;
estate->es_param_exec_vals = NULL;

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.115 2005/05/29 04:23:05 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.116 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -660,6 +660,9 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
/*
* mdimmedsync() -- Immediately sync a relation to stable storage.
*
* Note that only writes already issued are synced; this routine knows
* nothing of dirty buffers that may exist inside the buffer manager.
*/
bool
mdimmedsync(SMgrRelation reln)

View File

@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.90 2005/06/17 22:32:46 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.91 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -650,7 +650,8 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
/*
* smgrimmedsync() -- Force the specified relation to stable storage.
*
* Synchronously force all of the specified relation down to disk.
* Synchronously force all previous writes to the specified relation
* down to disk.
*
* This is useful for building completely new relations (eg, new
* indexes). Instead of incrementally WAL-logging the index build
@ -664,6 +665,10 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
*
* The preceding writes should specify isTemp = true to avoid
* duplicative fsyncs.
*
* Note that you need to do FlushRelationBuffers() first if there is
* any possibility that there are dirty buffers for the relation;
* otherwise the sync is not very meaningful.
*/
void
smgrimmedsync(SMgrRelation reln)

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.101 2005/06/06 17:01:24 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.102 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -156,7 +156,8 @@ extern ItemPointer heap_get_latest_tid(Relation relation, Snapshot snapshot,
ItemPointer tid);
extern void setLastTid(const ItemPointer tid);
extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid);
extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid,
bool use_wal, bool use_fsm);
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid,
CommandId cid, Snapshot crosscheck, bool wait);
extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple tup,

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/hio.h,v 1.27 2004/12/31 22:03:21 pgsql Exp $
* $PostgreSQL: pgsql/src/include/access/hio.h,v 1.28 2005/06/20 18:37:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -19,6 +19,6 @@
extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
HeapTuple tuple);
extern Buffer RelationGetBufferForTuple(Relation relation, Size len,
Buffer otherBuffer);
Buffer otherBuffer, bool use_fsm);
#endif /* HIO_H */

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.134 2005/06/15 07:27:44 neilc Exp $
* $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.135 2005/06/20 18:37:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -304,7 +304,9 @@ typedef struct EState
ResultRelInfo *es_result_relation_info; /* currently active array
* elt */
JunkFilter *es_junkFilter; /* currently active junk filter */
Relation es_into_relation_descriptor; /* for SELECT INTO */
bool es_into_relation_use_wal;
/* Parameter info: */
ParamListInfo es_param_list_info; /* values of external params */