tableam: Add table_finish_bulk_insert().

This replaces the previous calls of heap_sync() in places using
bulk-insert. By passing in the flags used for bulk-insert the AM can
decide (first at insert time and then during the finish call) which of
the optimizations apply to it, and what operations are necessary to
finish a bulk insert operation.

Also change HEAP_INSERT_* flags to TABLE_INSERT, and rename hi_options
to ti_options.

These changes are made even in copy.c, which hasn't yet been converted
to tableam. There's no harm in doing so.

Author: Andres Freund
Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
This commit is contained in:
Andres Freund 2019-04-01 14:41:42 -07:00
parent 26a76cb640
commit d45e401586
6 changed files with 78 additions and 46 deletions

View File

@ -540,6 +540,17 @@ tuple_lock_retry:
return result; return result;
} }
static void
heapam_finish_bulk_insert(Relation relation, int options)
{
	/*
	 * Inserts done with WAL-logging skipped are not crash-safe until the
	 * relation has been fsync'ed, so flush it here before commit.  Indexes
	 * need no such treatment: they always use WAL and are not managed
	 * through tableam.
	 */
	if ((options & HEAP_INSERT_SKIP_WAL) != 0)
		heap_sync(relation);
}
/* ------------------------------------------------------------------------ /* ------------------------------------------------------------------------
* DDL related callbacks for heap AM. * DDL related callbacks for heap AM.
@ -2401,6 +2412,7 @@ static const TableAmRoutine heapam_methods = {
.tuple_delete = heapam_tuple_delete, .tuple_delete = heapam_tuple_delete,
.tuple_update = heapam_tuple_update, .tuple_update = heapam_tuple_update,
.tuple_lock = heapam_tuple_lock, .tuple_lock = heapam_tuple_lock,
.finish_bulk_insert = heapam_finish_bulk_insert,
.tuple_fetch_row_version = heapam_fetch_row_version, .tuple_fetch_row_version = heapam_fetch_row_version,
.tuple_get_latest_tid = heap_get_latest_tid, .tuple_get_latest_tid = heap_get_latest_tid,

View File

@ -319,7 +319,7 @@ static uint64 CopyTo(CopyState cstate);
static void CopyOneRowTo(CopyState cstate, static void CopyOneRowTo(CopyState cstate,
Datum *values, bool *nulls); Datum *values, bool *nulls);
static void CopyFromInsertBatch(CopyState cstate, EState *estate, static void CopyFromInsertBatch(CopyState cstate, EState *estate,
CommandId mycid, int hi_options, CommandId mycid, int ti_options,
ResultRelInfo *resultRelInfo, TupleTableSlot *myslot, ResultRelInfo *resultRelInfo, TupleTableSlot *myslot,
BulkInsertState bistate, BulkInsertState bistate,
int nBufferedTuples, HeapTuple *bufferedTuples, int nBufferedTuples, HeapTuple *bufferedTuples,
@ -2328,7 +2328,7 @@ CopyFrom(CopyState cstate)
PartitionTupleRouting *proute = NULL; PartitionTupleRouting *proute = NULL;
ErrorContextCallback errcallback; ErrorContextCallback errcallback;
CommandId mycid = GetCurrentCommandId(true); CommandId mycid = GetCurrentCommandId(true);
int hi_options = 0; /* start with default heap_insert options */ int ti_options = 0; /* start with default table_insert options */
BulkInsertState bistate; BulkInsertState bistate;
CopyInsertMethod insertMethod; CopyInsertMethod insertMethod;
uint64 processed = 0; uint64 processed = 0;
@ -2392,8 +2392,8 @@ CopyFrom(CopyState cstate)
* - data is being written to relfilenode created in this transaction * - data is being written to relfilenode created in this transaction
* then we can skip writing WAL. It's safe because if the transaction * then we can skip writing WAL. It's safe because if the transaction
* doesn't commit, we'll discard the table (or the new relfilenode file). * doesn't commit, we'll discard the table (or the new relfilenode file).
* If it does commit, we'll have done the heap_sync at the bottom of this * If it does commit, we'll have done the table_finish_bulk_insert() at
* routine first. * the bottom of this routine first.
* *
* As mentioned in comments in utils/rel.h, the in-same-transaction test * As mentioned in comments in utils/rel.h, the in-same-transaction test
* is not always set correctly, since in rare cases rd_newRelfilenodeSubid * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
@ -2437,9 +2437,9 @@ CopyFrom(CopyState cstate)
(cstate->rel->rd_createSubid != InvalidSubTransactionId || (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)) cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId))
{ {
hi_options |= HEAP_INSERT_SKIP_FSM; ti_options |= TABLE_INSERT_SKIP_FSM;
if (!XLogIsNeeded()) if (!XLogIsNeeded())
hi_options |= HEAP_INSERT_SKIP_WAL; ti_options |= TABLE_INSERT_SKIP_WAL;
} }
/* /*
@ -2491,7 +2491,7 @@ CopyFrom(CopyState cstate)
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("cannot perform FREEZE because the table was not created or truncated in the current subtransaction"))); errmsg("cannot perform FREEZE because the table was not created or truncated in the current subtransaction")));
hi_options |= HEAP_INSERT_FROZEN; ti_options |= TABLE_INSERT_FROZEN;
} }
/* /*
@ -2755,7 +2755,7 @@ CopyFrom(CopyState cstate)
{ {
MemoryContext oldcontext; MemoryContext oldcontext;
CopyFromInsertBatch(cstate, estate, mycid, hi_options, CopyFromInsertBatch(cstate, estate, mycid, ti_options,
prevResultRelInfo, myslot, bistate, prevResultRelInfo, myslot, bistate,
nBufferedTuples, bufferedTuples, nBufferedTuples, bufferedTuples,
firstBufferedLineNo); firstBufferedLineNo);
@ -2978,7 +2978,7 @@ CopyFrom(CopyState cstate)
if (nBufferedTuples == MAX_BUFFERED_TUPLES || if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
bufferedTuplesSize > 65535) bufferedTuplesSize > 65535)
{ {
CopyFromInsertBatch(cstate, estate, mycid, hi_options, CopyFromInsertBatch(cstate, estate, mycid, ti_options,
resultRelInfo, myslot, bistate, resultRelInfo, myslot, bistate,
nBufferedTuples, bufferedTuples, nBufferedTuples, bufferedTuples,
firstBufferedLineNo); firstBufferedLineNo);
@ -3015,7 +3015,7 @@ CopyFrom(CopyState cstate)
{ {
tuple = ExecFetchSlotHeapTuple(slot, true, NULL); tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
heap_insert(resultRelInfo->ri_RelationDesc, tuple, heap_insert(resultRelInfo->ri_RelationDesc, tuple,
mycid, hi_options, bistate); mycid, ti_options, bistate);
ItemPointerCopy(&tuple->t_self, &slot->tts_tid); ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc); slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
} }
@ -3050,13 +3050,13 @@ CopyFrom(CopyState cstate)
{ {
if (insertMethod == CIM_MULTI_CONDITIONAL) if (insertMethod == CIM_MULTI_CONDITIONAL)
{ {
CopyFromInsertBatch(cstate, estate, mycid, hi_options, CopyFromInsertBatch(cstate, estate, mycid, ti_options,
prevResultRelInfo, myslot, bistate, prevResultRelInfo, myslot, bistate,
nBufferedTuples, bufferedTuples, nBufferedTuples, bufferedTuples,
firstBufferedLineNo); firstBufferedLineNo);
} }
else else
CopyFromInsertBatch(cstate, estate, mycid, hi_options, CopyFromInsertBatch(cstate, estate, mycid, ti_options,
resultRelInfo, myslot, bistate, resultRelInfo, myslot, bistate,
nBufferedTuples, bufferedTuples, nBufferedTuples, bufferedTuples,
firstBufferedLineNo); firstBufferedLineNo);
@ -3106,12 +3106,7 @@ CopyFrom(CopyState cstate)
FreeExecutorState(estate); FreeExecutorState(estate);
/* table_finish_bulk_insert(cstate->rel, ti_options);
* If we skipped writing WAL, then we need to sync the heap (but not
* indexes since those use WAL anyway)
*/
if (hi_options & HEAP_INSERT_SKIP_WAL)
heap_sync(cstate->rel);
return processed; return processed;
} }
@ -3123,7 +3118,7 @@ CopyFrom(CopyState cstate)
*/ */
static void static void
CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
int hi_options, ResultRelInfo *resultRelInfo, int ti_options, ResultRelInfo *resultRelInfo,
TupleTableSlot *myslot, BulkInsertState bistate, TupleTableSlot *myslot, BulkInsertState bistate,
int nBufferedTuples, HeapTuple *bufferedTuples, int nBufferedTuples, HeapTuple *bufferedTuples,
uint64 firstBufferedLineNo) uint64 firstBufferedLineNo)
@ -3149,7 +3144,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
bufferedTuples, bufferedTuples,
nBufferedTuples, nBufferedTuples,
mycid, mycid,
hi_options, ti_options,
bistate); bistate);
MemoryContextSwitchTo(oldcontext); MemoryContextSwitchTo(oldcontext);

View File

@ -27,8 +27,8 @@
#include "access/heapam.h" #include "access/heapam.h"
#include "access/reloptions.h" #include "access/reloptions.h"
#include "access/htup_details.h" #include "access/htup_details.h"
#include "access/tableam.h"
#include "access/sysattr.h" #include "access/sysattr.h"
#include "access/tableam.h"
#include "access/xact.h" #include "access/xact.h"
#include "access/xlog.h" #include "access/xlog.h"
#include "catalog/namespace.h" #include "catalog/namespace.h"
@ -60,7 +60,7 @@ typedef struct
Relation rel; /* relation to write to */ Relation rel; /* relation to write to */
ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */ ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */
CommandId output_cid; /* cmin to insert in output tuples */ CommandId output_cid; /* cmin to insert in output tuples */
int hi_options; /* heap_insert performance options */ int ti_options; /* table_insert performance options */
BulkInsertState bistate; /* bulk insert state */ BulkInsertState bistate; /* bulk insert state */
} DR_intorel; } DR_intorel;
@ -558,8 +558,8 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
* We can skip WAL-logging the insertions, unless PITR or streaming * We can skip WAL-logging the insertions, unless PITR or streaming
* replication is in use. We can skip the FSM in any case. * replication is in use. We can skip the FSM in any case.
*/ */
myState->hi_options = HEAP_INSERT_SKIP_FSM | myState->ti_options = TABLE_INSERT_SKIP_FSM |
(XLogIsNeeded() ? 0 : HEAP_INSERT_SKIP_WAL); (XLogIsNeeded() ? 0 : TABLE_INSERT_SKIP_WAL);
myState->bistate = GetBulkInsertState(); myState->bistate = GetBulkInsertState();
/* Not using WAL requires smgr_targblock be initially invalid */ /* Not using WAL requires smgr_targblock be initially invalid */
@ -586,7 +586,7 @@ intorel_receive(TupleTableSlot *slot, DestReceiver *self)
table_insert(myState->rel, table_insert(myState->rel,
slot, slot,
myState->output_cid, myState->output_cid,
myState->hi_options, myState->ti_options,
myState->bistate); myState->bistate);
/* We know this is a newly created relation, so there are no indexes */ /* We know this is a newly created relation, so there are no indexes */
@ -604,9 +604,7 @@ intorel_shutdown(DestReceiver *self)
FreeBulkInsertState(myState->bistate); FreeBulkInsertState(myState->bistate);
/* If we skipped using WAL, must heap_sync before commit */ table_finish_bulk_insert(myState->rel, myState->ti_options);
if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
heap_sync(myState->rel);
/* close rel, but keep lock until commit */ /* close rel, but keep lock until commit */
table_close(myState->rel, NoLock); table_close(myState->rel, NoLock);

View File

@ -18,6 +18,7 @@
#include "access/heapam.h" #include "access/heapam.h"
#include "access/htup_details.h" #include "access/htup_details.h"
#include "access/multixact.h" #include "access/multixact.h"
#include "access/tableam.h"
#include "access/xact.h" #include "access/xact.h"
#include "access/xlog.h" #include "access/xlog.h"
#include "catalog/catalog.h" #include "catalog/catalog.h"
@ -53,7 +54,7 @@ typedef struct
/* These fields are filled by transientrel_startup: */ /* These fields are filled by transientrel_startup: */
Relation transientrel; /* relation to write to */ Relation transientrel; /* relation to write to */
CommandId output_cid; /* cmin to insert in output tuples */ CommandId output_cid; /* cmin to insert in output tuples */
int hi_options; /* heap_insert performance options */ int ti_options; /* table_insert performance options */
BulkInsertState bistate; /* bulk insert state */ BulkInsertState bistate; /* bulk insert state */
} DR_transientrel; } DR_transientrel;
@ -257,7 +258,7 @@ ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString,
* as open scans. * as open scans.
* *
* NB: We count on this to protect us against problems with refreshing the * NB: We count on this to protect us against problems with refreshing the
* data using HEAP_INSERT_FROZEN. * data using TABLE_INSERT_FROZEN.
*/ */
CheckTableNotInUse(matviewRel, "REFRESH MATERIALIZED VIEW"); CheckTableNotInUse(matviewRel, "REFRESH MATERIALIZED VIEW");
@ -461,9 +462,9 @@ transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
* We can skip WAL-logging the insertions, unless PITR or streaming * We can skip WAL-logging the insertions, unless PITR or streaming
* replication is in use. We can skip the FSM in any case. * replication is in use. We can skip the FSM in any case.
*/ */
myState->hi_options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN; myState->ti_options = TABLE_INSERT_SKIP_FSM | TABLE_INSERT_FROZEN;
if (!XLogIsNeeded()) if (!XLogIsNeeded())
myState->hi_options |= HEAP_INSERT_SKIP_WAL; myState->ti_options |= TABLE_INSERT_SKIP_WAL;
myState->bistate = GetBulkInsertState(); myState->bistate = GetBulkInsertState();
/* Not using WAL requires smgr_targblock be initially invalid */ /* Not using WAL requires smgr_targblock be initially invalid */
@ -490,7 +491,7 @@ transientrel_receive(TupleTableSlot *slot, DestReceiver *self)
table_insert(myState->transientrel, table_insert(myState->transientrel,
slot, slot,
myState->output_cid, myState->output_cid,
myState->hi_options, myState->ti_options,
myState->bistate); myState->bistate);
/* We know this is a newly created relation, so there are no indexes */ /* We know this is a newly created relation, so there are no indexes */
@ -508,9 +509,7 @@ transientrel_shutdown(DestReceiver *self)
FreeBulkInsertState(myState->bistate); FreeBulkInsertState(myState->bistate);
/* If we skipped using WAL, must heap_sync before commit */ table_finish_bulk_insert(myState->transientrel, myState->ti_options);
if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
heap_sync(myState->transientrel);
/* close transientrel, but keep lock until commit */ /* close transientrel, but keep lock until commit */
table_close(myState->transientrel, NoLock); table_close(myState->transientrel, NoLock);

View File

@ -4687,7 +4687,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
EState *estate; EState *estate;
CommandId mycid; CommandId mycid;
BulkInsertState bistate; BulkInsertState bistate;
int hi_options; int ti_options;
ExprState *partqualstate = NULL; ExprState *partqualstate = NULL;
/* /*
@ -4704,7 +4704,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
newrel = NULL; newrel = NULL;
/* /*
* Prepare a BulkInsertState and options for heap_insert. Because we're * Prepare a BulkInsertState and options for table_insert. Because we're
* building a new heap, we can skip WAL-logging and fsync it to disk at * building a new heap, we can skip WAL-logging and fsync it to disk at
* the end instead (unless WAL-logging is required for archiving or * the end instead (unless WAL-logging is required for archiving or
* streaming replication). The FSM is empty too, so don't bother using it. * streaming replication). The FSM is empty too, so don't bother using it.
@ -4714,16 +4714,16 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
mycid = GetCurrentCommandId(true); mycid = GetCurrentCommandId(true);
bistate = GetBulkInsertState(); bistate = GetBulkInsertState();
hi_options = HEAP_INSERT_SKIP_FSM; ti_options = TABLE_INSERT_SKIP_FSM;
if (!XLogIsNeeded()) if (!XLogIsNeeded())
hi_options |= HEAP_INSERT_SKIP_WAL; ti_options |= TABLE_INSERT_SKIP_WAL;
} }
else else
{ {
/* keep compiler quiet about using these uninitialized */ /* keep compiler quiet about using these uninitialized */
mycid = 0; mycid = 0;
bistate = NULL; bistate = NULL;
hi_options = 0; ti_options = 0;
} }
/* /*
@ -4977,7 +4977,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
/* Write the tuple out to the new relation */ /* Write the tuple out to the new relation */
if (newrel) if (newrel)
table_insert(newrel, insertslot, mycid, hi_options, bistate); table_insert(newrel, insertslot, mycid, ti_options, bistate);
ResetExprContext(econtext); ResetExprContext(econtext);
@ -5000,9 +5000,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
{ {
FreeBulkInsertState(bistate); FreeBulkInsertState(bistate);
/* If we skipped writing WAL, then we need to sync the heap. */ table_finish_bulk_insert(newrel, ti_options);
if (hi_options & HEAP_INSERT_SKIP_WAL)
heap_sync(newrel);
table_close(newrel, NoLock); table_close(newrel, NoLock);
} }

View File

@ -380,6 +380,21 @@ typedef struct TableAmRoutine
uint8 flags, uint8 flags,
TM_FailureData *tmfd); TM_FailureData *tmfd);
/*
* Perform operations necessary to complete insertions made via
* tuple_insert and multi_insert with a BulkInsertState specified. This
* may e.g. be used to flush the relation when inserting with
* TABLE_INSERT_SKIP_WAL specified.
*
* Typically callers of tuple_insert and multi_insert will just pass all
* the flags that apply to them, and each AM has to decide which of them
* make sense for it, and then only take actions in finish_bulk_insert
* that make sense for a specific AM.
*
* Optional callback.
*/
void (*finish_bulk_insert) (Relation rel, int options);
/* ------------------------------------------------------------------------ /* ------------------------------------------------------------------------
* DDL related functionality. * DDL related functionality.
@ -1011,7 +1026,8 @@ table_compute_xid_horizon_for_tuples(Relation rel,
* *
* *
* The BulkInsertState object (if any; bistate can be NULL for default * The BulkInsertState object (if any; bistate can be NULL for default
* behavior) is also just passed through to RelationGetBufferForTuple. * behavior) is also just passed through to RelationGetBufferForTuple. If
* `bistate` is provided, table_finish_bulk_insert() needs to be called.
* *
* On return the slot's tts_tid and tts_tableOid are updated to reflect the * On return the slot's tts_tid and tts_tableOid are updated to reflect the
* insertion. But note that any toasting of fields within the slot is NOT * insertion. But note that any toasting of fields within the slot is NOT
@ -1185,6 +1201,20 @@ table_lock_tuple(Relation rel, ItemPointer tid, Snapshot snapshot,
flags, tmfd); flags, tmfd);
} }
/*
 * Complete a sequence of insertions made via tuple_insert / multi_insert
 * with a BulkInsertState.  This may e.g. be used by the AM to flush the
 * relation to disk when the insertions were performed with
 * TABLE_INSERT_SKIP_WAL specified.
 */
static inline void
table_finish_bulk_insert(Relation rel, int options)
{
	/* The callback is optional; AMs without finish work omit it. */
	if (rel->rd_tableam == NULL || rel->rd_tableam->finish_bulk_insert == NULL)
		return;

	rel->rd_tableam->finish_bulk_insert(rel, options);
}
/* ------------------------------------------------------------------------ /* ------------------------------------------------------------------------
* DDL related functionality. * DDL related functionality.