Code review for transaction commit timestamps

There are three main changes here:

1. No longer cause a startup failure in a standby if the feature is
disabled in its postgresql.conf but enabled in the master.  This reverts
one part of commit 4f3924d9cd43; what we keep is the ability of the
standby to activate/deactivate the module (which includes creating and
removing segments as appropriate) when it replays such actions performed
in the master.

2. Replay WAL records affecting commitTS even if the feature is
disabled.  This means the standby will always have the same state as the
master after replay.

3. Have COMMIT PREPARED record the transaction commit time as well.  We
were previously only applying it in the normal transaction commit path.

Author: Petr Jelínek
Discussion: http://www.postgresql.org/message-id/CAHGQGwHereDzzzmfxEBYcVQu3oZv6vZcgu1TPeERWbDc+gQ06g@mail.gmail.com
Discussion: http://www.postgresql.org/message-id/CAHGQGwFuzfO4JscM9LCAmCDCxp_MfLvN4QdB+xWsS-FijbjTYQ@mail.gmail.com

Additionally, I cleaned up nearby code related to replication origins,
which I found a bit hard to follow, and fixed a couple of typos.

Backpatch to 9.5, where this code was introduced.

Per bug reports from Fujii Masao and subsequent discussion.
This commit is contained in:
Alvaro Herrera 2015-09-29 14:40:56 -03:00
parent b631a46ed8
commit 6b61955135
6 changed files with 90 additions and 48 deletions

View File

@ -111,7 +111,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
appendStringInfo(buf, "max_connections=%d max_worker_processes=%d " appendStringInfo(buf, "max_connections=%d max_worker_processes=%d "
"max_prepared_xacts=%d max_locks_per_xact=%d " "max_prepared_xacts=%d max_locks_per_xact=%d "
"wal_level=%s wal_log_hints=%s " "wal_level=%s wal_log_hints=%s "
"track_commit_timestamps=%s", "track_commit_timestamp=%s",
xlrec.MaxConnections, xlrec.MaxConnections,
xlrec.max_worker_processes, xlrec.max_worker_processes,
xlrec.max_prepared_xacts, xlrec.max_prepared_xacts,

View File

@ -122,29 +122,39 @@ static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids,
* subtrans implementation changes in the future, we might want to revisit the * subtrans implementation changes in the future, we might want to revisit the
* decision of storing timestamp info for each subxid. * decision of storing timestamp info for each subxid.
* *
* The do_xlog parameter tells us whether to include an XLog record of this * The replaying_xlog parameter indicates whether the module should execute
* or not. Normal path through RecordTransactionCommit() will be related * its write even if the feature is nominally disabled, because we're replaying
* to a transaction commit XLog record, and so should pass "false" here. * a record generated from a master where the feature is enabled.
* Other callers probably want to pass true, so that the given values persist *
* in case of crashes. * The write_xlog parameter tells us whether to include an XLog record of this
* or not. Normally, this is called from transaction commit routines (both
* normal and prepared) and the information will be stored in the transaction
* commit XLog record, and so they should pass "false" for this. The XLog redo
* code should use "false" here as well. Other callers probably want to pass
* true, so that the given values persist in case of crashes.
*/ */
void void
TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids, TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
TransactionId *subxids, TimestampTz timestamp, TransactionId *subxids, TimestampTz timestamp,
RepOriginId nodeid, bool do_xlog) RepOriginId nodeid,
bool replaying_xlog, bool write_xlog)
{ {
int i; int i;
TransactionId headxid; TransactionId headxid;
TransactionId newestXact; TransactionId newestXact;
if (!track_commit_timestamp) /* We'd better not try to write xlog during replay */
Assert(!(write_xlog && replaying_xlog));
/* No-op if feature not enabled, unless replaying WAL */
if (!track_commit_timestamp && !replaying_xlog)
return; return;
/* /*
* Comply with the WAL-before-data rule: if caller specified it wants this * Comply with the WAL-before-data rule: if caller specified it wants this
* value to be recorded in WAL, do so before touching the data. * value to be recorded in WAL, do so before touching the data.
*/ */
if (do_xlog) if (write_xlog)
WriteSetTimestampXlogRec(xid, nsubxids, subxids, timestamp, nodeid); WriteSetTimestampXlogRec(xid, nsubxids, subxids, timestamp, nodeid);
/* /*
@ -906,7 +916,8 @@ commit_ts_redo(XLogReaderState *record)
subxids = NULL; subxids = NULL;
TransactionTreeSetCommitTsData(setts->mainxid, nsubxids, subxids, TransactionTreeSetCommitTsData(setts->mainxid, nsubxids, subxids,
setts->timestamp, setts->nodeid, false); setts->timestamp, setts->nodeid, false,
true);
if (subxids) if (subxids)
pfree(subxids); pfree(subxids);
} }

View File

@ -41,6 +41,7 @@
#include <time.h> #include <time.h>
#include <unistd.h> #include <unistd.h>
#include "access/commit_ts.h"
#include "access/htup_details.h" #include "access/htup_details.h"
#include "access/subtrans.h" #include "access/subtrans.h"
#include "access/transam.h" #include "access/transam.h"
@ -56,8 +57,9 @@
#include "miscadmin.h" #include "miscadmin.h"
#include "pg_trace.h" #include "pg_trace.h"
#include "pgstat.h" #include "pgstat.h"
#include "replication/walsender.h" #include "replication/origin.h"
#include "replication/syncrep.h" #include "replication/syncrep.h"
#include "replication/walsender.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/predicate.h" #include "storage/predicate.h"
@ -2070,8 +2072,9 @@ RecoverPreparedTransactions(void)
/* /*
* RecordTransactionCommitPrepared * RecordTransactionCommitPrepared
* *
* This is basically the same as RecordTransactionCommit: in particular, * This is basically the same as RecordTransactionCommit (q.v. if you change
* we must set the delayChkpt flag to avoid a race condition. * this function): in particular, we must set the delayChkpt flag to avoid a
* race condition.
* *
* We know the transaction made at least one XLOG entry (its PREPARE), * We know the transaction made at least one XLOG entry (its PREPARE),
* so it is never possible to optimize out the commit record. * so it is never possible to optimize out the commit record.
@ -2087,6 +2090,15 @@ RecordTransactionCommitPrepared(TransactionId xid,
bool initfileinval) bool initfileinval)
{ {
XLogRecPtr recptr; XLogRecPtr recptr;
TimestampTz committs = GetCurrentTimestamp();
bool replorigin;
/*
* Are we using the replication origins feature? Or, in other words, are
* we replaying remote actions?
*/
replorigin = (replorigin_session_origin != InvalidRepOriginId &&
replorigin_session_origin != DoNotReplicateId);
START_CRIT_SECTION(); START_CRIT_SECTION();
@ -2094,12 +2106,33 @@ RecordTransactionCommitPrepared(TransactionId xid,
MyPgXact->delayChkpt = true; MyPgXact->delayChkpt = true;
/* Emit the XLOG commit record */ /* Emit the XLOG commit record */
recptr = XactLogCommitRecord(GetCurrentTimestamp(), recptr = XactLogCommitRecord(committs,
nchildren, children, nrels, rels, nchildren, children, nrels, rels,
ninvalmsgs, invalmsgs, ninvalmsgs, invalmsgs,
initfileinval, false, initfileinval, false,
xid); xid);
if (replorigin)
/* Move LSNs forward for this replication origin */
replorigin_session_advance(replorigin_session_origin_lsn,
XactLastRecEnd);
/*
* Record commit timestamp. The value comes from plain commit timestamp
* if replorigin is not enabled, or replorigin already set a value for us
* in replorigin_session_origin_timestamp otherwise.
*
* We don't need to WAL-log anything here, as the commit record written
* above already contains the data.
*/
if (!replorigin || replorigin_session_origin_timestamp == 0)
replorigin_session_origin_timestamp = committs;
TransactionTreeSetCommitTsData(xid, nchildren, children,
replorigin_session_origin_timestamp,
replorigin_session_origin, false, false);
/* /*
* We don't currently try to sleep before flush here ... nor is there any * We don't currently try to sleep before flush here ... nor is there any
* support for async commit of a prepared xact (the very idea is probably * support for async commit of a prepared xact (the very idea is probably

View File

@ -42,9 +42,9 @@
#include "miscadmin.h" #include "miscadmin.h"
#include "pgstat.h" #include "pgstat.h"
#include "replication/logical.h" #include "replication/logical.h"
#include "replication/walsender.h"
#include "replication/syncrep.h"
#include "replication/origin.h" #include "replication/origin.h"
#include "replication/syncrep.h"
#include "replication/walsender.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/predicate.h" #include "storage/predicate.h"
@ -1119,6 +1119,8 @@ AtSubStart_ResourceOwner(void)
* *
* Returns latest XID among xact and its children, or InvalidTransactionId * Returns latest XID among xact and its children, or InvalidTransactionId
* if the xact has no XID. (We compute that here just because it's easier.) * if the xact has no XID. (We compute that here just because it's easier.)
*
* If you change this function, see RecordTransactionCommitPrepared also.
*/ */
static TransactionId static TransactionId
RecordTransactionCommit(void) RecordTransactionCommit(void)
@ -1172,6 +1174,15 @@ RecordTransactionCommit(void)
} }
else else
{ {
bool replorigin;
/*
* Are we using the replication origins feature? Or, in other words,
* are we replaying remote actions?
*/
replorigin = (replorigin_session_origin != InvalidRepOriginId &&
replorigin_session_origin != DoNotReplicateId);
/* /*
* Begin commit critical section and insert the commit XLOG record. * Begin commit critical section and insert the commit XLOG record.
*/ */
@ -1206,26 +1217,28 @@ RecordTransactionCommit(void)
RelcacheInitFileInval, forceSyncCommit, RelcacheInitFileInval, forceSyncCommit,
InvalidTransactionId /* plain commit */ ); InvalidTransactionId /* plain commit */ );
/* if (replorigin)
* Record plain commit ts if not replaying remote actions, or if no /* Move LSNs forward for this replication origin */
* timestamp is configured.
*/
if (replorigin_session_origin == InvalidRepOriginId ||
replorigin_session_origin == DoNotReplicateId ||
replorigin_session_origin_timestamp == 0)
replorigin_session_origin_timestamp = xactStopTimestamp;
else
replorigin_session_advance(replorigin_session_origin_lsn, replorigin_session_advance(replorigin_session_origin_lsn,
XactLastRecEnd); XactLastRecEnd);
/* /*
* We don't need to WAL log origin or timestamp here, the commit * Record commit timestamp. The value comes from plain commit
* record contains all the necessary information and will redo the SET * timestamp if there's no replication origin; otherwise, the
* action during replay. * timestamp was already set in replorigin_session_origin_timestamp by
* replication.
*
* We don't need to WAL-log anything here, as the commit record
* written above already contains the data.
*/ */
if (!replorigin || replorigin_session_origin_timestamp == 0)
replorigin_session_origin_timestamp = xactStopTimestamp;
TransactionTreeSetCommitTsData(xid, nchildren, children, TransactionTreeSetCommitTsData(xid, nchildren, children,
replorigin_session_origin_timestamp, replorigin_session_origin_timestamp,
replorigin_session_origin, false); replorigin_session_origin,
false, false);
} }
/* /*
@ -5321,7 +5334,7 @@ xact_redo_commit(xl_xact_parsed_commit *parsed,
/* Set the transaction commit timestamp and metadata */ /* Set the transaction commit timestamp and metadata */
TransactionTreeSetCommitTsData(xid, parsed->nsubxacts, parsed->subxacts, TransactionTreeSetCommitTsData(xid, parsed->nsubxacts, parsed->subxacts,
commit_time, origin_id, commit_time, origin_id,
false); true, false);
if (standbyState == STANDBY_DISABLED) if (standbyState == STANDBY_DISABLED)
{ {

View File

@ -5826,19 +5826,6 @@ do { \
minValue))); \ minValue))); \
} while(0) } while(0)
#define RecoveryRequiresBoolParameter(param_name, currValue, masterValue) \
do { \
bool _currValue = (currValue); \
bool _masterValue = (masterValue); \
if (_currValue != _masterValue) \
ereport(ERROR, \
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
errmsg("hot standby is not possible because it requires \"%s\" to be same on master and standby (master has \"%s\", standby has \"%s\")", \
param_name, \
_masterValue ? "true" : "false", \
_currValue ? "true" : "false"))); \
} while(0)
/* /*
* Check to see if required parameters are set high enough on this server * Check to see if required parameters are set high enough on this server
* for various aspects of recovery operation. * for various aspects of recovery operation.
@ -5885,9 +5872,6 @@ CheckRequiredParameterValues(void)
RecoveryRequiresIntParameter("max_locks_per_transaction", RecoveryRequiresIntParameter("max_locks_per_transaction",
max_locks_per_xact, max_locks_per_xact,
ControlFile->max_locks_per_xact); ControlFile->max_locks_per_xact);
RecoveryRequiresBoolParameter("track_commit_timestamp",
track_commit_timestamp,
ControlFile->track_commit_timestamp);
} }
} }

View File

@ -24,7 +24,8 @@ extern bool check_track_commit_timestamp(bool *newval, void **extra,
extern void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids, extern void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
TransactionId *subxids, TimestampTz timestamp, TransactionId *subxids, TimestampTz timestamp,
RepOriginId nodeid, bool do_xlog); RepOriginId nodeid,
bool replaying_xlog, bool write_xlog);
extern bool TransactionIdGetCommitTsData(TransactionId xid, extern bool TransactionIdGetCommitTsData(TransactionId xid,
TimestampTz *ts, RepOriginId *nodeid); TimestampTz *ts, RepOriginId *nodeid);
extern TransactionId GetLatestCommitTsData(TimestampTz *ts, extern TransactionId GetLatestCommitTsData(TimestampTz *ts,
@ -67,4 +68,4 @@ extern void commit_ts_redo(XLogReaderState *record);
extern void commit_ts_desc(StringInfo buf, XLogReaderState *record); extern void commit_ts_desc(StringInfo buf, XLogReaderState *record);
extern const char *commit_ts_identify(uint8 info); extern const char *commit_ts_identify(uint8 info);
#endif /* COMMITTS_H */ #endif /* COMMIT_TS_H */