2012-11-28 16:35:01 +01:00
|
|
|
/*-------------------------------------------------------------------------
 *
 * xactdesc.c
 *	  rmgr descriptor routines for access/transam/xact.c
 *
 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/rmgrdesc/xactdesc.c
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "postgres.h"

#include "access/xact.h"
#include "catalog/catalog.h"
#include "storage/sinval.h"
#include "utils/timestamp.h"
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
TransactionId *subxacts;
|
|
|
|
|
|
|
|
subxacts = (TransactionId *) &xlrec->xnodes[xlrec->nrels];
|
|
|
|
|
|
|
|
appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
|
|
|
|
|
|
|
|
if (xlrec->nrels > 0)
|
|
|
|
{
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(buf, "; rels:");
|
2012-11-28 16:35:01 +01:00
|
|
|
for (i = 0; i < xlrec->nrels; i++)
|
|
|
|
{
|
|
|
|
char *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM);
|
|
|
|
|
|
|
|
appendStringInfo(buf, " %s", path);
|
|
|
|
pfree(path);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (xlrec->nsubxacts > 0)
|
|
|
|
{
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(buf, "; subxacts:");
|
2012-11-28 16:35:01 +01:00
|
|
|
for (i = 0; i < xlrec->nsubxacts; i++)
|
|
|
|
appendStringInfo(buf, " %u", subxacts[i]);
|
|
|
|
}
|
|
|
|
if (xlrec->nmsgs > 0)
|
|
|
|
{
|
|
|
|
SharedInvalidationMessage *msgs;
|
|
|
|
|
|
|
|
msgs = (SharedInvalidationMessage *) &subxacts[xlrec->nsubxacts];
|
|
|
|
|
|
|
|
if (XactCompletionRelcacheInitFileInval(xlrec->xinfo))
|
|
|
|
appendStringInfo(buf, "; relcache init file inval dbid %u tsid %u",
|
|
|
|
xlrec->dbId, xlrec->tsId);
|
|
|
|
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(buf, "; inval msgs:");
|
2012-11-28 16:35:01 +01:00
|
|
|
for (i = 0; i < xlrec->nmsgs; i++)
|
|
|
|
{
|
|
|
|
SharedInvalidationMessage *msg = &msgs[i];
|
|
|
|
|
|
|
|
if (msg->id >= 0)
|
|
|
|
appendStringInfo(buf, " catcache %d", msg->id);
|
|
|
|
else if (msg->id == SHAREDINVALCATALOG_ID)
|
|
|
|
appendStringInfo(buf, " catalog %u", msg->cat.catId);
|
|
|
|
else if (msg->id == SHAREDINVALRELCACHE_ID)
|
|
|
|
appendStringInfo(buf, " relcache %u", msg->rc.relId);
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
/* not expected, but print something anyway */
|
2012-11-28 16:35:01 +01:00
|
|
|
else if (msg->id == SHAREDINVALSMGR_ID)
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(buf, " smgr");
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
/* not expected, but print something anyway */
|
2012-11-28 16:35:01 +01:00
|
|
|
else if (msg->id == SHAREDINVALRELMAP_ID)
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(buf, " relmap");
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
else if (msg->id == SHAREDINVALSNAPSHOT_ID)
|
|
|
|
appendStringInfo(buf, " snapshot %u", msg->sn.relId);
|
2012-11-28 16:35:01 +01:00
|
|
|
else
|
|
|
|
appendStringInfo(buf, " unknown id %d", msg->id);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
xact_desc_commit_compact(StringInfo buf, xl_xact_commit_compact *xlrec)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
|
|
|
|
|
|
|
|
if (xlrec->nsubxacts > 0)
|
|
|
|
{
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(buf, "; subxacts:");
|
2012-11-28 16:35:01 +01:00
|
|
|
for (i = 0; i < xlrec->nsubxacts; i++)
|
|
|
|
appendStringInfo(buf, " %u", xlrec->subxacts[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time));
|
|
|
|
if (xlrec->nrels > 0)
|
|
|
|
{
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(buf, "; rels:");
|
2012-11-28 16:35:01 +01:00
|
|
|
for (i = 0; i < xlrec->nrels; i++)
|
|
|
|
{
|
|
|
|
char *path = relpathperm(xlrec->xnodes[i], MAIN_FORKNUM);
|
|
|
|
|
|
|
|
appendStringInfo(buf, " %s", path);
|
|
|
|
pfree(path);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (xlrec->nsubxacts > 0)
|
|
|
|
{
|
|
|
|
TransactionId *xacts = (TransactionId *)
|
|
|
|
&xlrec->xnodes[xlrec->nrels];
|
|
|
|
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(buf, "; subxacts:");
|
2012-11-28 16:35:01 +01:00
|
|
|
for (i = 0; i < xlrec->nsubxacts; i++)
|
|
|
|
appendStringInfo(buf, " %u", xacts[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
xact_desc_assignment(StringInfo buf, xl_xact_assignment *xlrec)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2013-10-31 15:55:59 +01:00
|
|
|
appendStringInfoString(buf, "subxacts:");
|
2012-11-28 16:35:01 +01:00
|
|
|
|
|
|
|
for (i = 0; i < xlrec->nsubxacts; i++)
|
|
|
|
appendStringInfo(buf, " %u", xlrec->xsub[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now disects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-disected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compansates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
xact_desc(StringInfo buf, XLogReaderState *record)
|
2012-11-28 16:35:01 +01:00
|
|
|
{
|
2014-06-14 09:46:48 +02:00
|
|
|
char *rec = XLogRecGetData(record);
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now disects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-disected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compansates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
2012-11-28 16:35:01 +01:00
|
|
|
|
|
|
|
if (info == XLOG_XACT_COMMIT_COMPACT)
|
|
|
|
{
|
|
|
|
xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) rec;
|
|
|
|
|
|
|
|
xact_desc_commit_compact(buf, xlrec);
|
|
|
|
}
|
|
|
|
else if (info == XLOG_XACT_COMMIT)
|
|
|
|
{
|
|
|
|
xl_xact_commit *xlrec = (xl_xact_commit *) rec;
|
|
|
|
|
|
|
|
xact_desc_commit(buf, xlrec);
|
|
|
|
}
|
|
|
|
else if (info == XLOG_XACT_ABORT)
|
|
|
|
{
|
|
|
|
xl_xact_abort *xlrec = (xl_xact_abort *) rec;
|
|
|
|
|
|
|
|
xact_desc_abort(buf, xlrec);
|
|
|
|
}
|
|
|
|
else if (info == XLOG_XACT_COMMIT_PREPARED)
|
|
|
|
{
|
|
|
|
xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) rec;
|
|
|
|
|
2014-09-19 15:17:12 +02:00
|
|
|
appendStringInfo(buf, "%u: ", xlrec->xid);
|
2012-11-28 16:35:01 +01:00
|
|
|
xact_desc_commit(buf, &xlrec->crec);
|
|
|
|
}
|
|
|
|
else if (info == XLOG_XACT_ABORT_PREPARED)
|
|
|
|
{
|
|
|
|
xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) rec;
|
|
|
|
|
2014-09-19 15:17:12 +02:00
|
|
|
appendStringInfo(buf, "%u: ", xlrec->xid);
|
2012-11-28 16:35:01 +01:00
|
|
|
xact_desc_abort(buf, &xlrec->arec);
|
|
|
|
}
|
|
|
|
else if (info == XLOG_XACT_ASSIGNMENT)
|
|
|
|
{
|
|
|
|
xl_xact_assignment *xlrec = (xl_xact_assignment *) rec;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Note that we ignore the WAL record's xid, since we're more
|
|
|
|
* interested in the top-level xid that issued the record and which
|
|
|
|
* xids are being reported here.
|
|
|
|
*/
|
2014-09-19 15:17:12 +02:00
|
|
|
appendStringInfo(buf, "xtop %u: ", xlrec->xtop);
|
2012-11-28 16:35:01 +01:00
|
|
|
xact_desc_assignment(buf, xlrec);
|
|
|
|
}
|
2014-09-19 15:17:12 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
const char *
|
|
|
|
xact_identify(uint8 info)
|
|
|
|
{
|
|
|
|
const char *id = NULL;
|
|
|
|
|
2014-09-22 16:48:14 +02:00
|
|
|
switch (info & ~XLR_INFO_MASK)
|
2014-09-19 15:17:12 +02:00
|
|
|
{
|
|
|
|
case XLOG_XACT_COMMIT:
|
|
|
|
id = "COMMIT";
|
|
|
|
break;
|
|
|
|
case XLOG_XACT_PREPARE:
|
|
|
|
id = "PREPARE";
|
|
|
|
break;
|
|
|
|
case XLOG_XACT_ABORT:
|
|
|
|
id = "ABORT";
|
|
|
|
break;
|
|
|
|
case XLOG_XACT_COMMIT_PREPARED:
|
|
|
|
id = "COMMIT_PREPARED";
|
|
|
|
break;
|
|
|
|
case XLOG_XACT_ABORT_PREPARED:
|
|
|
|
id = "ABORT_PREPARED";
|
|
|
|
break;
|
|
|
|
case XLOG_XACT_ASSIGNMENT:
|
|
|
|
id = "ASSIGNMENT";
|
|
|
|
break;
|
|
|
|
case XLOG_XACT_COMMIT_COMPACT:
|
|
|
|
id = "COMMIT_COMPACT";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return id;
|
2012-11-28 16:35:01 +01:00
|
|
|
}
|