/* * reorderbuffer.h * PostgreSQL logical replay/reorder buffer management. * * Copyright (c) 2012-2021, PostgreSQL Global Development Group * * src/include/replication/reorderbuffer.h */ #ifndef REORDERBUFFER_H #define REORDERBUFFER_H #include "access/htup_details.h" #include "lib/ilist.h" #include "storage/sinval.h" #include "utils/hsearch.h" #include "utils/relcache.h" #include "utils/snapshot.h" #include "utils/timestamp.h" extern PGDLLIMPORT int logical_decoding_work_mem; /* an individual tuple, stored in one chunk of memory */ typedef struct ReorderBufferTupleBuf { /* position in preallocated list */ slist_node node; /* tuple header, the interesting bit for users of logical decoding */ HeapTupleData tuple; /* pre-allocated size of tuple buffer, different from tuple size */ Size alloc_tuple_size; /* actual tuple data follows */ } ReorderBufferTupleBuf; /* pointer to the data stored in a TupleBuf */ #define ReorderBufferTupleBufData(p) \ ((HeapTupleHeader) MAXALIGN(((char *) p) + sizeof(ReorderBufferTupleBuf))) /* * Types of the change passed to a 'change' callback. * * For efficiency and simplicity reasons we want to keep Snapshots, CommandIds * and ComboCids in the same list with the user visible INSERT/UPDATE/DELETE * changes. Users of the decoding facilities will never see changes with * *_INTERNAL_* actions. * * The INTERNAL_SPEC_INSERT and INTERNAL_SPEC_CONFIRM changes concern * "speculative insertions", and their confirmation respectively. They're * used by INSERT .. ON CONFLICT .. UPDATE. Users of logical decoding don't * have to care about these. */ enum ReorderBufferChangeType { REORDER_BUFFER_CHANGE_INSERT, REORDER_BUFFER_CHANGE_UPDATE, REORDER_BUFFER_CHANGE_DELETE, REORDER_BUFFER_CHANGE_MESSAGE, REORDER_BUFFER_CHANGE_INVALIDATION, REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT, REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM, REORDER_BUFFER_CHANGE_TRUNCATE }; /* forward declaration */ struct ReorderBufferTXN; /* * a single 'change', can be an insert (with one tuple), an update (old, new), * or a delete (old). * * The same struct is also used internally for other purposes but that should * never be visible outside reorderbuffer.c. */ typedef struct ReorderBufferChange { XLogRecPtr lsn; /* The type of change. */ enum ReorderBufferChangeType action; /* Transaction this change belongs to. */ struct ReorderBufferTXN *txn; RepOriginId origin_id; /* * Context data for the change. Which part of the union is valid depends * on action. */ union { /* Old, new tuples when action == *_INSERT|UPDATE|DELETE */ struct { /* relation that has been changed */ RelFileNode relnode; /* no previously reassembled toast chunks are necessary anymore */ bool clear_toast_afterwards; /* valid for DELETE || UPDATE */ ReorderBufferTupleBuf *oldtuple; /* valid for INSERT || UPDATE */ ReorderBufferTupleBuf *newtuple; } tp; /* * Truncate data for REORDER_BUFFER_CHANGE_TRUNCATE representing one * set of relations to be truncated. */ struct { Size nrelids; bool cascade; bool restart_seqs; Oid *relids; } truncate; /* Message with arbitrary data. */ struct { char *prefix; Size message_size; char *message; } msg; /* New snapshot, set when action == *_INTERNAL_SNAPSHOT */ Snapshot snapshot; /* * New command id for existing snapshot in a catalog changing tx. Set * when action == *_INTERNAL_COMMAND_ID. */ CommandId command_id; /* * New cid mapping for catalog changing transaction, set when action * == *_INTERNAL_TUPLECID. */ struct { RelFileNode node; ItemPointerData tid; CommandId cmin; CommandId cmax; CommandId combocid; } tuplecid; /* Invalidation. */ struct { uint32 ninvalidations; /* Number of messages */ SharedInvalidationMessage *invalidations; /* invalidation message */ } inval; } data; /* * While in use this is how a change is linked into a transactions, * otherwise it's the preallocated list. */ dlist_node node; } ReorderBufferChange; /* ReorderBufferTXN txn_flags */ #define RBTXN_HAS_CATALOG_CHANGES 0x0001 #define RBTXN_IS_SUBXACT 0x0002 #define RBTXN_IS_SERIALIZED 0x0004 #define RBTXN_IS_SERIALIZED_CLEAR 0x0008 #define RBTXN_IS_STREAMED 0x0010 #define RBTXN_HAS_TOAST_INSERT 0x0020 #define RBTXN_HAS_SPEC_INSERT 0x0040 #define RBTXN_PREPARE 0x0080 #define RBTXN_SKIPPED_PREPARE 0x0100 /* Does the transaction have catalog changes? */ #define rbtxn_has_catalog_changes(txn) \ ( \ ((txn)->txn_flags & RBTXN_HAS_CATALOG_CHANGES) != 0 \ ) /* Is the transaction known as a subxact? */ #define rbtxn_is_known_subxact(txn) \ ( \ ((txn)->txn_flags & RBTXN_IS_SUBXACT) != 0 \ ) /* Has this transaction been spilled to disk? */ #define rbtxn_is_serialized(txn) \ ( \ ((txn)->txn_flags & RBTXN_IS_SERIALIZED) != 0 \ ) /* Has this transaction ever been spilled to disk? */ #define rbtxn_is_serialized_clear(txn) \ ( \ ((txn)->txn_flags & RBTXN_IS_SERIALIZED_CLEAR) != 0 \ ) /* This transaction's changes has toast insert, without main table insert. */ #define rbtxn_has_toast_insert(txn) \ ( \ ((txn)->txn_flags & RBTXN_HAS_TOAST_INSERT) != 0 \ ) /* * This transaction's changes has speculative insert, without speculative * confirm. */ #define rbtxn_has_spec_insert(txn) \ ( \ ((txn)->txn_flags & RBTXN_HAS_SPEC_INSERT) != 0 \ ) /* Check whether this transaction has an incomplete change. */ #define rbtxn_has_incomplete_tuple(txn) \ ( \ rbtxn_has_toast_insert(txn) || rbtxn_has_spec_insert(txn) \ ) /* * Has this transaction been streamed to downstream? * * (It's not possible to deduce this from nentries and nentries_mem for * various reasons. For example, all changes may be in subtransactions in * which case we'd have nentries==0 for the toplevel one, which would say * nothing about the streaming. So we maintain this flag, but only for the * toplevel transaction.) */ #define rbtxn_is_streamed(txn) \ ( \ ((txn)->txn_flags & RBTXN_IS_STREAMED) != 0 \ ) /* Has this transaction been prepared? */ #define rbtxn_prepared(txn) \ ( \ ((txn)->txn_flags & RBTXN_PREPARE) != 0 \ ) /* prepare for this transaction skipped? */ #define rbtxn_skip_prepared(txn) \ ( \ ((txn)->txn_flags & RBTXN_SKIPPED_PREPARE) != 0 \ ) typedef struct ReorderBufferTXN { /* See above */ bits32 txn_flags; /* The transaction's transaction id, can be a toplevel or sub xid. */ TransactionId xid; /* Xid of top-level transaction, if known */ TransactionId toplevel_xid; /* * Global transaction id required for identification of prepared * transactions. */ char *gid; /* * LSN of the first data carrying, WAL record with knowledge about this * xid. This is allowed to *not* be first record adorned with this xid, if * the previous records aren't relevant for logical decoding. */ XLogRecPtr first_lsn; /* ---- * LSN of the record that lead to this xact to be prepared or committed or * aborted. This can be a * * plain commit record * * plain commit record, of a parent transaction * * prepared tansaction * * prepared transaction commit * * plain abort record * * prepared transaction abort * * This can also become set to earlier values than transaction end when * a transaction is spilled to disk; specifically it's set to the LSN of * the latest change written to disk so far. * ---- */ XLogRecPtr final_lsn; /* * LSN pointing to the end of the commit record + 1. */ XLogRecPtr end_lsn; /* Toplevel transaction for this subxact (NULL for top-level). */ struct ReorderBufferTXN *toptxn; /* * LSN of the last lsn at which snapshot information reside, so we can * restart decoding from there and fully recover this transaction from * WAL. */ XLogRecPtr restart_decoding_lsn; /* origin of the change that caused this transaction */ RepOriginId origin_id; XLogRecPtr origin_lsn; /* * Commit or Prepare time, only known when we read the actual commit or * prepare record. */ TimestampTz commit_time; /* * The base snapshot is used to decode all changes until either this * transaction modifies the catalog, or another catalog-modifying * transaction commits. */ Snapshot base_snapshot; XLogRecPtr base_snapshot_lsn; dlist_node base_snapshot_node; /* link in txns_by_base_snapshot_lsn */ /* * Snapshot/CID from the previous streaming run. Only valid for already * streamed transactions (NULL/InvalidCommandId otherwise). */ Snapshot snapshot_now; CommandId command_id; /* * How many ReorderBufferChange's do we have in this txn. * * Changes in subtransactions are *not* included but tracked separately. */ uint64 nentries; /* * How many of the above entries are stored in memory in contrast to being * spilled to disk. */ uint64 nentries_mem; /* * List of ReorderBufferChange structs, including new Snapshots, new * CommandIds and command invalidation messages. */ dlist_head changes; /* * List of (relation, ctid) => (cmin, cmax) mappings for catalog tuples. * Those are always assigned to the toplevel transaction. (Keep track of * #entries to create a hash of the right size) */ dlist_head tuplecids; uint64 ntuplecids; /* * On-demand built hash for looking up the above values. */ HTAB *tuplecid_hash; /* * Hash containing (potentially partial) toast entries. NULL if no toast * tuples have been found for the current change. */ HTAB *toast_hash; /* * non-hierarchical list of subtransactions that are *not* aborted. Only * used in toplevel transactions. */ dlist_head subtxns; uint32 nsubtxns; /* * Stored cache invalidations. This is not a linked list because we get * all the invalidations at once. */ uint32 ninvalidations; SharedInvalidationMessage *invalidations; /* --- * Position in one of three lists: * * list of subtransactions if we are *known* to be subxact * * list of toplevel xacts (can be an as-yet unknown subxact) * * list of preallocated ReorderBufferTXNs (if unused) * --- */ dlist_node node; /* * Size of this transaction (changes currently in memory, in bytes). */ Size size; /* Size of top-transaction including sub-transactions. */ Size total_size; /* If we have detected concurrent abort then ignore future changes. */ bool concurrent_abort; /* * Private data pointer of the output plugin. */ void *output_plugin_private; } ReorderBufferTXN; /* so we can define the callbacks used inside struct ReorderBuffer itself */ typedef struct ReorderBuffer ReorderBuffer; /* change callback signature */ typedef void (*ReorderBufferApplyChangeCB) (ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change); /* truncate callback signature */ typedef void (*ReorderBufferApplyTruncateCB) (ReorderBuffer *rb, ReorderBufferTXN *txn, int nrelations, Relation relations[], ReorderBufferChange *change); /* begin callback signature */ typedef void (*ReorderBufferBeginCB) (ReorderBuffer *rb, ReorderBufferTXN *txn); /* commit callback signature */ typedef void (*ReorderBufferCommitCB) (ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr commit_lsn); /* message callback signature */ typedef void (*ReorderBufferMessageCB) (ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr message_lsn, bool transactional, const char *prefix, Size sz, const char *message); /* begin prepare callback signature */ typedef void (*ReorderBufferBeginPrepareCB) (ReorderBuffer *rb, ReorderBufferTXN *txn); /* prepare callback signature */ typedef void (*ReorderBufferPrepareCB) (ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr prepare_lsn); /* commit prepared callback signature */ typedef void (*ReorderBufferCommitPreparedCB) (ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr commit_lsn); /* rollback prepared callback signature */ typedef void (*ReorderBufferRollbackPreparedCB) (ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr prepare_end_lsn, TimestampTz prepare_time); /* start streaming transaction callback signature */ typedef void (*ReorderBufferStreamStartCB) ( ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr first_lsn); /* stop streaming transaction callback signature */ typedef void (*ReorderBufferStreamStopCB) ( ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr last_lsn); /* discard streamed transaction callback signature */ typedef void (*ReorderBufferStreamAbortCB) ( ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr abort_lsn); /* prepare streamed transaction callback signature */ typedef void (*ReorderBufferStreamPrepareCB) ( ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr prepare_lsn); /* commit streamed transaction callback signature */ typedef void (*ReorderBufferStreamCommitCB) ( ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr commit_lsn); /* stream change callback signature */ typedef void (*ReorderBufferStreamChangeCB) ( ReorderBuffer *rb, ReorderBufferTXN *txn, Relation relation, ReorderBufferChange *change); /* stream message callback signature */ typedef void (*ReorderBufferStreamMessageCB) ( ReorderBuffer *rb, ReorderBufferTXN *txn, XLogRecPtr message_lsn, bool transactional, const char *prefix, Size sz, const char *message); /* stream truncate callback signature */ typedef void (*ReorderBufferStreamTruncateCB) ( ReorderBuffer *rb, ReorderBufferTXN *txn, int nrelations, Relation relations[], ReorderBufferChange *change); struct ReorderBuffer { /* * xid => ReorderBufferTXN lookup table */ HTAB *by_txn; /* * Transactions that could be a toplevel xact, ordered by LSN of the first * record bearing that xid. */ dlist_head toplevel_by_lsn; /* * Transactions and subtransactions that have a base snapshot, ordered by * LSN of the record which caused us to first obtain the base snapshot. * This is not the same as toplevel_by_lsn, because we only set the base * snapshot on the first logical-decoding-relevant record (eg. heap * writes), whereas the initial LSN could be set by other operations. */ dlist_head txns_by_base_snapshot_lsn; /* * one-entry sized cache for by_txn. Very frequently the same txn gets * looked up over and over again. */ TransactionId by_txn_last_xid; ReorderBufferTXN *by_txn_last_txn; /* * Callbacks to be called when a transactions commits. */ ReorderBufferBeginCB begin; ReorderBufferApplyChangeCB apply_change; ReorderBufferApplyTruncateCB apply_truncate; ReorderBufferCommitCB commit; ReorderBufferMessageCB message; /* * Callbacks to be called when streaming a transaction at prepare time. */ ReorderBufferBeginCB begin_prepare; ReorderBufferPrepareCB prepare; ReorderBufferCommitPreparedCB commit_prepared; ReorderBufferRollbackPreparedCB rollback_prepared; /* * Callbacks to be called when streaming a transaction. */ ReorderBufferStreamStartCB stream_start; ReorderBufferStreamStopCB stream_stop; ReorderBufferStreamAbortCB stream_abort; ReorderBufferStreamPrepareCB stream_prepare; ReorderBufferStreamCommitCB stream_commit; ReorderBufferStreamChangeCB stream_change; ReorderBufferStreamMessageCB stream_message; ReorderBufferStreamTruncateCB stream_truncate; /* * Pointer that will be passed untouched to the callbacks. */ void *private_data; /* * Saved output plugin option */ bool output_rewrites; /* * Private memory context. */ MemoryContext context; /* * Memory contexts for specific types objects */ MemoryContext change_context; MemoryContext txn_context; MemoryContext tup_context; XLogRecPtr current_restart_decoding_lsn; /* buffer for disk<->memory conversions */ char *outbuf; Size outbufsize; /* memory accounting */ Size size; /* * Statistics about transactions spilled to disk. * * A single transaction may be spilled repeatedly, which is why we keep * two different counters. For spilling, the transaction counter includes * both toplevel transactions and subtransactions. */ int64 spillTxns; /* number of transactions spilled to disk */ int64 spillCount; /* spill-to-disk invocation counter */ int64 spillBytes; /* amount of data spilled to disk */ /* Statistics about transactions streamed to the decoding output plugin */ int64 streamTxns; /* number of transactions streamed */ int64 streamCount; /* streaming invocation counter */ int64 streamBytes; /* amount of data streamed */ }; ReorderBuffer *ReorderBufferAllocate(void); void ReorderBufferFree(ReorderBuffer *); ReorderBufferTupleBuf *ReorderBufferGetTupleBuf(ReorderBuffer *, Size tuple_len); void ReorderBufferReturnTupleBuf(ReorderBuffer *, ReorderBufferTupleBuf *tuple); ReorderBufferChange *ReorderBufferGetChange(ReorderBuffer *); void ReorderBufferReturnChange(ReorderBuffer *, ReorderBufferChange *, bool); Oid *ReorderBufferGetRelids(ReorderBuffer *, int nrelids); void ReorderBufferReturnRelids(ReorderBuffer *, Oid *relids); void ReorderBufferQueueChange(ReorderBuffer *, TransactionId, XLogRecPtr lsn, ReorderBufferChange *, bool toast_insert); void ReorderBufferQueueMessage(ReorderBuffer *, TransactionId, Snapshot snapshot, XLogRecPtr lsn, bool transactional, const char *prefix, Size message_size, const char *message); void ReorderBufferCommit(ReorderBuffer *, TransactionId, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, RepOriginId origin_id, XLogRecPtr origin_lsn); void ReorderBufferFinishPrepared(ReorderBuffer *rb, TransactionId xid, XLogRecPtr commit_lsn, XLogRecPtr end_lsn, TimestampTz commit_time, RepOriginId origin_id, XLogRecPtr origin_lsn, char *gid, bool is_commit); void ReorderBufferAssignChild(ReorderBuffer *, TransactionId, TransactionId, XLogRecPtr commit_lsn); void ReorderBufferCommitChild(ReorderBuffer *, TransactionId, TransactionId, XLogRecPtr commit_lsn, XLogRecPtr end_lsn); void ReorderBufferAbort(ReorderBuffer *, TransactionId, XLogRecPtr lsn); void ReorderBufferAbortOld(ReorderBuffer *, TransactionId xid); void ReorderBufferForget(ReorderBuffer *, TransactionId, XLogRecPtr lsn); void ReorderBufferInvalidate(ReorderBuffer *, TransactionId, XLogRecPtr lsn); void ReorderBufferSetBaseSnapshot(ReorderBuffer *, TransactionId, XLogRecPtr lsn, struct SnapshotData *snap); void ReorderBufferAddSnapshot(ReorderBuffer *, TransactionId, XLogRecPtr lsn, struct SnapshotData *snap); void ReorderBufferAddNewCommandId(ReorderBuffer *, TransactionId, XLogRecPtr lsn, CommandId cid); void ReorderBufferAddNewTupleCids(ReorderBuffer *, TransactionId, XLogRecPtr lsn, RelFileNode node, ItemPointerData pt, CommandId cmin, CommandId cmax, CommandId combocid); void ReorderBufferAddInvalidations(ReorderBuffer *, TransactionId, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs); void ReorderBufferImmediateInvalidation(ReorderBuffer *, uint32 ninvalidations, SharedInvalidationMessage *invalidations); void ReorderBufferProcessXid(ReorderBuffer *, TransactionId xid, XLogRecPtr lsn); void ReorderBufferXidSetCatalogChanges(ReorderBuffer *, TransactionId xid, XLogRecPtr lsn); bool ReorderBufferXidHasCatalogChanges(ReorderBuffer *, TransactionId xid); bool ReorderBufferXidHasBaseSnapshot(ReorderBuffer *, TransactionId xid); bool ReorderBufferRememberPrepareInfo(ReorderBuffer *rb, TransactionId xid, XLogRecPtr prepare_lsn, XLogRecPtr end_lsn, TimestampTz prepare_time, RepOriginId origin_id, XLogRecPtr origin_lsn); void ReorderBufferSkipPrepare(ReorderBuffer *rb, TransactionId xid); void ReorderBufferPrepare(ReorderBuffer *rb, TransactionId xid, char *gid); ReorderBufferTXN *ReorderBufferGetOldestTXN(ReorderBuffer *); TransactionId ReorderBufferGetOldestXmin(ReorderBuffer *rb); void ReorderBufferSetRestartPoint(ReorderBuffer *, XLogRecPtr ptr); void StartupReorderBuffer(void); #endif