postgresql/src/include/access/heapam_xlog.h

/*-------------------------------------------------------------------------
 *
 * heapam_xlog.h
 *	  POSTGRES heap access XLOG definitions.
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/heapam_xlog.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef HEAPAM_XLOG_H
#define HEAPAM_XLOG_H

#include "access/htup.h"
#include "access/xlogreader.h"
#include "lib/stringinfo.h"
#include "storage/buf.h"
#include "storage/bufpage.h"
#include "storage/relfilelocator.h"
#include "utils/relcache.h"


/*
 * WAL record definitions for heapam.c's WAL operations
 *
 * XLOG allows to store some information in high 4 bits of log
 * record xl_info field.  We use 3 for opcode and one for init bit.
 */
#define XLOG_HEAP_INSERT		0x00
#define XLOG_HEAP_DELETE		0x10
#define XLOG_HEAP_UPDATE		0x20
#define XLOG_HEAP_TRUNCATE		0x30
#define XLOG_HEAP_HOT_UPDATE	0x40
#define XLOG_HEAP_CONFIRM		0x50
#define XLOG_HEAP_LOCK			0x60
#define XLOG_HEAP_INPLACE		0x70

#define XLOG_HEAP_OPMASK		0x70
/*
 * When we insert 1st item on new page in INSERT, UPDATE, HOT_UPDATE,
 * or MULTI_INSERT, we can (and we do) restore entire page in redo
 */
#define XLOG_HEAP_INIT_PAGE		0x80
/*
 * We ran out of opcodes, so heapam.c now has a second RmgrId.  These opcodes
 * are associated with RM_HEAP2_ID, but are not logically different from
 * the ones above associated with RM_HEAP_ID.  XLOG_HEAP_OPMASK applies to
 * these, too.
 *
 * There's no difference between XLOG_HEAP2_PRUNE_ON_ACCESS,
 * XLOG_HEAP2_PRUNE_VACUUM_SCAN and XLOG_HEAP2_PRUNE_VACUUM_CLEANUP records.
 * They have separate opcodes just for debugging and analysis purposes, to
 * indicate why the WAL record was emitted.
 */
#define XLOG_HEAP2_REWRITE		0x00
#define XLOG_HEAP2_PRUNE_ON_ACCESS		0x10
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN	0x20
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP	0x30
#define XLOG_HEAP2_VISIBLE		0x40
#define XLOG_HEAP2_MULTI_INSERT 0x50
#define XLOG_HEAP2_LOCK_UPDATED 0x60
#define XLOG_HEAP2_NEW_CID		0x70

/*
 * xl_heap_insert/xl_heap_multi_insert flag values, 8 bits are available.
 */
/* PD_ALL_VISIBLE was cleared */
#define XLH_INSERT_ALL_VISIBLE_CLEARED			(1<<0)
#define XLH_INSERT_LAST_IN_MULTI				(1<<1)
#define XLH_INSERT_IS_SPECULATIVE				(1<<2)
#define XLH_INSERT_CONTAINS_NEW_TUPLE			(1<<3)
#define XLH_INSERT_ON_TOAST_RELATION			(1<<4)

/* all_frozen_set always implies all_visible_set */
#define XLH_INSERT_ALL_FROZEN_SET				(1<<5)

/*
 * xl_heap_update flag values, 8 bits are available.
 */
/* PD_ALL_VISIBLE was cleared */
#define XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED		(1<<0)
/* PD_ALL_VISIBLE was cleared in the 2nd page */
#define XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED		(1<<1)
#define XLH_UPDATE_CONTAINS_OLD_TUPLE			(1<<2)
#define XLH_UPDATE_CONTAINS_OLD_KEY				(1<<3)
#define XLH_UPDATE_CONTAINS_NEW_TUPLE			(1<<4)
#define XLH_UPDATE_PREFIX_FROM_OLD				(1<<5)
#define XLH_UPDATE_SUFFIX_FROM_OLD				(1<<6)

/* convenience macro for checking whether any form of old tuple was logged */
#define XLH_UPDATE_CONTAINS_OLD						\
	(XLH_UPDATE_CONTAINS_OLD_TUPLE | XLH_UPDATE_CONTAINS_OLD_KEY)

/*
 * xl_heap_delete flag values, 8 bits are available.
 */
/* PD_ALL_VISIBLE was cleared */
#define XLH_DELETE_ALL_VISIBLE_CLEARED			(1<<0)
#define XLH_DELETE_CONTAINS_OLD_TUPLE			(1<<1)
#define XLH_DELETE_CONTAINS_OLD_KEY				(1<<2)
#define XLH_DELETE_IS_SUPER						(1<<3)
#define XLH_DELETE_IS_PARTITION_MOVE			(1<<4)

/* convenience macro for checking whether any form of old tuple was logged */
#define XLH_DELETE_CONTAINS_OLD						\
	(XLH_DELETE_CONTAINS_OLD_TUPLE | XLH_DELETE_CONTAINS_OLD_KEY)

/* This is what we need to know about delete */
typedef struct xl_heap_delete
{
	TransactionId xmax;			/* xmax of the deleted tuple */
	OffsetNumber offnum;		/* deleted tuple's offset */
	uint8		infobits_set;	/* infomask bits */
	uint8		flags;
} xl_heap_delete;

#define SizeOfHeapDelete	(offsetof(xl_heap_delete, flags) + sizeof(uint8))

/*
 * xl_heap_truncate flag values, 8 bits are available.
 */
#define XLH_TRUNCATE_CASCADE					(1<<0)
#define XLH_TRUNCATE_RESTART_SEQS				(1<<1)

/*
 * For truncate we list all truncated relids in an array, followed by all
 * sequence relids that need to be restarted, if any.
 * All rels are always within the same database, so we just list dbid once.
 */
typedef struct xl_heap_truncate
{
	Oid			dbId;
	uint32		nrelids;
	uint8		flags;
	Oid			relids[FLEXIBLE_ARRAY_MEMBER];
} xl_heap_truncate;

#define SizeOfHeapTruncate	(offsetof(xl_heap_truncate, relids))

/*
 * We don't store the whole fixed part (HeapTupleHeaderData) of an inserted
 * or updated tuple in WAL; we can save a few bytes by reconstructing the
 * fields that are available elsewhere in the WAL record, or perhaps just
 * plain needn't be reconstructed.  These are the fields we must store.
 */
typedef struct xl_heap_header
{
	uint16		t_infomask2;
	uint16		t_infomask;
	uint8		t_hoff;
} xl_heap_header;

#define SizeOfHeapHeader	(offsetof(xl_heap_header, t_hoff) + sizeof(uint8))

/* This is what we need to know about insert */
typedef struct xl_heap_insert
{
	OffsetNumber offnum;		/* inserted tuple's offset */
	uint8		flags;

	/* xl_heap_header & TUPLE DATA in backup block 0 */
} xl_heap_insert;

#define SizeOfHeapInsert	(offsetof(xl_heap_insert, flags) + sizeof(uint8))

/*
 * This is what we need to know about a multi-insert.
 *
 * The main data of the record consists of this xl_heap_multi_insert header.
 * 'offsets' array is omitted if the whole page is reinitialized
 * (XLOG_HEAP_INIT_PAGE).
 *
 * In block 0's data portion, there is an xl_multi_insert_tuple struct,
 * followed by the tuple data for each tuple. There is padding to align
 * each xl_multi_insert_tuple struct.
 */
typedef struct xl_heap_multi_insert
{
	uint8		flags;
	uint16		ntuples;
	OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER];
} xl_heap_multi_insert;

#define SizeOfHeapMultiInsert	offsetof(xl_heap_multi_insert, offsets)

typedef struct xl_multi_insert_tuple
{
	uint16		datalen;		/* size of tuple data that follows */
	uint16		t_infomask2;
	uint16		t_infomask;
	uint8		t_hoff;
	/* TUPLE DATA FOLLOWS AT END OF STRUCT */
} xl_multi_insert_tuple;

#define SizeOfMultiInsertTuple	(offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8))

/*
 * This is what we need to know about update|hot_update
 *
 * Backup blk 0: new page
 *
 * If XLH_UPDATE_PREFIX_FROM_OLD or XLH_UPDATE_SUFFIX_FROM_OLD flags are set,
 * the prefix and/or suffix come first, as one or two uint16s.
 *
 * After that, xl_heap_header and new tuple data follow.  The new tuple
 * data doesn't include the prefix and suffix, which are copied from the
 * old tuple on replay.
 *
 * If XLH_UPDATE_CONTAINS_NEW_TUPLE flag is given, the tuple data is
 * included even if a full-page image was taken.
 *
 * Backup blk 1: old page, if different. (no data, just a reference to the blk)
 */
typedef struct xl_heap_update
{
	TransactionId old_xmax;		/* xmax of the old tuple */
	OffsetNumber old_offnum;	/* old tuple's offset */
	uint8		old_infobits_set;	/* infomask bits to set on old tuple */
	uint8		flags;
	TransactionId new_xmax;		/* xmax of the new tuple */
	OffsetNumber new_offnum;	/* new tuple's offset */

	/*
	 * If XLH_UPDATE_CONTAINS_OLD_TUPLE or XLH_UPDATE_CONTAINS_OLD_KEY flags
	 * are set, xl_heap_header and tuple data for the old tuple follow.
	 */
} xl_heap_update;

#define SizeOfHeapUpdate	(offsetof(xl_heap_update, new_offnum) + sizeof(OffsetNumber))

/*
 * These structures and flags encode VACUUM pruning and freezing and on-access
 * pruning page modifications.
 *
 * xl_heap_prune is the main record.  The XLHP_HAS_* flags indicate which
 * "sub-records" are included and the other XLHP_* flags provide additional
 * information about the conditions for replay.
 *
 * The data for block reference 0 contains "sub-records" depending on which of
 * the XLHP_HAS_* flags are set.  See xlhp_* struct definitions below.  The
 * sub-records appear in the same order as the XLHP_* flags.  An example
 * record with every sub-record included:
 *
 *-----------------------------------------------------------------------------
 * Main data section:
 *
 *	xl_heap_prune
 *		uint8				flags
 *	TransactionId			snapshot_conflict_horizon
 *
 * Block 0 data section:
 *
 *	xlhp_freeze_plans
 *		uint16				nplans
 *		[2 bytes of padding]
 *		xlhp_freeze_plan	plans[nplans]
 *
 *	xlhp_prune_items
 *		uint16				nredirected
 *		OffsetNumber		redirected[2 * nredirected]
 *
 *	xlhp_prune_items
 *		uint16				ndead
 *		OffsetNumber		nowdead[ndead]
 *
 *	xlhp_prune_items
 *		uint16				nunused
 *		OffsetNumber		nowunused[nunused]
 *
 *	OffsetNumber			frz_offsets[sum([plan.ntuples for plan in plans])]
 *-----------------------------------------------------------------------------
 *
 * NOTE: because the record data is assembled from many optional parts, we
 * have to pay close attention to alignment.  In the main data section,
 * 'snapshot_conflict_horizon' is stored unaligned after 'flags', to save
 * space.  In the block 0 data section, the freeze plans appear first, because
 * they contain TransactionId fields that require 4-byte alignment.  All the
 * other fields require only 2-byte alignment.  This is also the reason that
 * 'frz_offsets' is stored separately from the xlhp_freeze_plan structs.
 */
typedef struct xl_heap_prune
{
	uint8		reason;
	uint8		flags;

	/*
	 * If XLHP_HAS_CONFLICT_HORIZON is set, the conflict horzion XID follows,
	 * unaligned
	 */
} xl_heap_prune;

#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint8))

/* to handle recovery conflict during logical decoding on standby */
#define		XLHP_IS_CATALOG_REL			(1 << 1)

/*
 * Does replaying the record require a cleanup-lock?
 *
 * Pruning, in VACUUM's first pass or when otherwise accessing a page,
 * requires a cleanup lock.  For freezing, and VACUUM's second pass which
 * marks LP_DEAD line pointers as unused without moving any tuple data, an
 * ordinary exclusive lock is sufficient.
 */
#define		XLHP_CLEANUP_LOCK	       (1 << 2)

/*
 * If we remove or freeze any entries that contain xids, we need to include a
 * snapshot conflict horizon.  It's used in Hot Standby mode to ensure that
 * there are no queries running for which the removed tuples are still
 * visible, or which still consider the frozen XIDs as running.
 */
#define		XLHP_HAS_CONFLICT_HORIZON   (1 << 3)

/*
 * Indicates that an xlhp_freeze_plans sub-record and one or more
 * xlhp_freeze_plan sub-records are present.
 */
#define		XLHP_HAS_FREEZE_PLANS		(1 << 4)

/*
 * XLHP_HAS_REDIRECTIONS, XLHP_HAS_DEAD_ITEMS, and XLHP_HAS_NOW_UNUSED
 * indicate that xlhp_prune_items sub-records with redirected, dead, and
 * unused item offsets are present.
 */
#define		XLHP_HAS_REDIRECTIONS		(1 << 5)
#define		XLHP_HAS_DEAD_ITEMS	        (1 << 6)
#define		XLHP_HAS_NOW_UNUSED_ITEMS   (1 << 7)

/*
 * xlhp_freeze_plan describes how to freeze a group of one or more heap tuples
 * (appears in xl_heap_prune's xlhp_freeze_plans sub-record)
 */
/* 0x01 was XLH_FREEZE_XMIN */
#define		XLH_FREEZE_XVAC		0x02
#define		XLH_INVALID_XVAC	0x04

typedef struct xlhp_freeze_plan
{
	TransactionId xmax;
	uint16		t_infomask2;
	uint16		t_infomask;
	uint8		frzflags;

	/* Length of individual page offset numbers array for this plan */
	uint16		ntuples;
} xlhp_freeze_plan;

/*
 * This is what we need to know about a block being frozen during vacuum
 *
 * The backup block's data contains an array of xlhp_freeze_plan structs (with
 * nplans elements).  The individual item offsets are located in an array at
 * the end of the entire record with with nplans * (each plan's ntuples)
 * members.  Those offsets are in the same order as the plans.  The REDO
 * routine uses the offsets to freeze the corresponding heap tuples.
 *
 * (As of PostgreSQL 17, XLOG_HEAP2_PRUNE_VACUUM_SCAN records replace the
 * separate XLOG_HEAP2_FREEZE_PAGE records.)
 */
typedef struct xlhp_freeze_plans
{
	uint16		nplans;
	xlhp_freeze_plan plans[FLEXIBLE_ARRAY_MEMBER];
} xlhp_freeze_plans;

/*
 * Generic sub-record type contained in block reference 0 of an xl_heap_prune
 * record and used for redirect, dead, and unused items if any of
 * XLHP_HAS_REDIRECTIONS/XLHP_HAS_DEAD_ITEMS/XLHP_HAS_NOW_UNUSED_ITEMS are
 * set.  Note that in the XLHP_HAS_REDIRECTIONS variant, there are actually 2
 * * length number of OffsetNumbers in the data.
 */
typedef struct xlhp_prune_items
{
	uint16		ntargets;
	OffsetNumber data[FLEXIBLE_ARRAY_MEMBER];
} xlhp_prune_items;


/* flags for infobits_set */
#define XLHL_XMAX_IS_MULTI		0x01
#define XLHL_XMAX_LOCK_ONLY		0x02
#define XLHL_XMAX_EXCL_LOCK		0x04
#define XLHL_XMAX_KEYSHR_LOCK	0x08
#define XLHL_KEYS_UPDATED		0x10

/* flag bits for xl_heap_lock / xl_heap_lock_updated's flag field */
#define XLH_LOCK_ALL_FROZEN_CLEARED		0x01

/* This is what we need to know about lock */
typedef struct xl_heap_lock
{
	TransactionId xmax;			/* might be a MultiXactId */
	OffsetNumber offnum;		/* locked tuple's offset on page */
	uint8		infobits_set;	/* infomask and infomask2 bits to set */
	uint8		flags;			/* XLH_LOCK_* flag bits */
} xl_heap_lock;

#define SizeOfHeapLock	(offsetof(xl_heap_lock, flags) + sizeof(uint8))

/* This is what we need to know about locking an updated version of a row */
typedef struct xl_heap_lock_updated
{
	TransactionId xmax;
	OffsetNumber offnum;
	uint8		infobits_set;
	uint8		flags;
} xl_heap_lock_updated;

#define SizeOfHeapLockUpdated	(offsetof(xl_heap_lock_updated, flags) + sizeof(uint8))

/* This is what we need to know about confirmation of speculative insertion */
typedef struct xl_heap_confirm
{
	OffsetNumber offnum;		/* confirmed tuple's offset on page */
} xl_heap_confirm;

#define SizeOfHeapConfirm	(offsetof(xl_heap_confirm, offnum) + sizeof(OffsetNumber))

/* This is what we need to know about in-place update */
typedef struct xl_heap_inplace
{
	OffsetNumber offnum;		/* updated tuple's offset on page */
	/* TUPLE DATA FOLLOWS AT END OF STRUCT */
} xl_heap_inplace;

#define SizeOfHeapInplace	(offsetof(xl_heap_inplace, offnum) + sizeof(OffsetNumber))

/*
 * This is what we need to know about setting a visibility map bit
 *
 * Backup blk 0: visibility map buffer
 * Backup blk 1: heap buffer
 */
typedef struct xl_heap_visible
{
	TransactionId snapshotConflictHorizon;
	uint8		flags;
} xl_heap_visible;

#define SizeOfHeapVisible (offsetof(xl_heap_visible, flags) + sizeof(uint8))

typedef struct xl_heap_new_cid
{
	/*
	 * store toplevel xid so we don't have to merge cids from different
	 * transactions
	 */
	TransactionId top_xid;
	CommandId	cmin;
	CommandId	cmax;
	CommandId	combocid;		/* just for debugging */

	/*
	 * Store the relfilelocator/ctid pair to facilitate lookups.
	 */
	RelFileLocator target_locator;
	ItemPointerData target_tid;
} xl_heap_new_cid;

#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target_tid) + sizeof(ItemPointerData))

/* logical rewrite xlog record header */
typedef struct xl_heap_rewrite_mapping
{
	TransactionId mapped_xid;	/* xid that might need to see the row */
	Oid			mapped_db;		/* DbOid or InvalidOid for shared rels */
	Oid			mapped_rel;		/* Oid of the mapped relation */
	off_t		offset;			/* How far have we written so far */
	uint32		num_mappings;	/* Number of in-memory mappings */
	XLogRecPtr	start_lsn;		/* Insert LSN at begin of rewrite */
} xl_heap_rewrite_mapping;

extern void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple,
												  TransactionId *snapshotConflictHorizon);

extern void heap_redo(XLogReaderState *record);
extern void heap_desc(StringInfo buf, XLogReaderState *record);
extern const char *heap_identify(uint8 info);
extern void heap_mask(char *pagedata, BlockNumber blkno);
extern void heap2_redo(XLogReaderState *record);
extern void heap2_desc(StringInfo buf, XLogReaderState *record);
extern const char *heap2_identify(uint8 info);
extern void heap_xlog_logical_rewrite(XLogReaderState *r);

extern XLogRecPtr log_heap_visible(Relation rel, Buffer heap_buffer,
								   Buffer vm_buffer,
								   TransactionId snapshotConflictHorizon,
								   uint8 vmflags);

/* in heapdesc.c, so it can be shared between frontend/backend code */
extern void heap_xlog_deserialize_prune_and_freeze(char *cursor, uint8 flags,
												   int *nplans, xlhp_freeze_plan **plans,
												   OffsetNumber **frz_offsets,
												   int *nredirected, OffsetNumber **redirected,
												   int *ndead, OffsetNumber **nowdead,
												   int *nunused, OffsetNumber **nowunused);

#endif							/* HEAPAM_XLOG_H */