diff --git a/contrib/Makefile b/contrib/Makefile
index 332b486ecc..bbf220407b 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -41,6 +41,7 @@ SUBDIRS = \
pgrowlocks \
pgstattuple \
pg_visibility \
+ pg_walinspect \
postgres_fdw \
seg \
spi \
diff --git a/contrib/pg_walinspect/.gitignore b/contrib/pg_walinspect/.gitignore
new file mode 100644
index 0000000000..5dcb3ff972
--- /dev/null
+++ b/contrib/pg_walinspect/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/contrib/pg_walinspect/Makefile b/contrib/pg_walinspect/Makefile
new file mode 100644
index 0000000000..350845cd43
--- /dev/null
+++ b/contrib/pg_walinspect/Makefile
@@ -0,0 +1,23 @@
+# contrib/pg_walinspect/Makefile
+
+MODULE_big = pg_walinspect
+OBJS = \
+ $(WIN32RES) \
+ pg_walinspect.o
+PGFILEDESC = "pg_walinspect - functions to inspect contents of PostgreSQL Write-Ahead Log"
+
+EXTENSION = pg_walinspect
+DATA = pg_walinspect--1.0.sql
+
+REGRESS = pg_walinspect
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_walinspect
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_walinspect/expected/pg_walinspect.out b/contrib/pg_walinspect/expected/pg_walinspect.out
new file mode 100644
index 0000000000..634b1bb739
--- /dev/null
+++ b/contrib/pg_walinspect/expected/pg_walinspect.out
@@ -0,0 +1,165 @@
+CREATE EXTENSION pg_walinspect;
+CREATE TABLE sample_tbl(col1 int, col2 int);
+-- Make sure checkpoints don't interfere with the test.
+SELECT lsn as wal_lsn1 FROM
+ pg_create_physical_replication_slot('regress_pg_walinspect_slot', true, false)
+ \gset
+INSERT INTO sample_tbl SELECT * FROM generate_series(1, 2);
+SELECT pg_current_wal_lsn() AS wal_lsn2 \gset
+INSERT INTO sample_tbl SELECT * FROM generate_series(1, 2);
+-- ===================================================================
+-- Tests for input validation
+-- ===================================================================
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info(:'wal_lsn2', :'wal_lsn1'); -- ERROR
+ERROR: WAL start LSN must be less than end LSN
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats(:'wal_lsn2', :'wal_lsn1'); -- ERROR
+ERROR: WAL start LSN must be less than end LSN
+-- ===================================================================
+-- Tests for all function executions
+-- ===================================================================
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_record_info(:'wal_lsn1');
+ ok
+----
+ t
+(1 row)
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2');
+ ok
+----
+ t
+(1 row)
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info_till_end_of_wal(:'wal_lsn1');
+ ok
+----
+ t
+(1 row)
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats(:'wal_lsn1', :'wal_lsn2');
+ ok
+----
+ t
+(1 row)
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats_till_end_of_wal(:'wal_lsn1');
+ ok
+----
+ t
+(1 row)
+
+-- ===================================================================
+-- Test for filtering out WAL records of a particular table
+-- ===================================================================
+SELECT oid AS sample_tbl_oid FROM pg_class WHERE relname = 'sample_tbl' \gset
+SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2')
+ WHERE block_ref LIKE concat('%', :'sample_tbl_oid', '%') AND resource_manager = 'Heap';
+ ok
+----
+ t
+(1 row)
+
+-- ===================================================================
+-- Test for filtering out WAL records based on resource_manager and
+-- record_type
+-- ===================================================================
+SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2')
+ WHERE resource_manager = 'Heap' AND record_type = 'INSERT';
+ ok
+----
+ t
+(1 row)
+
+-- ===================================================================
+-- Tests for permissions
+-- ===================================================================
+CREATE ROLE regress_pg_walinspect;
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- no
+ has_function_privilege
+------------------------
+ f
+(1 row)
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- no
+ has_function_privilege
+------------------------
+ f
+(1 row)
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- no
+ has_function_privilege
+------------------------
+ f
+(1 row)
+
+-- Functions accessible by users with role pg_read_server_files
+GRANT pg_read_server_files TO regress_pg_walinspect;
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes
+ has_function_privilege
+------------------------
+ t
+(1 row)
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
+ has_function_privilege
+------------------------
+ t
+(1 row)
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes
+ has_function_privilege
+------------------------
+ t
+(1 row)
+
+REVOKE pg_read_server_files FROM regress_pg_walinspect;
+-- Superuser can grant execute to other users
+GRANT EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn)
+ TO regress_pg_walinspect;
+GRANT EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn)
+ TO regress_pg_walinspect;
+GRANT EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean)
+ TO regress_pg_walinspect;
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes
+ has_function_privilege
+------------------------
+ t
+(1 row)
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
+ has_function_privilege
+------------------------
+ t
+(1 row)
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes
+ has_function_privilege
+------------------------
+ t
+(1 row)
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn)
+ FROM regress_pg_walinspect;
+REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn)
+ FROM regress_pg_walinspect;
+REVOKE EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean)
+ FROM regress_pg_walinspect;
+-- ===================================================================
+-- Clean up
+-- ===================================================================
+DROP ROLE regress_pg_walinspect;
+SELECT pg_drop_replication_slot('regress_pg_walinspect_slot');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
+DROP TABLE sample_tbl;
diff --git a/contrib/pg_walinspect/pg_walinspect--1.0.sql b/contrib/pg_walinspect/pg_walinspect--1.0.sql
new file mode 100644
index 0000000000..4243516d8a
--- /dev/null
+++ b/contrib/pg_walinspect/pg_walinspect--1.0.sql
@@ -0,0 +1,118 @@
+/* contrib/pg_walinspect/pg_walinspect--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_walinspect" to load this file. \quit
+
+--
+-- pg_get_wal_record_info()
+--
+CREATE FUNCTION pg_get_wal_record_info(IN in_lsn pg_lsn,
+ OUT start_lsn pg_lsn,
+ OUT end_lsn pg_lsn,
+ OUT prev_lsn pg_lsn,
+ OUT xid xid,
+ OUT resource_manager text,
+ OUT record_type text,
+ OUT record_length int4,
+ OUT main_data_length int4,
+ OUT fpi_length int4,
+ OUT description text,
+ OUT block_ref text
+)
+AS 'MODULE_PATHNAME', 'pg_get_wal_record_info'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn) TO pg_read_server_files;
+
+--
+-- pg_get_wal_records_info()
+--
+CREATE FUNCTION pg_get_wal_records_info(IN start_lsn pg_lsn,
+ IN end_lsn pg_lsn,
+ OUT start_lsn pg_lsn,
+ OUT end_lsn pg_lsn,
+ OUT prev_lsn pg_lsn,
+ OUT xid xid,
+ OUT resource_manager text,
+ OUT record_type text,
+ OUT record_length int4,
+ OUT main_data_length int4,
+ OUT fpi_length int4,
+ OUT description text,
+ OUT block_ref text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_wal_records_info'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn) TO pg_read_server_files;
+
+--
+-- pg_get_wal_records_info_till_end_of_wal()
+--
+CREATE FUNCTION pg_get_wal_records_info_till_end_of_wal(IN start_lsn pg_lsn,
+ OUT start_lsn pg_lsn,
+ OUT end_lsn pg_lsn,
+ OUT prev_lsn pg_lsn,
+ OUT xid xid,
+ OUT resource_manager text,
+ OUT record_type text,
+ OUT record_length int4,
+ OUT main_data_length int4,
+ OUT fpi_length int4,
+ OUT description text,
+ OUT block_ref text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_wal_records_info_till_end_of_wal'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info_till_end_of_wal(pg_lsn) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_wal_records_info_till_end_of_wal(pg_lsn) TO pg_read_server_files;
+
+--
+-- pg_get_wal_stats()
+--
+CREATE FUNCTION pg_get_wal_stats(IN start_lsn pg_lsn,
+ IN end_lsn pg_lsn,
+ IN per_record boolean DEFAULT false,
+ OUT "resource_manager/record_type" text,
+ OUT count int8,
+ OUT count_percentage float4,
+ OUT record_size int8,
+ OUT record_size_percentage float4,
+ OUT fpi_size int8,
+ OUT fpi_size_percentage float4,
+ OUT combined_size int8,
+ OUT combined_size_percentage float4
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_wal_stats'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean) TO pg_read_server_files;
+
+--
+-- pg_get_wal_stats_till_end_of_wal()
+--
+CREATE FUNCTION pg_get_wal_stats_till_end_of_wal(IN start_lsn pg_lsn,
+ IN per_record boolean DEFAULT false,
+ OUT "resource_manager/record_type" text,
+ OUT count int8,
+ OUT count_percentage float4,
+ OUT record_size int8,
+ OUT record_size_percentage float4,
+ OUT fpi_size int8,
+ OUT fpi_size_percentage float4,
+ OUT combined_size int8,
+ OUT combined_size_percentage float4
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_wal_stats_till_end_of_wal'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_stats_till_end_of_wal(pg_lsn, boolean) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_wal_stats_till_end_of_wal(pg_lsn, boolean) TO pg_read_server_files;
diff --git a/contrib/pg_walinspect/pg_walinspect.c b/contrib/pg_walinspect/pg_walinspect.c
new file mode 100644
index 0000000000..58afa1ab1c
--- /dev/null
+++ b/contrib/pg_walinspect/pg_walinspect.c
@@ -0,0 +1,629 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_walinspect.c
+ * Functions to inspect contents of PostgreSQL Write-Ahead Log
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/pg_walinspect/pg_walinspect.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/xlog.h"
+#include "access/xlog_internal.h"
+#include "access/xlogreader.h"
+#include "access/xlogrecovery.h"
+#include "access/xlogstats.h"
+#include "access/xlogutils.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/pg_lsn.h"
+
+/*
+ * NOTE: For any code change or issue fix here, it is highly recommended to
+ * give a thought about doing the same in pg_waldump tool as well.
+ */
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(pg_get_wal_record_info);
+PG_FUNCTION_INFO_V1(pg_get_wal_records_info);
+PG_FUNCTION_INFO_V1(pg_get_wal_records_info_till_end_of_wal);
+PG_FUNCTION_INFO_V1(pg_get_wal_stats);
+PG_FUNCTION_INFO_V1(pg_get_wal_stats_till_end_of_wal);
+
+static bool IsFutureLSN(XLogRecPtr lsn, XLogRecPtr *curr_lsn);
+static XLogReaderState *InitXLogReaderState(XLogRecPtr lsn,
+ XLogRecPtr *first_record);
+static XLogRecord *ReadNextXLogRecord(XLogReaderState *xlogreader,
+ XLogRecPtr first_record);
+static void GetWALRecordInfo(XLogReaderState *record, XLogRecPtr lsn,
+ Datum *values, bool *nulls, uint32 ncols);
+static XLogRecPtr ValidateInputLSNs(bool till_end_of_wal,
+ XLogRecPtr start_lsn, XLogRecPtr end_lsn);
+static void GetWALRecordsInfo(FunctionCallInfo fcinfo, XLogRecPtr start_lsn,
+ XLogRecPtr end_lsn);
+static void GetXLogSummaryStats(XLogStats * stats, ReturnSetInfo *rsinfo,
+ Datum *values, bool *nulls, uint32 ncols,
+ bool stats_per_record);
+static void FillXLogStatsRow(const char *name, uint64 n, uint64 total_count,
+ uint64 rec_len, uint64 total_rec_len,
+ uint64 fpi_len, uint64 total_fpi_len,
+ uint64 tot_len, uint64 total_len,
+ Datum *values, bool *nulls, uint32 ncols);
+static void GetWalStats(FunctionCallInfo fcinfo, XLogRecPtr start_lsn,
+ XLogRecPtr end_lsn, bool stats_per_record);
+
+/*
+ * Check if the given LSN is in future. Also, return the LSN up to which the
+ * server has WAL.
+ */
+static bool
+IsFutureLSN(XLogRecPtr lsn, XLogRecPtr *curr_lsn)
+{
+ /*
+ * We determine the current LSN of the server similar to how page_read
+ * callback read_local_xlog_page_no_wait does.
+ */
+ if (!RecoveryInProgress())
+ *curr_lsn = GetFlushRecPtr(NULL);
+ else
+ *curr_lsn = GetXLogReplayRecPtr(NULL);
+
+ Assert(!XLogRecPtrIsInvalid(*curr_lsn));
+
+ if (lsn >= *curr_lsn)
+ return true;
+
+ return false;
+}
+
+/*
+ * Intialize WAL reader and identify first valid LSN.
+ */
+static XLogReaderState *
+InitXLogReaderState(XLogRecPtr lsn, XLogRecPtr *first_record)
+{
+ XLogReaderState *xlogreader;
+
+ /*
+ * Reading WAL below the first page of the first sgements isn't allowed.
+ * This is a bootstrap WAL page and the page_read callback fails to read
+ * it.
+ */
+ if (lsn < XLOG_BLCKSZ)
+ ereport(ERROR,
+ (errmsg("could not read WAL at LSN %X/%X",
+ LSN_FORMAT_ARGS(lsn))));
+
+ xlogreader = XLogReaderAllocate(wal_segment_size, NULL,
+ XL_ROUTINE(.page_read = &read_local_xlog_page_no_wait,
+ .segment_open = &wal_segment_open,
+ .segment_close = &wal_segment_close),
+ NULL);
+
+ if (xlogreader == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory"),
+ errdetail("Failed while allocating a WAL reading processor.")));
+
+ /* first find a valid recptr to start from */
+ *first_record = XLogFindNextRecord(xlogreader, lsn);
+
+ if (XLogRecPtrIsInvalid(*first_record))
+ ereport(ERROR,
+ (errmsg("could not find a valid record after %X/%X",
+ LSN_FORMAT_ARGS(lsn))));
+
+ return xlogreader;
+}
+
+/*
+ * Read next WAL record.
+ *
+ * By design, to be less intrusive in a running system, no slot is allocated
+ * to reserve the WAL we're about to read. Therefore this function can
+ * encounter read errors for historical WAL.
+ *
+ * We guard against ordinary errors trying to read WAL that hasn't been
+ * written yet by limiting end_lsn to the flushed WAL, but that can also
+ * encounter errors if the flush pointer falls in the middle of a record.
+ */
+static XLogRecord *
+ReadNextXLogRecord(XLogReaderState *xlogreader, XLogRecPtr first_record)
+{
+ XLogRecord *record;
+ char *errormsg;
+
+ record = XLogReadRecord(xlogreader, &errormsg);
+
+ if (record == NULL)
+ {
+ if (errormsg)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read WAL at %X/%X: %s",
+ LSN_FORMAT_ARGS(first_record), errormsg)));
+ else
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read WAL at %X/%X",
+ LSN_FORMAT_ARGS(first_record))));
+ }
+
+ return record;
+}
+
+/*
+ * Get a single WAL record info.
+ */
+static void
+GetWALRecordInfo(XLogReaderState *record, XLogRecPtr lsn,
+ Datum *values, bool *nulls, uint32 ncols)
+{
+ const char *id;
+ RmgrData desc;
+ uint32 fpi_len = 0;
+ StringInfoData rec_desc;
+ StringInfoData rec_blk_ref;
+ uint32 main_data_len;
+ int i = 0;
+
+ desc = GetRmgr(XLogRecGetRmid(record));
+ id = desc.rm_identify(XLogRecGetInfo(record));
+
+ if (id == NULL)
+ id = psprintf("UNKNOWN (%x)", XLogRecGetInfo(record) & ~XLR_INFO_MASK);
+
+ initStringInfo(&rec_desc);
+ desc.rm_desc(&rec_desc, record);
+
+ /* Block references. */
+ initStringInfo(&rec_blk_ref);
+ XLogRecGetBlockRefInfo(record, false, true, &rec_blk_ref, &fpi_len);
+
+ main_data_len = XLogRecGetDataLen(record);
+
+ values[i++] = LSNGetDatum(lsn);
+ values[i++] = LSNGetDatum(record->EndRecPtr);
+ values[i++] = LSNGetDatum(XLogRecGetPrev(record));
+ values[i++] = TransactionIdGetDatum(XLogRecGetXid(record));
+ values[i++] = CStringGetTextDatum(desc.rm_name);
+ values[i++] = CStringGetTextDatum(id);
+ values[i++] = UInt32GetDatum(XLogRecGetTotalLen(record));
+ values[i++] = UInt32GetDatum(main_data_len);
+ values[i++] = UInt32GetDatum(fpi_len);
+ values[i++] = CStringGetTextDatum(rec_desc.data);
+ values[i++] = CStringGetTextDatum(rec_blk_ref.data);
+
+ Assert(i == ncols);
+}
+
+/*
+ * Get WAL record info.
+ *
+ * This function emits an error if a future WAL LSN i.e. WAL LSN the database
+ * system doesn't know about is specified.
+ */
+Datum
+pg_get_wal_record_info(PG_FUNCTION_ARGS)
+{
+#define PG_GET_WAL_RECORD_INFO_COLS 11
+ Datum result;
+ Datum values[PG_GET_WAL_RECORD_INFO_COLS];
+ bool nulls[PG_GET_WAL_RECORD_INFO_COLS];
+ XLogRecPtr lsn;
+ XLogRecPtr curr_lsn;
+ XLogRecPtr first_record;
+ XLogReaderState *xlogreader;
+ TupleDesc tupdesc;
+ HeapTuple tuple;
+
+ lsn = PG_GETARG_LSN(0);
+
+ if (IsFutureLSN(lsn, &curr_lsn))
+ {
+ /*
+ * GetFlushRecPtr or GetXLogReplayRecPtr gives "end+1" LSN of the last
+ * record flushed or replayed respectively. But let's use the LSN up
+ * to "end" in user facing message.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot accept future input LSN"),
+ errdetail("Last known WAL LSN on the database system is at %X/%X.",
+ LSN_FORMAT_ARGS(curr_lsn))));
+ }
+
+ /* Build a tuple descriptor for our result type. */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ xlogreader = InitXLogReaderState(lsn, &first_record);
+
+ (void) ReadNextXLogRecord(xlogreader, first_record);
+
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ GetWALRecordInfo(xlogreader, first_record, values, nulls,
+ PG_GET_WAL_RECORD_INFO_COLS);
+
+ XLogReaderFree(xlogreader);
+
+ tuple = heap_form_tuple(tupdesc, values, nulls);
+ result = HeapTupleGetDatum(tuple);
+
+ PG_RETURN_DATUM(result);
+#undef PG_GET_WAL_RECORD_INFO_COLS
+}
+
+/*
+ * Validate the input LSNs and compute end LSN for till_end_of_wal versions.
+ */
+static XLogRecPtr
+ValidateInputLSNs(bool till_end_of_wal, XLogRecPtr start_lsn,
+ XLogRecPtr end_lsn)
+{
+ XLogRecPtr curr_lsn;
+
+ if (IsFutureLSN(start_lsn, &curr_lsn))
+ {
+ /*
+ * GetFlushRecPtr or GetXLogReplayRecPtr gives "end+1" LSN of the last
+ * record flushed or replayed respectively. But let's use the LSN up
+ * to "end" in user facing message.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot accept future start LSN"),
+ errdetail("Last known WAL LSN on the database system is at %X/%X.",
+ LSN_FORMAT_ARGS(curr_lsn))));
+ }
+
+ if (till_end_of_wal)
+ end_lsn = curr_lsn;
+
+ if (end_lsn > curr_lsn)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot accept future end LSN"),
+ errdetail("Last known WAL LSN on the database system is at %X/%X.",
+ LSN_FORMAT_ARGS(curr_lsn))));
+
+ if (start_lsn >= end_lsn)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("WAL start LSN must be less than end LSN")));
+
+ return end_lsn;
+}
+
+/*
+ * Get info and data of all WAL records between start LSN and end LSN.
+ */
+static void
+GetWALRecordsInfo(FunctionCallInfo fcinfo, XLogRecPtr start_lsn,
+ XLogRecPtr end_lsn)
+{
+#define PG_GET_WAL_RECORDS_INFO_COLS 11
+ XLogRecPtr first_record;
+ XLogReaderState *xlogreader;
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ Datum values[PG_GET_WAL_RECORDS_INFO_COLS];
+ bool nulls[PG_GET_WAL_RECORDS_INFO_COLS];
+
+ SetSingleFuncCall(fcinfo, 0);
+
+ xlogreader = InitXLogReaderState(start_lsn, &first_record);
+
+ Assert(xlogreader);
+
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ for (;;)
+ {
+ (void) ReadNextXLogRecord(xlogreader, first_record);
+
+ if (xlogreader->EndRecPtr <= end_lsn)
+ {
+ GetWALRecordInfo(xlogreader, xlogreader->currRecPtr, values, nulls,
+ PG_GET_WAL_RECORDS_INFO_COLS);
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+
+ /* if we read up to end_lsn, we're done */
+ if (xlogreader->EndRecPtr >= end_lsn)
+ break;
+
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ XLogReaderFree(xlogreader);
+
+#undef PG_GET_WAL_RECORDS_INFO_COLS
+}
+
+/*
+ * Get info and data of all WAL records between start LSN and end LSN.
+ *
+ * This function emits an error if a future start or end WAL LSN i.e. WAL LSN
+ * the database system doesn't know about is specified.
+ */
+Datum
+pg_get_wal_records_info(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr start_lsn;
+ XLogRecPtr end_lsn;
+
+ start_lsn = PG_GETARG_LSN(0);
+ end_lsn = PG_GETARG_LSN(1);
+
+ end_lsn = ValidateInputLSNs(false, start_lsn, end_lsn);
+
+ GetWALRecordsInfo(fcinfo, start_lsn, end_lsn);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Get info and data of all WAL records from start LSN till end of WAL.
+ *
+ * This function emits an error if a future start i.e. WAL LSN the database
+ * system doesn't know about is specified.
+ */
+Datum
+pg_get_wal_records_info_till_end_of_wal(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr start_lsn;
+ XLogRecPtr end_lsn = InvalidXLogRecPtr;
+
+ start_lsn = PG_GETARG_LSN(0);
+
+ end_lsn = ValidateInputLSNs(true, start_lsn, end_lsn);
+
+ GetWALRecordsInfo(fcinfo, start_lsn, end_lsn);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Fill single row of record counts and sizes for an rmgr or record.
+ */
+static void
+FillXLogStatsRow(const char *name,
+ uint64 n, uint64 total_count,
+ uint64 rec_len, uint64 total_rec_len,
+ uint64 fpi_len, uint64 total_fpi_len,
+ uint64 tot_len, uint64 total_len,
+ Datum *values, bool *nulls, uint32 ncols)
+{
+ double n_pct,
+ rec_len_pct,
+ fpi_len_pct,
+ tot_len_pct;
+ int i = 0;
+
+ n_pct = 0;
+ if (total_count != 0)
+ n_pct = 100 * (double) n / total_count;
+
+ rec_len_pct = 0;
+ if (total_rec_len != 0)
+ rec_len_pct = 100 * (double) rec_len / total_rec_len;
+
+ fpi_len_pct = 0;
+ if (total_fpi_len != 0)
+ fpi_len_pct = 100 * (double) fpi_len / total_fpi_len;
+
+ tot_len_pct = 0;
+ if (total_len != 0)
+ tot_len_pct = 100 * (double) tot_len / total_len;
+
+ values[i++] = CStringGetTextDatum(name);
+ values[i++] = Int64GetDatum(n);
+ values[i++] = Float4GetDatum(n_pct);
+ values[i++] = Int64GetDatum(rec_len);
+ values[i++] = Float4GetDatum(rec_len_pct);
+ values[i++] = Int64GetDatum(fpi_len);
+ values[i++] = Float4GetDatum(fpi_len_pct);
+ values[i++] = Int64GetDatum(tot_len);
+ values[i++] = Float4GetDatum(tot_len_pct);
+
+ Assert(i == ncols);
+}
+
+/*
+ * Get summary statistics about the records seen so far.
+ */
+static void
+GetXLogSummaryStats(XLogStats *stats, ReturnSetInfo *rsinfo,
+ Datum *values, bool *nulls, uint32 ncols,
+ bool stats_per_record)
+{
+ uint64 total_count = 0;
+ uint64 total_rec_len = 0;
+ uint64 total_fpi_len = 0;
+ uint64 total_len = 0;
+ int ri;
+
+ /*
+ * Each row shows its percentages of the total, so make a first pass to
+ * calculate column totals.
+ */
+ for (ri = 0; ri <= RM_MAX_ID; ri++)
+ {
+ if (!RmgrIdIsValid(ri))
+ continue;
+
+ total_count += stats->rmgr_stats[ri].count;
+ total_rec_len += stats->rmgr_stats[ri].rec_len;
+ total_fpi_len += stats->rmgr_stats[ri].fpi_len;
+ }
+ total_len = total_rec_len + total_fpi_len;
+
+ for (ri = 0; ri <= RM_MAX_ID; ri++)
+ {
+ uint64 count;
+ uint64 rec_len;
+ uint64 fpi_len;
+ uint64 tot_len;
+ RmgrData desc;
+
+ if (!RmgrIdIsValid(ri))
+ continue;
+
+ if (!RmgrIdExists(ri))
+ continue;
+
+ desc = GetRmgr(ri);
+
+ if (stats_per_record)
+ {
+ int rj;
+
+ for (rj = 0; rj < MAX_XLINFO_TYPES; rj++)
+ {
+ const char *id;
+
+ count = stats->record_stats[ri][rj].count;
+ rec_len = stats->record_stats[ri][rj].rec_len;
+ fpi_len = stats->record_stats[ri][rj].fpi_len;
+ tot_len = rec_len + fpi_len;
+
+ /* Skip undefined combinations and ones that didn't occur */
+ if (count == 0)
+ continue;
+
+ /* the upper four bits in xl_info are the rmgr's */
+ id = desc.rm_identify(rj << 4);
+ if (id == NULL)
+ id = psprintf("UNKNOWN (%x)", rj << 4);
+
+ FillXLogStatsRow(psprintf("%s/%s", desc.rm_name, id), count,
+ total_count, rec_len, total_rec_len, fpi_len,
+ total_fpi_len, tot_len, total_len,
+ values, nulls, ncols);
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+ }
+ else
+ {
+ count = stats->rmgr_stats[ri].count;
+ rec_len = stats->rmgr_stats[ri].rec_len;
+ fpi_len = stats->rmgr_stats[ri].fpi_len;
+ tot_len = rec_len + fpi_len;
+
+ FillXLogStatsRow(desc.rm_name, count, total_count, rec_len,
+ total_rec_len, fpi_len, total_fpi_len, tot_len,
+ total_len, values, nulls, ncols);
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+ }
+}
+
+/*
+ * Get WAL stats between start LSN and end LSN.
+ */
+static void
+GetWalStats(FunctionCallInfo fcinfo, XLogRecPtr start_lsn,
+ XLogRecPtr end_lsn, bool stats_per_record)
+{
+#define PG_GET_WAL_STATS_COLS 9
+ XLogRecPtr first_record;
+ XLogReaderState *xlogreader;
+ XLogStats stats;
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ Datum values[PG_GET_WAL_STATS_COLS];
+ bool nulls[PG_GET_WAL_STATS_COLS];
+
+ SetSingleFuncCall(fcinfo, 0);
+
+ xlogreader = InitXLogReaderState(start_lsn, &first_record);
+
+ MemSet(&stats, 0, sizeof(stats));
+
+ for (;;)
+ {
+ (void) ReadNextXLogRecord(xlogreader, first_record);
+
+ if (xlogreader->EndRecPtr <= end_lsn)
+ XLogRecStoreStats(&stats, xlogreader);
+
+ /* if we read up to end_lsn, we're done */
+ if (xlogreader->EndRecPtr >= end_lsn)
+ break;
+
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ XLogReaderFree(xlogreader);
+
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ GetXLogSummaryStats(&stats, rsinfo, values, nulls,
+ PG_GET_WAL_STATS_COLS,
+ stats_per_record);
+
+#undef PG_GET_WAL_STATS_COLS
+}
+
+/*
+ * Get stats of all WAL records between start LSN and end LSN.
+ *
+ * This function emits an error if a future start or end WAL LSN i.e. WAL LSN
+ * the database system doesn't know about is specified.
+ */
+Datum
+pg_get_wal_stats(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr start_lsn;
+ XLogRecPtr end_lsn;
+ bool stats_per_record;
+
+ start_lsn = PG_GETARG_LSN(0);
+ end_lsn = PG_GETARG_LSN(1);
+ stats_per_record = PG_GETARG_BOOL(2);
+
+ end_lsn = ValidateInputLSNs(false, start_lsn, end_lsn);
+
+ GetWalStats(fcinfo, start_lsn, end_lsn, stats_per_record);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Get stats of all WAL records from start LSN till end of WAL.
+ *
+ * This function emits an error if a future start i.e. WAL LSN the database
+ * system doesn't know about is specified.
+ */
+Datum
+pg_get_wal_stats_till_end_of_wal(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr start_lsn;
+ XLogRecPtr end_lsn = InvalidXLogRecPtr;
+ bool stats_per_record;
+
+ start_lsn = PG_GETARG_LSN(0);
+ stats_per_record = PG_GETARG_BOOL(1);
+
+ end_lsn = ValidateInputLSNs(true, start_lsn, end_lsn);
+
+ GetWalStats(fcinfo, start_lsn, end_lsn, stats_per_record);
+
+ PG_RETURN_VOID();
+}
diff --git a/contrib/pg_walinspect/pg_walinspect.control b/contrib/pg_walinspect/pg_walinspect.control
new file mode 100644
index 0000000000..017e56a2bb
--- /dev/null
+++ b/contrib/pg_walinspect/pg_walinspect.control
@@ -0,0 +1,5 @@
+# pg_walinspect extension
+comment = 'functions to inspect contents of PostgreSQL Write-Ahead Log'
+default_version = '1.0'
+module_pathname = '$libdir/pg_walinspect'
+relocatable = true
diff --git a/contrib/pg_walinspect/sql/pg_walinspect.sql b/contrib/pg_walinspect/sql/pg_walinspect.sql
new file mode 100644
index 0000000000..1a9fd09e7b
--- /dev/null
+++ b/contrib/pg_walinspect/sql/pg_walinspect.sql
@@ -0,0 +1,120 @@
+CREATE EXTENSION pg_walinspect;
+
+CREATE TABLE sample_tbl(col1 int, col2 int);
+
+-- Make sure checkpoints don't interfere with the test.
+SELECT lsn as wal_lsn1 FROM
+ pg_create_physical_replication_slot('regress_pg_walinspect_slot', true, false)
+ \gset
+
+INSERT INTO sample_tbl SELECT * FROM generate_series(1, 2);
+
+SELECT pg_current_wal_lsn() AS wal_lsn2 \gset
+
+INSERT INTO sample_tbl SELECT * FROM generate_series(1, 2);
+
+-- ===================================================================
+-- Tests for input validation
+-- ===================================================================
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info(:'wal_lsn2', :'wal_lsn1'); -- ERROR
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats(:'wal_lsn2', :'wal_lsn1'); -- ERROR
+
+-- ===================================================================
+-- Tests for all function executions
+-- ===================================================================
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_record_info(:'wal_lsn1');
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2');
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info_till_end_of_wal(:'wal_lsn1');
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats(:'wal_lsn1', :'wal_lsn2');
+
+SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats_till_end_of_wal(:'wal_lsn1');
+
+-- ===================================================================
+-- Test for filtering out WAL records of a particular table
+-- ===================================================================
+
+SELECT oid AS sample_tbl_oid FROM pg_class WHERE relname = 'sample_tbl' \gset
+
+SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2')
+ WHERE block_ref LIKE concat('%', :'sample_tbl_oid', '%') AND resource_manager = 'Heap';
+
+-- ===================================================================
+-- Test for filtering out WAL records based on resource_manager and
+-- record_type
+-- ===================================================================
+
+SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2')
+ WHERE resource_manager = 'Heap' AND record_type = 'INSERT';
+
+-- ===================================================================
+-- Tests for permissions
+-- ===================================================================
+CREATE ROLE regress_pg_walinspect;
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- no
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- no
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- no
+
+-- Functions accessible by users with role pg_read_server_files
+
+GRANT pg_read_server_files TO regress_pg_walinspect;
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes
+
+REVOKE pg_read_server_files FROM regress_pg_walinspect;
+
+-- Superuser can grant execute to other users
+GRANT EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn)
+ TO regress_pg_walinspect;
+
+GRANT EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn)
+ TO regress_pg_walinspect;
+
+GRANT EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean)
+ TO regress_pg_walinspect;
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
+
+SELECT has_function_privilege('regress_pg_walinspect',
+ 'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn)
+ FROM regress_pg_walinspect;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn)
+ FROM regress_pg_walinspect;
+
+REVOKE EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean)
+ FROM regress_pg_walinspect;
+
+-- ===================================================================
+-- Clean up
+-- ===================================================================
+
+DROP ROLE regress_pg_walinspect;
+
+SELECT pg_drop_replication_slot('regress_pg_walinspect_slot');
+
+DROP TABLE sample_tbl;
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml
index 1e42ce1a7f..4e7b87a42f 100644
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -131,6 +131,7 @@ CREATE EXTENSION module_name;
&pgsurgery;
&pgtrgm;
&pgvisibility;
+ &pgwalinspect;
&postgres-fdw;
&seg;
&sepgsql;
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 7dea670969..1e82cb2d3d 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -148,6 +148,7 @@
+
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 29c4376886..5047e090db 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -29205,7 +29205,7 @@ postgres=# SELECT * FROM pg_walfile_name_offset((pg_backup_stop()).lsn);
-
+
pg_logical_emit_message
diff --git a/doc/src/sgml/pgwalinspect.sgml b/doc/src/sgml/pgwalinspect.sgml
new file mode 100644
index 0000000000..44010a5b0a
--- /dev/null
+++ b/doc/src/sgml/pgwalinspect.sgml
@@ -0,0 +1,275 @@
+
+
+
+ pg_walinspect
+
+
+ pg_walinspect
+
+
+
+ The pg_walinspect module provides SQL functions that
+ allow you to inspect the contents of write-ahead log of
+ a running PostgreSQL database cluster at a low
+ level, which is useful for debugging or analytical or reporting or
+ educational purposes. It is similar to , but
+ accessible through SQL rather than a separate utility.
+
+
+
+ All the functions of this module will provide the WAL information using the
+ current server's timeline ID.
+
+
+
+ All the functions of this module will try to find the first valid WAL record
+ that is at or after the given in_lsn or
+ start_lsn and will emit error if no such record
+ is available. Similarly, the end_lsn must be
+ available, and if it falls in the middle of a record, the entire record must
+ be available.
+
+
+
+
+ Some functions, such as pg_logical_emit_message,
+ return the LSN after the record just
+ inserted. Therefore, if you pass that LSN as
+ in_lsn or start_lsn
+ to one of these functions, it will return the next
+ record.
+
+
+
+ By default, use of these functions is restricted to superusers and members of
+ the pg_read_server_files role. Access may be granted by
+ superusers to others using GRANT.
+
+
+
+ General Functions
+
+
+
+
+
+ pg_get_wal_record_info(in_lsn pg_lsn,
+ start_lsn OUT pg_lsn,
+ end_lsn OUT pg_lsn,
+ prev_lsn OUT pg_lsn,
+ xid OUT xid,
+ resource_manager OUT text,
+ record_type OUT text,
+ record_length OUT int4,
+ main_data_length OUT int4,
+ fpi_length OUT int4,
+ description OUT text,
+ block_ref OUT text)
+
+
+
+
+
+ Gets WAL record information of a given LSN. If the given LSN isn't
+ at the start of a WAL record, it gives the information of the next
+ available valid WAL record; or an error if no such record is found.
+
+
+
+
+
+
+
+ pg_get_wal_records_info(start_lsn pg_lsn,
+ end_lsn pg_lsn,
+ start_lsn OUT pg_lsn,
+ end_lsn OUT pg_lsn,
+ prev_lsn OUT pg_lsn,
+ xid OUT xid,
+ resource_manager OUT text,
+ record_type OUT text,
+ record_length OUT int4,
+ main_data_length OUT int4,
+ fpi_length OUT int4,
+ description OUT text,
+ block_ref OUT text)
+ returns setof record
+
+
+
+
+
+ Gets information of all the valid WAL records between
+ start_lsn and end_lsn.
+ Returns one row per WAL record. If start_lsn
+ or end_lsn are not yet available, the
+ function will raise an error. For example, usage of the function is as
+ follows:
+
+postgres=# select start_lsn, end_lsn, prev_lsn, xid, resource_manager, record_type, record_length, main_data_length, fpi_length, description from pg_get_wal_records_info('0/14F9A30', '0/15011D7');
+ start_lsn | end_lsn | prev_lsn | xid | resource_manager | record_type | record_length | main_data_length | fpi_length | description
+-----------+-----------+-----------+-----+------------------+--------------+---------------+------------------+------------+---------------------
+ 0/14FA118 | 0/14FB4B0 | 0/14F9958 | 725 | Btree | INSERT_LEAF | 5013 | 2 | 4960 | off 246
+ 0/14FB4B0 | 0/14FD050 | 0/14FA118 | 725 | Btree | INSERT_LEAF | 7045 | 2 | 6992 | off 130
+ 0/14FD050 | 0/14FD0A8 | 0/14FB4B0 | 725 | Heap2 | MULTI_INSERT | 85 | 6 | 0 | 1 tuples flags 0x02
+ 0/14FD0A8 | 0/14FD0F0 | 0/14FD050 | 725 | Btree | INSERT_LEAF | 72 | 2 | 0 | off 155
+ 0/14FD0F0 | 0/14FD138 | 0/14FD0A8 | 725 | Btree | INSERT_LEAF | 72 | 2 | 0 | off 134
+ 0/14FD138 | 0/14FD210 | 0/14FD0F0 | 725 | Heap | INSERT | 211 | 3 | 0 | off 11 flags 0x00
+ 0/14FD210 | 0/14FD250 | 0/14FD138 | 725 | Btree | INSERT_LEAF | 64 | 2 | 0 | off 246
+ 0/14FD250 | 0/14FF260 | 0/14FD210 | 725 | Btree | INSERT_LEAF | 8181 | 2 | 8128 | off 47
+ 0/14FF260 | 0/14FF2B8 | 0/14FD250 | 725 | Heap2 | MULTI_INSERT | 85 | 6 | 0 | 1 tuples flags 0x02
+ 0/14FF2B8 | 0/14FF300 | 0/14FF260 | 725 | Btree | INSERT_LEAF | 72 | 2 | 0 | off 155
+ 0/14FF300 | 0/15008D8 | 0/14FF2B8 | 725 | Btree | INSERT_LEAF | 5565 | 2 | 5512 | off 106
+ 0/15008D8 | 0/1500C48 | 0/14FF300 | 725 | Heap | INSERT | 874 | 3 | 820 | off 2 flags 0x01
+(12 rows)
+
+
+
+
+
+
+
+
+ pg_get_wal_records_info_till_end_of_wal(start_lsn pg_lsn,
+ start_lsn OUT pg_lsn,
+ end_lsn OUT pg_lsn,
+ prev_lsn OUT pg_lsn,
+ xid OUT xid,
+ resource_manager OUT text,
+ record_type OUT text,
+ record_length OUT int4,
+ main_data_length OUT int4,
+ fpi_length OUT int4,
+ description OUT text,
+ block_ref OUT text)
+ returns setof record
+
+
+
+
+
+ This function is same as pg_get_wal_records_info()
+ except that it gets information of all the valid WAL records from
+ start_lsn till the end of WAL.
+
+
+
+
+
+
+
+ pg_get_wal_stats(start_lsn pg_lsn,
+ end_lsn pg_lsn,
+ per_record boolean DEFAULT false,
+ "resource_manager/record_type" OUT text,
+ count OUT int8,
+ count_percentage OUT float4,
+ record_length OUT int8,
+ record_length_percentage OUT float4,
+ fpi_length OUT int8,
+ fpi_length_percentage OUT float4,
+ combined_size OUT int8,
+ combined_size_percentage OUT float4)
+ returns setof record
+
+
+
+
+
+ Gets statistics of all the valid WAL records between
+ start_lsn and
+ end_lsn. By default, it returns one row per
+ resource_manager type. When
+ per_record is set to true,
+ it returns one row per record_type.
+ If start_lsn
+ or end_lsn are not yet available, the
+ function will raise an error. For example, usage of the function is as
+ follows:
+
+postgres=# select * from pg_get_wal_stats('0/12FBA30', '0/15011D7') where count > 0;
+ resource_manager/record_type | count | count_percentage | record_size | record_size_percentage | fpi_size | fpi_size_percentage | combined_size | combined_size_percentage
+------------------------------+-------+------------------+-------------+------------------------+----------+---------------------+---------------+--------------------------
+ XLOG | 10 | 0.10871929 | 796 | 0.052369177 | 352 | 0.061031006 | 1148 | 0.054751817
+ Transaction | 187 | 2.0330508 | 62773 | 4.1298623 | 0 | 0 | 62773 | 2.9938467
+ Storage | 13 | 0.14133507 | 546 | 0.035921574 | 0 | 0 | 546 | 0.0260405
+ Database | 2 | 0.021743858 | 84 | 0.005526396 | 0 | 0 | 84 | 0.0040062307
+ Standby | 218 | 2.3700805 | 15908 | 1.0465941 | 0 | 0 | 15908 | 0.75870377
+ Heap2 | 1897 | 20.624048 | 383916 | 25.257998 | 364472 | 63.193447 | 748388 | 35.693035
+ Heap | 1318 | 14.329202 | 621390 | 40.88151 | 139660 | 24.214746 | 761050 | 36.29693
+ Btree | 5553 | 60.37182 | 434565 | 28.590216 | 72272 | 12.530776 | 506837 | 24.17269
+(8 rows)
+
+
+With per_record passed as true:
+
+
+postgres=# select * from pg_get_wal_stats('0/14AFC30', '0/15011D7', true) where count > 0;
+ resource_manager/record_type | count | count_percentage | record_size | record_size_percentage | fpi_size | fpi_size_percentage | combined_size | combined_size_percentage
+------------------------------+-------+------------------+-------------+------------------------+----------+---------------------+---------------+--------------------------
+ XLOG/CHECKPOINT_SHUTDOWN | 1 | 0.32894737 | 114 | 0.22891566 | 0 | 0 | 114 | 0.03534489
+ XLOG/CHECKPOINT_ONLINE | 4 | 1.3157895 | 456 | 0.91566265 | 0 | 0 | 456 | 0.14137957
+ XLOG/NEXTOID | 1 | 0.32894737 | 30 | 0.060240965 | 0 | 0 | 30 | 0.009301287
+ Transaction/COMMIT | 9 | 2.9605262 | 1173 | 2.3554218 | 0 | 0 | 1173 | 0.36368033
+ Storage/CREATE | 1 | 0.32894737 | 42 | 0.084337346 | 0 | 0 | 42 | 0.0130218025
+ Database/CREATE_FILE_COPY | 2 | 0.65789473 | 84 | 0.16867469 | 0 | 0 | 84 | 0.026043605
+ Standby/RUNNING_XACTS | 6 | 1.9736842 | 316 | 0.6345382 | 0 | 0 | 316 | 0.09797356
+ Standby/INVALIDATIONS | 45 | 14.802631 | 4018 | 8.068274 | 0 | 0 | 4018 | 1.2457525
+ Heap2/PRUNE | 4 | 1.3157895 | 270 | 0.5421687 | 0 | 0 | 270 | 0.08371159
+ Heap2/FREEZE_PAGE | 27 | 8.881579 | 20904 | 41.975903 | 0 | 0 | 20904 | 6.481137
+ Heap2/VISIBLE | 29 | 9.539474 | 1756 | 3.5261045 | 73728 | 27.032736 | 75484 | 23.403278
+ Heap2/MULTI_INSERT | 13 | 4.2763157 | 1049 | 2.1064258 | 12216 | 4.479057 | 13265 | 4.112719
+ Heap/INSERT | 19 | 6.25 | 2324 | 4.6666665 | 43884 | 16.090284 | 46208 | 14.326463
+ Heap/UPDATE | 7 | 2.3026316 | 511 | 1.0261045 | 54340 | 19.924028 | 54851 | 17.006165
+ Heap/HOT_UPDATE | 11 | 3.618421 | 1134 | 2.2771084 | 468 | 0.17159452 | 1602 | 0.49668875
+ Heap/LOCK | 8 | 2.631579 | 432 | 0.8674699 | 0 | 0 | 432 | 0.13393854
+ Heap/INPLACE | 45 | 14.802631 | 9123 | 18.319277 | 16076 | 5.894345 | 25199 | 7.8127713
+ Heap/UPDATE+INIT | 1 | 0.32894737 | 817 | 1.6405623 | 0 | 0 | 817 | 0.25330505
+ Btree/INSERT_LEAF | 70 | 23.026316 | 5183 | 10.407631 | 72024 | 26.407955 | 77207 | 23.937483
+ Btree/DEDUP | 1 | 0.32894737 | 64 | 0.12851405 | 0 | 0 | 64 | 0.019842746
+(20 rows)
+
+
+
+
+
+
+
+
+ pg_get_wal_stats_till_end_of_wal(start_lsn pg_lsn,
+ per_record boolean DEFAULT false,
+ "resource_manager/record_type" OUT text,
+ count OUT int8,
+ count_percentage OUT float4,
+ record_length OUT int8,
+ record_length_percentage OUT float4,
+ fpi_length OUT int8,
+ fpi_length_percentage OUT float4,
+ combined_size OUT int8,
+ combined_size_percentage OUT float4)
+ returns setof record
+
+
+
+
+
+ This function is same as pg_get_wal_stats() except
+ that it gets statistics of all the valid WAL records from
+ start_lsn till end of WAL.
+
+
+
+
+
+
+
+
+ Author
+
+
+ Bharath Rupireddy bharath.rupireddyforpostgres@gmail.com
+
+
+
+
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index e7452af679..dff1e7685e 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -200,3 +200,133 @@ xlog_identify(uint8 info)
return id;
}
+
+/*
+ * Returns a string giving information about all the blocks in an
+ * XLogRecord.
+ */
+void
+XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty,
+ bool detailed_format, StringInfo buf,
+ uint32 *fpi_len)
+{
+ int block_id;
+
+ Assert(record != NULL);
+
+ if (detailed_format && pretty)
+ appendStringInfoChar(buf, '\n');
+
+ for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
+ {
+ RelFileNode rnode = {InvalidOid, InvalidOid, InvalidOid};
+ ForkNumber forknum = InvalidForkNumber;
+ BlockNumber blk = InvalidBlockNumber;
+
+ if (!XLogRecHasBlockRef(record, block_id))
+ continue;
+
+ XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
+
+ if (detailed_format)
+ {
+ /* Get block references in detailed format. */
+
+ if (pretty)
+ appendStringInfoChar(buf, '\t');
+ else if (block_id > 0)
+ appendStringInfoChar(buf, ' ');
+
+ appendStringInfo(buf,
+ "blkref #%d: rel %u/%u/%u fork %s blk %u",
+ block_id,
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ forkNames[forknum],
+ blk);
+
+ if (XLogRecHasBlockImage(record, block_id))
+ {
+ uint8 bimg_info = XLogRecGetBlock(record, block_id)->bimg_info;
+
+ /* Calculate the amount of FPI data in the record. */
+ if (fpi_len)
+ *fpi_len += XLogRecGetBlock(record, block_id)->bimg_len;
+
+ if (BKPIMAGE_COMPRESSED(bimg_info))
+ {
+ const char *method;
+
+ if ((bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0)
+ method = "pglz";
+ else if ((bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0)
+ method = "lz4";
+ else if ((bimg_info & BKPIMAGE_COMPRESS_ZSTD) != 0)
+ method = "zstd";
+ else
+ method = "unknown";
+
+ appendStringInfo(buf,
+ " (FPW%s); hole: offset: %u, length: %u, "
+ "compression saved: %u, method: %s",
+ XLogRecBlockImageApply(record, block_id) ?
+ "" : " for WAL verification",
+ XLogRecGetBlock(record, block_id)->hole_offset,
+ XLogRecGetBlock(record, block_id)->hole_length,
+ BLCKSZ -
+ XLogRecGetBlock(record, block_id)->hole_length -
+ XLogRecGetBlock(record, block_id)->bimg_len,
+ method);
+ }
+ else
+ {
+ appendStringInfo(buf,
+ " (FPW%s); hole: offset: %u, length: %u",
+ XLogRecBlockImageApply(record, block_id) ?
+ "" : " for WAL verification",
+ XLogRecGetBlock(record, block_id)->hole_offset,
+ XLogRecGetBlock(record, block_id)->hole_length);
+ }
+ }
+
+ if (pretty)
+ appendStringInfoChar(buf, '\n');
+ }
+ else
+ {
+ /* Get block references in short format. */
+
+ if (forknum != MAIN_FORKNUM)
+ {
+ appendStringInfo(buf,
+ ", blkref #%d: rel %u/%u/%u fork %s blk %u",
+ block_id,
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ forkNames[forknum],
+ blk);
+ }
+ else
+ {
+ appendStringInfo(buf,
+ ", blkref #%d: rel %u/%u/%u blk %u",
+ block_id,
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ blk);
+ }
+
+ if (XLogRecHasBlockImage(record, block_id))
+ {
+ /* Calculate the amount of FPI data in the record. */
+ if (fpi_len)
+ *fpi_len += XLogRecGetBlock(record, block_id)->bimg_len;
+
+ if (XLogRecBlockImageApply(record, block_id))
+ appendStringInfo(buf, " FPW");
+ else
+ appendStringInfo(buf, " FPW for WAL verification");
+ }
+ }
+ }
+
+ if (!detailed_format && pretty)
+ appendStringInfoChar(buf, '\n');
+}
diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile
index 8c17c88dfc..3e5444a6f7 100644
--- a/src/backend/access/transam/Makefile
+++ b/src/backend/access/transam/Makefile
@@ -34,6 +34,7 @@ OBJS = \
xlogprefetcher.o \
xlogreader.o \
xlogrecovery.o \
+ xlogstats.o \
xlogutils.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 5862d9dc75..a5f1a648d3 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -1320,13 +1320,6 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
return true;
}
-#ifdef FRONTEND
-/*
- * Functions that are currently not needed in the backend, but are better
- * implemented inside xlogreader.c because of the internal facilities available
- * here.
- */
-
/*
* Find the first record with an lsn >= RecPtr.
*
@@ -1447,8 +1440,6 @@ err:
return InvalidXLogRecPtr;
}
-#endif /* FRONTEND */
-
/*
* Helper function to ease writing of XLogRoutine->page_read callbacks.
* If this function is used, caller must supply a segment_open callback in
diff --git a/src/backend/access/transam/xlogstats.c b/src/backend/access/transam/xlogstats.c
new file mode 100644
index 0000000000..aff3069ecb
--- /dev/null
+++ b/src/backend/access/transam/xlogstats.c
@@ -0,0 +1,93 @@
+/*-------------------------------------------------------------------------
+ *
+ * xlogstats.c
+ * Functions for WAL Statitstics
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/access/transam/xlogstats.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/xlogreader.h"
+#include "access/xlogstats.h"
+
+/*
+ * Calculate the size of a record, split into !FPI and FPI parts.
+ */
+void
+XLogRecGetLen(XLogReaderState *record, uint32 *rec_len,
+ uint32 *fpi_len)
+{
+ int block_id;
+
+ /*
+ * Calculate the amount of FPI data in the record.
+ *
+ * XXX: We peek into xlogreader's private decoded backup blocks for the
+ * bimg_len indicating the length of FPI data.
+ */
+ *fpi_len = 0;
+ for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
+ {
+ if (XLogRecHasBlockImage(record, block_id))
+ *fpi_len += XLogRecGetBlock(record, block_id)->bimg_len;
+ }
+
+ /*
+ * Calculate the length of the record as the total length - the length of
+ * all the block images.
+ */
+ *rec_len = XLogRecGetTotalLen(record) - *fpi_len;
+}
+
+/*
+ * Store per-rmgr and per-record statistics for a given record.
+ */
+void
+XLogRecStoreStats(XLogStats *stats, XLogReaderState *record)
+{
+ RmgrId rmid;
+ uint8 recid;
+ uint32 rec_len;
+ uint32 fpi_len;
+
+ Assert(stats != NULL && record != NULL);
+
+ stats->count++;
+
+ rmid = XLogRecGetRmid(record);
+
+ XLogRecGetLen(record, &rec_len, &fpi_len);
+
+ /* Update per-rmgr statistics */
+
+ stats->rmgr_stats[rmid].count++;
+ stats->rmgr_stats[rmid].rec_len += rec_len;
+ stats->rmgr_stats[rmid].fpi_len += fpi_len;
+
+ /*
+ * Update per-record statistics, where the record is identified by a
+ * combination of the RmgrId and the four bits of the xl_info field that
+ * are the rmgr's domain (resulting in sixteen possible entries per
+ * RmgrId).
+ */
+
+ recid = XLogRecGetInfo(record) >> 4;
+
+ /*
+ * XACT records need to be handled differently. Those records use the
+ * first bit of those four bits for an optional flag variable and the
+ * following three bits for the opcode. We filter opcode out of xl_info
+ * and use it as the identifier of the record.
+ */
+ if (rmid == RM_XACT_ID)
+ recid &= 0x07;
+
+ stats->record_stats[rmid][recid].count++;
+ stats->record_stats[rmid][recid].rec_len += rec_len;
+ stats->record_stats[rmid][recid].fpi_len += fpi_len;
+}
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index bb2d3ec991..b5d34c61e6 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -80,6 +80,10 @@ typedef struct xl_invalid_page
static HTAB *invalid_page_tab = NULL;
+static int
+read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
+ int reqLen, XLogRecPtr targetRecPtr,
+ char *cur_page, bool wait_for_wal);
/* Report a reference to an invalid page */
static void
@@ -870,6 +874,31 @@ wal_segment_close(XLogReaderState *state)
int
read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
+{
+ return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
+ targetRecPtr, cur_page, true);
+}
+
+/*
+ * Same as read_local_xlog_page except that it doesn't wait for future WAL
+ * to be available.
+ */
+int
+read_local_xlog_page_no_wait(XLogReaderState *state, XLogRecPtr targetPagePtr,
+ int reqLen, XLogRecPtr targetRecPtr,
+ char *cur_page)
+{
+ return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
+ targetRecPtr, cur_page, false);
+}
+
+/*
+ * Implementation of read_local_xlog_page and its no wait version.
+ */
+static int
+read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
+ int reqLen, XLogRecPtr targetRecPtr,
+ char *cur_page, bool wait_for_wal)
{
XLogRecPtr read_upto,
loc;
@@ -925,6 +954,10 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
if (loc <= read_upto)
break;
+ /* If asked, let's not wait for future WAL. */
+ if (!wait_for_wal)
+ break;
+
CHECK_FOR_INTERRUPTS();
pg_usleep(1000L);
}
diff --git a/src/bin/pg_waldump/.gitignore b/src/bin/pg_waldump/.gitignore
index 3be00a8b61..dabb6e34b6 100644
--- a/src/bin/pg_waldump/.gitignore
+++ b/src/bin/pg_waldump/.gitignore
@@ -23,6 +23,7 @@
/xactdesc.c
/xlogdesc.c
/xlogreader.c
+/xlogstat.c
# Generated by test suite
/tmp_check/
diff --git a/src/bin/pg_waldump/Makefile b/src/bin/pg_waldump/Makefile
index 9f333d0c8a..d6459e17c7 100644
--- a/src/bin/pg_waldump/Makefile
+++ b/src/bin/pg_waldump/Makefile
@@ -13,7 +13,8 @@ OBJS = \
compat.o \
pg_waldump.o \
rmgrdesc.o \
- xlogreader.o
+ xlogreader.o \
+ xlogstats.o
override CPPFLAGS := -DFRONTEND $(CPPFLAGS)
@@ -29,6 +30,9 @@ pg_waldump: $(OBJS) | submake-libpgport
xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/%
rm -f $@ && $(LN_S) $< .
+xlogstats.c: % : $(top_srcdir)/src/backend/access/transam/%
+ rm -f $@ && $(LN_S) $< .
+
$(RMGRDESCSOURCES): % : $(top_srcdir)/src/backend/access/rmgrdesc/%
rm -f $@ && $(LN_S) $< .
@@ -42,7 +46,7 @@ uninstall:
rm -f '$(DESTDIR)$(bindir)/pg_waldump$(X)'
clean distclean maintainer-clean:
- rm -f pg_waldump$(X) $(OBJS) $(RMGRDESCSOURCES) xlogreader.c
+ rm -f pg_waldump$(X) $(OBJS) $(RMGRDESCSOURCES) xlogreader.c xlogstats.c
rm -rf tmp_check
check:
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 30ca7684bd..8bf6899d67 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -21,11 +21,17 @@
#include "access/xlog_internal.h"
#include "access/xlogreader.h"
#include "access/xlogrecord.h"
+#include "access/xlogstats.h"
#include "common/fe_memutils.h"
#include "common/logging.h"
#include "getopt_long.h"
#include "rmgrdesc.h"
+/*
+ * NOTE: For any code change or issue fix here, it is highly recommended to
+ * give a thought about doing the same in pg_walinspect contrib module as well.
+ */
+
static const char *progname;
static int WalSegSz;
@@ -66,24 +72,6 @@ typedef struct XLogDumpConfig
bool filter_by_fpw;
} XLogDumpConfig;
-typedef struct Stats
-{
- uint64 count;
- uint64 rec_len;
- uint64 fpi_len;
-} Stats;
-
-#define MAX_XLINFO_TYPES 16
-
-typedef struct XLogDumpStats
-{
- uint64 count;
- XLogRecPtr startptr;
- XLogRecPtr endptr;
- Stats rmgr_stats[RM_MAX_ID + 1];
- Stats record_stats[RM_MAX_ID + 1][MAX_XLINFO_TYPES];
-} XLogDumpStats;
-
#define fatal_error(...) do { pg_log_fatal(__VA_ARGS__); exit(EXIT_FAILURE); } while(0)
/*
@@ -453,81 +441,6 @@ XLogRecordHasFPW(XLogReaderState *record)
return false;
}
-/*
- * Calculate the size of a record, split into !FPI and FPI parts.
- */
-static void
-XLogDumpRecordLen(XLogReaderState *record, uint32 *rec_len, uint32 *fpi_len)
-{
- int block_id;
-
- /*
- * Calculate the amount of FPI data in the record.
- *
- * XXX: We peek into xlogreader's private decoded backup blocks for the
- * bimg_len indicating the length of FPI data.
- */
- *fpi_len = 0;
- for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
- {
- if (XLogRecHasBlockImage(record, block_id))
- *fpi_len += XLogRecGetBlock(record, block_id)->bimg_len;
- }
-
- /*
- * Calculate the length of the record as the total length - the length of
- * all the block images.
- */
- *rec_len = XLogRecGetTotalLen(record) - *fpi_len;
-}
-
-/*
- * Store per-rmgr and per-record statistics for a given record.
- */
-static void
-XLogDumpCountRecord(XLogDumpConfig *config, XLogDumpStats *stats,
- XLogReaderState *record)
-{
- RmgrId rmid;
- uint8 recid;
- uint32 rec_len;
- uint32 fpi_len;
-
- stats->count++;
-
- rmid = XLogRecGetRmid(record);
-
- XLogDumpRecordLen(record, &rec_len, &fpi_len);
-
- /* Update per-rmgr statistics */
-
- stats->rmgr_stats[rmid].count++;
- stats->rmgr_stats[rmid].rec_len += rec_len;
- stats->rmgr_stats[rmid].fpi_len += fpi_len;
-
- /*
- * Update per-record statistics, where the record is identified by a
- * combination of the RmgrId and the four bits of the xl_info field that
- * are the rmgr's domain (resulting in sixteen possible entries per
- * RmgrId).
- */
-
- recid = XLogRecGetInfo(record) >> 4;
-
- /*
- * XACT records need to be handled differently. Those records use the
- * first bit of those four bits for an optional flag variable and the
- * following three bits for the opcode. We filter opcode out of xl_info
- * and use it as the identifier of the record.
- */
- if (rmid == RM_XACT_ID)
- recid &= 0x07;
-
- stats->record_stats[rmid][recid].count++;
- stats->record_stats[rmid][recid].rec_len += rec_len;
- stats->record_stats[rmid][recid].fpi_len += fpi_len;
-}
-
/*
* Print a record to stdout
*/
@@ -538,15 +451,11 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record)
const RmgrDescData *desc = GetRmgrDesc(XLogRecGetRmid(record));
uint32 rec_len;
uint32 fpi_len;
- RelFileNode rnode;
- ForkNumber forknum;
- BlockNumber blk;
- int block_id;
uint8 info = XLogRecGetInfo(record);
XLogRecPtr xl_prev = XLogRecGetPrev(record);
StringInfoData s;
- XLogDumpRecordLen(record, &rec_len, &fpi_len);
+ XLogRecGetLen(record, &rec_len, &fpi_len);
printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ",
desc->rm_name,
@@ -564,93 +473,11 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record)
initStringInfo(&s);
desc->rm_desc(&s, record);
printf("%s", s.data);
+
+ resetStringInfo(&s);
+ XLogRecGetBlockRefInfo(record, true, config->bkp_details, &s, NULL);
+ printf("%s", s.data);
pfree(s.data);
-
- if (!config->bkp_details)
- {
- /* print block references (short format) */
- for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
- {
- if (!XLogRecHasBlockRef(record, block_id))
- continue;
-
- XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
- if (forknum != MAIN_FORKNUM)
- printf(", blkref #%d: rel %u/%u/%u fork %s blk %u",
- block_id,
- rnode.spcNode, rnode.dbNode, rnode.relNode,
- forkNames[forknum],
- blk);
- else
- printf(", blkref #%d: rel %u/%u/%u blk %u",
- block_id,
- rnode.spcNode, rnode.dbNode, rnode.relNode,
- blk);
- if (XLogRecHasBlockImage(record, block_id))
- {
- if (XLogRecBlockImageApply(record, block_id))
- printf(" FPW");
- else
- printf(" FPW for WAL verification");
- }
- }
- putchar('\n');
- }
- else
- {
- /* print block references (detailed format) */
- putchar('\n');
- for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
- {
- if (!XLogRecHasBlockRef(record, block_id))
- continue;
-
- XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
- printf("\tblkref #%d: rel %u/%u/%u fork %s blk %u",
- block_id,
- rnode.spcNode, rnode.dbNode, rnode.relNode,
- forkNames[forknum],
- blk);
- if (XLogRecHasBlockImage(record, block_id))
- {
- uint8 bimg_info = XLogRecGetBlock(record, block_id)->bimg_info;
-
- if (BKPIMAGE_COMPRESSED(bimg_info))
- {
- const char *method;
-
- if ((bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0)
- method = "pglz";
- else if ((bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0)
- method = "lz4";
- else if ((bimg_info & BKPIMAGE_COMPRESS_ZSTD) != 0)
- method = "zstd";
- else
- method = "unknown";
-
- printf(" (FPW%s); hole: offset: %u, length: %u, "
- "compression saved: %u, method: %s",
- XLogRecBlockImageApply(record, block_id) ?
- "" : " for WAL verification",
- XLogRecGetBlock(record, block_id)->hole_offset,
- XLogRecGetBlock(record, block_id)->hole_length,
- BLCKSZ -
- XLogRecGetBlock(record, block_id)->hole_length -
- XLogRecGetBlock(record, block_id)->bimg_len,
- method);
- }
- else
- {
- printf(" (FPW%s); hole: offset: %u, length: %u",
- XLogRecBlockImageApply(record, block_id) ?
- "" : " for WAL verification",
- XLogRecGetBlock(record, block_id)->hole_offset,
- XLogRecGetBlock(record, block_id)->hole_length);
- }
- }
- putchar('\n');
- }
- }
}
/*
@@ -698,7 +525,7 @@ XLogDumpStatsRow(const char *name,
* Display summary statistics about the records seen so far.
*/
static void
-XLogDumpDisplayStats(XLogDumpConfig *config, XLogDumpStats *stats)
+XLogDumpDisplayStats(XLogDumpConfig *config, XLogStats *stats)
{
int ri,
rj;
@@ -722,6 +549,9 @@ XLogDumpDisplayStats(XLogDumpConfig *config, XLogDumpStats *stats)
for (ri = 0; ri <= RM_MAX_ID; ri++)
{
+ if (!RmgrIdIsValid(ri))
+ continue;
+
total_count += stats->rmgr_stats[ri].count;
total_rec_len += stats->rmgr_stats[ri].rec_len;
total_fpi_len += stats->rmgr_stats[ri].fpi_len;
@@ -867,7 +697,7 @@ main(int argc, char **argv)
XLogReaderState *xlogreader_state;
XLogDumpPrivate private;
XLogDumpConfig config;
- XLogDumpStats stats;
+ XLogStats stats;
XLogRecord *record;
XLogRecPtr first_record;
char *waldir = NULL;
@@ -921,7 +751,7 @@ main(int argc, char **argv)
memset(&private, 0, sizeof(XLogDumpPrivate));
memset(&config, 0, sizeof(XLogDumpConfig));
- memset(&stats, 0, sizeof(XLogDumpStats));
+ memset(&stats, 0, sizeof(XLogStats));
private.timeline = 1;
private.startptr = InvalidXLogRecPtr;
@@ -1319,7 +1149,7 @@ main(int argc, char **argv)
{
if (config.stats == true)
{
- XLogDumpCountRecord(&config, &stats, xlogreader_state);
+ XLogRecStoreStats(&stats, xlogreader_state);
stats.endptr = xlogreader_state->EndRecPtr;
}
else
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index e302bd102c..5e1e3446ae 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -31,7 +31,7 @@ extern XLogRecPtr XactLastRecEnd;
extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd;
/* these variables are GUC parameters related to XLOG */
-extern int wal_segment_size;
+extern PGDLLIMPORT int wal_segment_size;
extern int min_wal_size_mb;
extern int max_wal_size_mb;
extern int wal_keep_size_mb;
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index d9df7f295d..750f634120 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -320,7 +320,7 @@ typedef struct RmgrData
struct XLogRecordBuffer *buf);
} RmgrData;
-extern RmgrData RmgrTable[];
+extern PGDLLIMPORT RmgrData RmgrTable[];
extern void RmgrStartup(void);
extern void RmgrCleanup(void);
extern void RmgrNotFound(RmgrId rmid);
@@ -350,6 +350,10 @@ extern XLogRecPtr RequestXLogSwitch(bool mark_unimportant);
extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli);
+extern void XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty,
+ bool detailed_format, StringInfo buf,
+ uint32 *fpi_len);
+
/*
* Exported for the functions in timeline.c and xlogarchive.c. Only valid
* in the startup process.
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index d8eb857611..727e9fe971 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -344,9 +344,7 @@ extern void XLogReaderSetDecodeBuffer(XLogReaderState *state,
/* Position the XLogReader to given record */
extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
-#ifdef FRONTEND
extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
-#endif /* FRONTEND */
/* Return values from XLogPageReadCB. */
typedef enum XLogPageReadResult
diff --git a/src/include/access/xlogstats.h b/src/include/access/xlogstats.h
new file mode 100644
index 0000000000..453bb1f491
--- /dev/null
+++ b/src/include/access/xlogstats.h
@@ -0,0 +1,40 @@
+/*-------------------------------------------------------------------------
+ *
+ * xlogstats.h
+ * Definitions for WAL Statitstics
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/include/access/xlogstats.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef XLOGSTATS_H
+#define XLOGSTATS_H
+
+#define MAX_XLINFO_TYPES 16
+
+typedef struct XLogRecStats
+{
+ uint64 count;
+ uint64 rec_len;
+ uint64 fpi_len;
+} XLogRecStats;
+
+typedef struct XLogStats
+{
+ uint64 count;
+#ifdef FRONTEND
+ XLogRecPtr startptr;
+ XLogRecPtr endptr;
+#endif
+ XLogRecStats rmgr_stats[RM_MAX_ID + 1];
+ XLogRecStats record_stats[RM_MAX_ID + 1][MAX_XLINFO_TYPES];
+} XLogStats;
+
+extern void XLogRecGetLen(XLogReaderState *record, uint32 *rec_len,
+ uint32 *fpi_len);
+extern void XLogRecStoreStats(XLogStats *stats, XLogReaderState *record);
+
+#endif /* XLOGSTATS_H */
diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h
index ff40f96e42..3746e31e40 100644
--- a/src/include/access/xlogutils.h
+++ b/src/include/access/xlogutils.h
@@ -93,6 +93,10 @@ extern void FreeFakeRelcacheEntry(Relation fakerel);
extern int read_local_xlog_page(XLogReaderState *state,
XLogRecPtr targetPagePtr, int reqLen,
XLogRecPtr targetRecPtr, char *cur_page);
+extern int read_local_xlog_page_no_wait(XLogReaderState *state,
+ XLogRecPtr targetPagePtr, int reqLen,
+ XLogRecPtr targetRecPtr,
+ char *cur_page);
extern void wal_segment_open(XLogReaderState *state,
XLogSegNo nextSegNo,
TimeLineID *tli_p);