From 2258e76f90bf0254504644df0515cddc0c0a87f9 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 8 Apr 2022 00:02:10 -0700 Subject: [PATCH] Add contrib/pg_walinspect. Provides similar functionality to pg_waldump, but from a SQL interface rather than a separate utility. Author: Bharath Rupireddy Reviewed-by: Greg Stark, Kyotaro Horiguchi, Andres Freund, Ashutosh Sharma, Nitin Jadhav, RKN Sai Krishna Discussion: https://postgr.es/m/CALj2ACUGUYXsEQdKhEdsBzhGEyF3xggvLdD8C0VT72TNEfOiog%40mail.gmail.com --- contrib/Makefile | 1 + contrib/pg_walinspect/.gitignore | 4 + contrib/pg_walinspect/Makefile | 23 + .../pg_walinspect/expected/pg_walinspect.out | 165 +++++ contrib/pg_walinspect/pg_walinspect--1.0.sql | 118 ++++ contrib/pg_walinspect/pg_walinspect.c | 629 ++++++++++++++++++ contrib/pg_walinspect/pg_walinspect.control | 5 + contrib/pg_walinspect/sql/pg_walinspect.sql | 120 ++++ doc/src/sgml/contrib.sgml | 1 + doc/src/sgml/filelist.sgml | 1 + doc/src/sgml/func.sgml | 2 +- doc/src/sgml/pgwalinspect.sgml | 275 ++++++++ src/backend/access/rmgrdesc/xlogdesc.c | 130 ++++ src/backend/access/transam/Makefile | 1 + src/backend/access/transam/xlogreader.c | 9 - src/backend/access/transam/xlogstats.c | 93 +++ src/backend/access/transam/xlogutils.c | 33 + src/bin/pg_waldump/.gitignore | 1 + src/bin/pg_waldump/Makefile | 8 +- src/bin/pg_waldump/pg_waldump.c | 206 +----- src/include/access/xlog.h | 2 +- src/include/access/xlog_internal.h | 6 +- src/include/access/xlogreader.h | 2 - src/include/access/xlogstats.h | 40 ++ src/include/access/xlogutils.h | 4 + 25 files changed, 1675 insertions(+), 204 deletions(-) create mode 100644 contrib/pg_walinspect/.gitignore create mode 100644 contrib/pg_walinspect/Makefile create mode 100644 contrib/pg_walinspect/expected/pg_walinspect.out create mode 100644 contrib/pg_walinspect/pg_walinspect--1.0.sql create mode 100644 contrib/pg_walinspect/pg_walinspect.c create mode 100644 contrib/pg_walinspect/pg_walinspect.control create mode 100644 contrib/pg_walinspect/sql/pg_walinspect.sql create mode 100644 doc/src/sgml/pgwalinspect.sgml create mode 100644 src/backend/access/transam/xlogstats.c create mode 100644 src/include/access/xlogstats.h diff --git a/contrib/Makefile b/contrib/Makefile index 332b486ecc..bbf220407b 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -41,6 +41,7 @@ SUBDIRS = \ pgrowlocks \ pgstattuple \ pg_visibility \ + pg_walinspect \ postgres_fdw \ seg \ spi \ diff --git a/contrib/pg_walinspect/.gitignore b/contrib/pg_walinspect/.gitignore new file mode 100644 index 0000000000..5dcb3ff972 --- /dev/null +++ b/contrib/pg_walinspect/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/contrib/pg_walinspect/Makefile b/contrib/pg_walinspect/Makefile new file mode 100644 index 0000000000..350845cd43 --- /dev/null +++ b/contrib/pg_walinspect/Makefile @@ -0,0 +1,23 @@ +# contrib/pg_walinspect/Makefile + +MODULE_big = pg_walinspect +OBJS = \ + $(WIN32RES) \ + pg_walinspect.o +PGFILEDESC = "pg_walinspect - functions to inspect contents of PostgreSQL Write-Ahead Log" + +EXTENSION = pg_walinspect +DATA = pg_walinspect--1.0.sql + +REGRESS = pg_walinspect + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pg_walinspect +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/pg_walinspect/expected/pg_walinspect.out b/contrib/pg_walinspect/expected/pg_walinspect.out new file mode 100644 index 0000000000..634b1bb739 --- /dev/null +++ b/contrib/pg_walinspect/expected/pg_walinspect.out @@ -0,0 +1,165 @@ +CREATE EXTENSION pg_walinspect; +CREATE TABLE sample_tbl(col1 int, col2 int); +-- Make sure checkpoints don't interfere with the test. +SELECT lsn as wal_lsn1 FROM + pg_create_physical_replication_slot('regress_pg_walinspect_slot', true, false) + \gset +INSERT INTO sample_tbl SELECT * FROM generate_series(1, 2); +SELECT pg_current_wal_lsn() AS wal_lsn2 \gset +INSERT INTO sample_tbl SELECT * FROM generate_series(1, 2); +-- =================================================================== +-- Tests for input validation +-- =================================================================== +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info(:'wal_lsn2', :'wal_lsn1'); -- ERROR +ERROR: WAL start LSN must be less than end LSN +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats(:'wal_lsn2', :'wal_lsn1'); -- ERROR +ERROR: WAL start LSN must be less than end LSN +-- =================================================================== +-- Tests for all function executions +-- =================================================================== +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_record_info(:'wal_lsn1'); + ok +---- + t +(1 row) + +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2'); + ok +---- + t +(1 row) + +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info_till_end_of_wal(:'wal_lsn1'); + ok +---- + t +(1 row) + +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats(:'wal_lsn1', :'wal_lsn2'); + ok +---- + t +(1 row) + +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats_till_end_of_wal(:'wal_lsn1'); + ok +---- + t +(1 row) + +-- =================================================================== +-- Test for filtering out WAL records of a particular table +-- =================================================================== +SELECT oid AS sample_tbl_oid FROM pg_class WHERE relname = 'sample_tbl' \gset +SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2') + WHERE block_ref LIKE concat('%', :'sample_tbl_oid', '%') AND resource_manager = 'Heap'; + ok +---- + t +(1 row) + +-- =================================================================== +-- Test for filtering out WAL records based on resource_manager and +-- record_type +-- =================================================================== +SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2') + WHERE resource_manager = 'Heap' AND record_type = 'INSERT'; + ok +---- + t +(1 row) + +-- =================================================================== +-- Tests for permissions +-- =================================================================== +CREATE ROLE regress_pg_walinspect; +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- no + has_function_privilege +------------------------ + f +(1 row) + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- no + has_function_privilege +------------------------ + f +(1 row) + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- no + has_function_privilege +------------------------ + f +(1 row) + +-- Functions accessible by users with role pg_read_server_files +GRANT pg_read_server_files TO regress_pg_walinspect; +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes + has_function_privilege +------------------------ + t +(1 row) + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes + has_function_privilege +------------------------ + t +(1 row) + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes + has_function_privilege +------------------------ + t +(1 row) + +REVOKE pg_read_server_files FROM regress_pg_walinspect; +-- Superuser can grant execute to other users +GRANT EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn) + TO regress_pg_walinspect; +GRANT EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn) + TO regress_pg_walinspect; +GRANT EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean) + TO regress_pg_walinspect; +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes + has_function_privilege +------------------------ + t +(1 row) + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes + has_function_privilege +------------------------ + t +(1 row) + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes + has_function_privilege +------------------------ + t +(1 row) + +REVOKE EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn) + FROM regress_pg_walinspect; +REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn) + FROM regress_pg_walinspect; +REVOKE EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean) + FROM regress_pg_walinspect; +-- =================================================================== +-- Clean up +-- =================================================================== +DROP ROLE regress_pg_walinspect; +SELECT pg_drop_replication_slot('regress_pg_walinspect_slot'); + pg_drop_replication_slot +-------------------------- + +(1 row) + +DROP TABLE sample_tbl; diff --git a/contrib/pg_walinspect/pg_walinspect--1.0.sql b/contrib/pg_walinspect/pg_walinspect--1.0.sql new file mode 100644 index 0000000000..4243516d8a --- /dev/null +++ b/contrib/pg_walinspect/pg_walinspect--1.0.sql @@ -0,0 +1,118 @@ +/* contrib/pg_walinspect/pg_walinspect--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION pg_walinspect" to load this file. \quit + +-- +-- pg_get_wal_record_info() +-- +CREATE FUNCTION pg_get_wal_record_info(IN in_lsn pg_lsn, + OUT start_lsn pg_lsn, + OUT end_lsn pg_lsn, + OUT prev_lsn pg_lsn, + OUT xid xid, + OUT resource_manager text, + OUT record_type text, + OUT record_length int4, + OUT main_data_length int4, + OUT fpi_length int4, + OUT description text, + OUT block_ref text +) +AS 'MODULE_PATHNAME', 'pg_get_wal_record_info' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn) TO pg_read_server_files; + +-- +-- pg_get_wal_records_info() +-- +CREATE FUNCTION pg_get_wal_records_info(IN start_lsn pg_lsn, + IN end_lsn pg_lsn, + OUT start_lsn pg_lsn, + OUT end_lsn pg_lsn, + OUT prev_lsn pg_lsn, + OUT xid xid, + OUT resource_manager text, + OUT record_type text, + OUT record_length int4, + OUT main_data_length int4, + OUT fpi_length int4, + OUT description text, + OUT block_ref text +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_get_wal_records_info' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn) TO pg_read_server_files; + +-- +-- pg_get_wal_records_info_till_end_of_wal() +-- +CREATE FUNCTION pg_get_wal_records_info_till_end_of_wal(IN start_lsn pg_lsn, + OUT start_lsn pg_lsn, + OUT end_lsn pg_lsn, + OUT prev_lsn pg_lsn, + OUT xid xid, + OUT resource_manager text, + OUT record_type text, + OUT record_length int4, + OUT main_data_length int4, + OUT fpi_length int4, + OUT description text, + OUT block_ref text +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_get_wal_records_info_till_end_of_wal' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info_till_end_of_wal(pg_lsn) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_get_wal_records_info_till_end_of_wal(pg_lsn) TO pg_read_server_files; + +-- +-- pg_get_wal_stats() +-- +CREATE FUNCTION pg_get_wal_stats(IN start_lsn pg_lsn, + IN end_lsn pg_lsn, + IN per_record boolean DEFAULT false, + OUT "resource_manager/record_type" text, + OUT count int8, + OUT count_percentage float4, + OUT record_size int8, + OUT record_size_percentage float4, + OUT fpi_size int8, + OUT fpi_size_percentage float4, + OUT combined_size int8, + OUT combined_size_percentage float4 +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_get_wal_stats' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean) TO pg_read_server_files; + +-- +-- pg_get_wal_stats_till_end_of_wal() +-- +CREATE FUNCTION pg_get_wal_stats_till_end_of_wal(IN start_lsn pg_lsn, + IN per_record boolean DEFAULT false, + OUT "resource_manager/record_type" text, + OUT count int8, + OUT count_percentage float4, + OUT record_size int8, + OUT record_size_percentage float4, + OUT fpi_size int8, + OUT fpi_size_percentage float4, + OUT combined_size int8, + OUT combined_size_percentage float4 +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_get_wal_stats_till_end_of_wal' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_stats_till_end_of_wal(pg_lsn, boolean) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_get_wal_stats_till_end_of_wal(pg_lsn, boolean) TO pg_read_server_files; diff --git a/contrib/pg_walinspect/pg_walinspect.c b/contrib/pg_walinspect/pg_walinspect.c new file mode 100644 index 0000000000..58afa1ab1c --- /dev/null +++ b/contrib/pg_walinspect/pg_walinspect.c @@ -0,0 +1,629 @@ +/*------------------------------------------------------------------------- + * + * pg_walinspect.c + * Functions to inspect contents of PostgreSQL Write-Ahead Log + * + * Copyright (c) 2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pg_walinspect/pg_walinspect.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/xlog.h" +#include "access/xlog_internal.h" +#include "access/xlogreader.h" +#include "access/xlogrecovery.h" +#include "access/xlogstats.h" +#include "access/xlogutils.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/pg_lsn.h" + +/* + * NOTE: For any code change or issue fix here, it is highly recommended to + * give a thought about doing the same in pg_waldump tool as well. + */ + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(pg_get_wal_record_info); +PG_FUNCTION_INFO_V1(pg_get_wal_records_info); +PG_FUNCTION_INFO_V1(pg_get_wal_records_info_till_end_of_wal); +PG_FUNCTION_INFO_V1(pg_get_wal_stats); +PG_FUNCTION_INFO_V1(pg_get_wal_stats_till_end_of_wal); + +static bool IsFutureLSN(XLogRecPtr lsn, XLogRecPtr *curr_lsn); +static XLogReaderState *InitXLogReaderState(XLogRecPtr lsn, + XLogRecPtr *first_record); +static XLogRecord *ReadNextXLogRecord(XLogReaderState *xlogreader, + XLogRecPtr first_record); +static void GetWALRecordInfo(XLogReaderState *record, XLogRecPtr lsn, + Datum *values, bool *nulls, uint32 ncols); +static XLogRecPtr ValidateInputLSNs(bool till_end_of_wal, + XLogRecPtr start_lsn, XLogRecPtr end_lsn); +static void GetWALRecordsInfo(FunctionCallInfo fcinfo, XLogRecPtr start_lsn, + XLogRecPtr end_lsn); +static void GetXLogSummaryStats(XLogStats * stats, ReturnSetInfo *rsinfo, + Datum *values, bool *nulls, uint32 ncols, + bool stats_per_record); +static void FillXLogStatsRow(const char *name, uint64 n, uint64 total_count, + uint64 rec_len, uint64 total_rec_len, + uint64 fpi_len, uint64 total_fpi_len, + uint64 tot_len, uint64 total_len, + Datum *values, bool *nulls, uint32 ncols); +static void GetWalStats(FunctionCallInfo fcinfo, XLogRecPtr start_lsn, + XLogRecPtr end_lsn, bool stats_per_record); + +/* + * Check if the given LSN is in future. Also, return the LSN up to which the + * server has WAL. + */ +static bool +IsFutureLSN(XLogRecPtr lsn, XLogRecPtr *curr_lsn) +{ + /* + * We determine the current LSN of the server similar to how page_read + * callback read_local_xlog_page_no_wait does. + */ + if (!RecoveryInProgress()) + *curr_lsn = GetFlushRecPtr(NULL); + else + *curr_lsn = GetXLogReplayRecPtr(NULL); + + Assert(!XLogRecPtrIsInvalid(*curr_lsn)); + + if (lsn >= *curr_lsn) + return true; + + return false; +} + +/* + * Intialize WAL reader and identify first valid LSN. + */ +static XLogReaderState * +InitXLogReaderState(XLogRecPtr lsn, XLogRecPtr *first_record) +{ + XLogReaderState *xlogreader; + + /* + * Reading WAL below the first page of the first sgements isn't allowed. + * This is a bootstrap WAL page and the page_read callback fails to read + * it. + */ + if (lsn < XLOG_BLCKSZ) + ereport(ERROR, + (errmsg("could not read WAL at LSN %X/%X", + LSN_FORMAT_ARGS(lsn)))); + + xlogreader = XLogReaderAllocate(wal_segment_size, NULL, + XL_ROUTINE(.page_read = &read_local_xlog_page_no_wait, + .segment_open = &wal_segment_open, + .segment_close = &wal_segment_close), + NULL); + + if (xlogreader == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"), + errdetail("Failed while allocating a WAL reading processor."))); + + /* first find a valid recptr to start from */ + *first_record = XLogFindNextRecord(xlogreader, lsn); + + if (XLogRecPtrIsInvalid(*first_record)) + ereport(ERROR, + (errmsg("could not find a valid record after %X/%X", + LSN_FORMAT_ARGS(lsn)))); + + return xlogreader; +} + +/* + * Read next WAL record. + * + * By design, to be less intrusive in a running system, no slot is allocated + * to reserve the WAL we're about to read. Therefore this function can + * encounter read errors for historical WAL. + * + * We guard against ordinary errors trying to read WAL that hasn't been + * written yet by limiting end_lsn to the flushed WAL, but that can also + * encounter errors if the flush pointer falls in the middle of a record. + */ +static XLogRecord * +ReadNextXLogRecord(XLogReaderState *xlogreader, XLogRecPtr first_record) +{ + XLogRecord *record; + char *errormsg; + + record = XLogReadRecord(xlogreader, &errormsg); + + if (record == NULL) + { + if (errormsg) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read WAL at %X/%X: %s", + LSN_FORMAT_ARGS(first_record), errormsg))); + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read WAL at %X/%X", + LSN_FORMAT_ARGS(first_record)))); + } + + return record; +} + +/* + * Get a single WAL record info. + */ +static void +GetWALRecordInfo(XLogReaderState *record, XLogRecPtr lsn, + Datum *values, bool *nulls, uint32 ncols) +{ + const char *id; + RmgrData desc; + uint32 fpi_len = 0; + StringInfoData rec_desc; + StringInfoData rec_blk_ref; + uint32 main_data_len; + int i = 0; + + desc = GetRmgr(XLogRecGetRmid(record)); + id = desc.rm_identify(XLogRecGetInfo(record)); + + if (id == NULL) + id = psprintf("UNKNOWN (%x)", XLogRecGetInfo(record) & ~XLR_INFO_MASK); + + initStringInfo(&rec_desc); + desc.rm_desc(&rec_desc, record); + + /* Block references. */ + initStringInfo(&rec_blk_ref); + XLogRecGetBlockRefInfo(record, false, true, &rec_blk_ref, &fpi_len); + + main_data_len = XLogRecGetDataLen(record); + + values[i++] = LSNGetDatum(lsn); + values[i++] = LSNGetDatum(record->EndRecPtr); + values[i++] = LSNGetDatum(XLogRecGetPrev(record)); + values[i++] = TransactionIdGetDatum(XLogRecGetXid(record)); + values[i++] = CStringGetTextDatum(desc.rm_name); + values[i++] = CStringGetTextDatum(id); + values[i++] = UInt32GetDatum(XLogRecGetTotalLen(record)); + values[i++] = UInt32GetDatum(main_data_len); + values[i++] = UInt32GetDatum(fpi_len); + values[i++] = CStringGetTextDatum(rec_desc.data); + values[i++] = CStringGetTextDatum(rec_blk_ref.data); + + Assert(i == ncols); +} + +/* + * Get WAL record info. + * + * This function emits an error if a future WAL LSN i.e. WAL LSN the database + * system doesn't know about is specified. + */ +Datum +pg_get_wal_record_info(PG_FUNCTION_ARGS) +{ +#define PG_GET_WAL_RECORD_INFO_COLS 11 + Datum result; + Datum values[PG_GET_WAL_RECORD_INFO_COLS]; + bool nulls[PG_GET_WAL_RECORD_INFO_COLS]; + XLogRecPtr lsn; + XLogRecPtr curr_lsn; + XLogRecPtr first_record; + XLogReaderState *xlogreader; + TupleDesc tupdesc; + HeapTuple tuple; + + lsn = PG_GETARG_LSN(0); + + if (IsFutureLSN(lsn, &curr_lsn)) + { + /* + * GetFlushRecPtr or GetXLogReplayRecPtr gives "end+1" LSN of the last + * record flushed or replayed respectively. But let's use the LSN up + * to "end" in user facing message. + */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot accept future input LSN"), + errdetail("Last known WAL LSN on the database system is at %X/%X.", + LSN_FORMAT_ARGS(curr_lsn)))); + } + + /* Build a tuple descriptor for our result type. */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + xlogreader = InitXLogReaderState(lsn, &first_record); + + (void) ReadNextXLogRecord(xlogreader, first_record); + + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + GetWALRecordInfo(xlogreader, first_record, values, nulls, + PG_GET_WAL_RECORD_INFO_COLS); + + XLogReaderFree(xlogreader); + + tuple = heap_form_tuple(tupdesc, values, nulls); + result = HeapTupleGetDatum(tuple); + + PG_RETURN_DATUM(result); +#undef PG_GET_WAL_RECORD_INFO_COLS +} + +/* + * Validate the input LSNs and compute end LSN for till_end_of_wal versions. + */ +static XLogRecPtr +ValidateInputLSNs(bool till_end_of_wal, XLogRecPtr start_lsn, + XLogRecPtr end_lsn) +{ + XLogRecPtr curr_lsn; + + if (IsFutureLSN(start_lsn, &curr_lsn)) + { + /* + * GetFlushRecPtr or GetXLogReplayRecPtr gives "end+1" LSN of the last + * record flushed or replayed respectively. But let's use the LSN up + * to "end" in user facing message. + */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot accept future start LSN"), + errdetail("Last known WAL LSN on the database system is at %X/%X.", + LSN_FORMAT_ARGS(curr_lsn)))); + } + + if (till_end_of_wal) + end_lsn = curr_lsn; + + if (end_lsn > curr_lsn) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot accept future end LSN"), + errdetail("Last known WAL LSN on the database system is at %X/%X.", + LSN_FORMAT_ARGS(curr_lsn)))); + + if (start_lsn >= end_lsn) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("WAL start LSN must be less than end LSN"))); + + return end_lsn; +} + +/* + * Get info and data of all WAL records between start LSN and end LSN. + */ +static void +GetWALRecordsInfo(FunctionCallInfo fcinfo, XLogRecPtr start_lsn, + XLogRecPtr end_lsn) +{ +#define PG_GET_WAL_RECORDS_INFO_COLS 11 + XLogRecPtr first_record; + XLogReaderState *xlogreader; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + Datum values[PG_GET_WAL_RECORDS_INFO_COLS]; + bool nulls[PG_GET_WAL_RECORDS_INFO_COLS]; + + SetSingleFuncCall(fcinfo, 0); + + xlogreader = InitXLogReaderState(start_lsn, &first_record); + + Assert(xlogreader); + + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + for (;;) + { + (void) ReadNextXLogRecord(xlogreader, first_record); + + if (xlogreader->EndRecPtr <= end_lsn) + { + GetWALRecordInfo(xlogreader, xlogreader->currRecPtr, values, nulls, + PG_GET_WAL_RECORDS_INFO_COLS); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + + /* if we read up to end_lsn, we're done */ + if (xlogreader->EndRecPtr >= end_lsn) + break; + + CHECK_FOR_INTERRUPTS(); + } + + XLogReaderFree(xlogreader); + +#undef PG_GET_WAL_RECORDS_INFO_COLS +} + +/* + * Get info and data of all WAL records between start LSN and end LSN. + * + * This function emits an error if a future start or end WAL LSN i.e. WAL LSN + * the database system doesn't know about is specified. + */ +Datum +pg_get_wal_records_info(PG_FUNCTION_ARGS) +{ + XLogRecPtr start_lsn; + XLogRecPtr end_lsn; + + start_lsn = PG_GETARG_LSN(0); + end_lsn = PG_GETARG_LSN(1); + + end_lsn = ValidateInputLSNs(false, start_lsn, end_lsn); + + GetWALRecordsInfo(fcinfo, start_lsn, end_lsn); + + PG_RETURN_VOID(); +} + +/* + * Get info and data of all WAL records from start LSN till end of WAL. + * + * This function emits an error if a future start i.e. WAL LSN the database + * system doesn't know about is specified. + */ +Datum +pg_get_wal_records_info_till_end_of_wal(PG_FUNCTION_ARGS) +{ + XLogRecPtr start_lsn; + XLogRecPtr end_lsn = InvalidXLogRecPtr; + + start_lsn = PG_GETARG_LSN(0); + + end_lsn = ValidateInputLSNs(true, start_lsn, end_lsn); + + GetWALRecordsInfo(fcinfo, start_lsn, end_lsn); + + PG_RETURN_VOID(); +} + +/* + * Fill single row of record counts and sizes for an rmgr or record. + */ +static void +FillXLogStatsRow(const char *name, + uint64 n, uint64 total_count, + uint64 rec_len, uint64 total_rec_len, + uint64 fpi_len, uint64 total_fpi_len, + uint64 tot_len, uint64 total_len, + Datum *values, bool *nulls, uint32 ncols) +{ + double n_pct, + rec_len_pct, + fpi_len_pct, + tot_len_pct; + int i = 0; + + n_pct = 0; + if (total_count != 0) + n_pct = 100 * (double) n / total_count; + + rec_len_pct = 0; + if (total_rec_len != 0) + rec_len_pct = 100 * (double) rec_len / total_rec_len; + + fpi_len_pct = 0; + if (total_fpi_len != 0) + fpi_len_pct = 100 * (double) fpi_len / total_fpi_len; + + tot_len_pct = 0; + if (total_len != 0) + tot_len_pct = 100 * (double) tot_len / total_len; + + values[i++] = CStringGetTextDatum(name); + values[i++] = Int64GetDatum(n); + values[i++] = Float4GetDatum(n_pct); + values[i++] = Int64GetDatum(rec_len); + values[i++] = Float4GetDatum(rec_len_pct); + values[i++] = Int64GetDatum(fpi_len); + values[i++] = Float4GetDatum(fpi_len_pct); + values[i++] = Int64GetDatum(tot_len); + values[i++] = Float4GetDatum(tot_len_pct); + + Assert(i == ncols); +} + +/* + * Get summary statistics about the records seen so far. + */ +static void +GetXLogSummaryStats(XLogStats *stats, ReturnSetInfo *rsinfo, + Datum *values, bool *nulls, uint32 ncols, + bool stats_per_record) +{ + uint64 total_count = 0; + uint64 total_rec_len = 0; + uint64 total_fpi_len = 0; + uint64 total_len = 0; + int ri; + + /* + * Each row shows its percentages of the total, so make a first pass to + * calculate column totals. + */ + for (ri = 0; ri <= RM_MAX_ID; ri++) + { + if (!RmgrIdIsValid(ri)) + continue; + + total_count += stats->rmgr_stats[ri].count; + total_rec_len += stats->rmgr_stats[ri].rec_len; + total_fpi_len += stats->rmgr_stats[ri].fpi_len; + } + total_len = total_rec_len + total_fpi_len; + + for (ri = 0; ri <= RM_MAX_ID; ri++) + { + uint64 count; + uint64 rec_len; + uint64 fpi_len; + uint64 tot_len; + RmgrData desc; + + if (!RmgrIdIsValid(ri)) + continue; + + if (!RmgrIdExists(ri)) + continue; + + desc = GetRmgr(ri); + + if (stats_per_record) + { + int rj; + + for (rj = 0; rj < MAX_XLINFO_TYPES; rj++) + { + const char *id; + + count = stats->record_stats[ri][rj].count; + rec_len = stats->record_stats[ri][rj].rec_len; + fpi_len = stats->record_stats[ri][rj].fpi_len; + tot_len = rec_len + fpi_len; + + /* Skip undefined combinations and ones that didn't occur */ + if (count == 0) + continue; + + /* the upper four bits in xl_info are the rmgr's */ + id = desc.rm_identify(rj << 4); + if (id == NULL) + id = psprintf("UNKNOWN (%x)", rj << 4); + + FillXLogStatsRow(psprintf("%s/%s", desc.rm_name, id), count, + total_count, rec_len, total_rec_len, fpi_len, + total_fpi_len, tot_len, total_len, + values, nulls, ncols); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + } + else + { + count = stats->rmgr_stats[ri].count; + rec_len = stats->rmgr_stats[ri].rec_len; + fpi_len = stats->rmgr_stats[ri].fpi_len; + tot_len = rec_len + fpi_len; + + FillXLogStatsRow(desc.rm_name, count, total_count, rec_len, + total_rec_len, fpi_len, total_fpi_len, tot_len, + total_len, values, nulls, ncols); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + } +} + +/* + * Get WAL stats between start LSN and end LSN. + */ +static void +GetWalStats(FunctionCallInfo fcinfo, XLogRecPtr start_lsn, + XLogRecPtr end_lsn, bool stats_per_record) +{ +#define PG_GET_WAL_STATS_COLS 9 + XLogRecPtr first_record; + XLogReaderState *xlogreader; + XLogStats stats; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + Datum values[PG_GET_WAL_STATS_COLS]; + bool nulls[PG_GET_WAL_STATS_COLS]; + + SetSingleFuncCall(fcinfo, 0); + + xlogreader = InitXLogReaderState(start_lsn, &first_record); + + MemSet(&stats, 0, sizeof(stats)); + + for (;;) + { + (void) ReadNextXLogRecord(xlogreader, first_record); + + if (xlogreader->EndRecPtr <= end_lsn) + XLogRecStoreStats(&stats, xlogreader); + + /* if we read up to end_lsn, we're done */ + if (xlogreader->EndRecPtr >= end_lsn) + break; + + CHECK_FOR_INTERRUPTS(); + } + + XLogReaderFree(xlogreader); + + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + GetXLogSummaryStats(&stats, rsinfo, values, nulls, + PG_GET_WAL_STATS_COLS, + stats_per_record); + +#undef PG_GET_WAL_STATS_COLS +} + +/* + * Get stats of all WAL records between start LSN and end LSN. + * + * This function emits an error if a future start or end WAL LSN i.e. WAL LSN + * the database system doesn't know about is specified. + */ +Datum +pg_get_wal_stats(PG_FUNCTION_ARGS) +{ + XLogRecPtr start_lsn; + XLogRecPtr end_lsn; + bool stats_per_record; + + start_lsn = PG_GETARG_LSN(0); + end_lsn = PG_GETARG_LSN(1); + stats_per_record = PG_GETARG_BOOL(2); + + end_lsn = ValidateInputLSNs(false, start_lsn, end_lsn); + + GetWalStats(fcinfo, start_lsn, end_lsn, stats_per_record); + + PG_RETURN_VOID(); +} + +/* + * Get stats of all WAL records from start LSN till end of WAL. + * + * This function emits an error if a future start i.e. WAL LSN the database + * system doesn't know about is specified. + */ +Datum +pg_get_wal_stats_till_end_of_wal(PG_FUNCTION_ARGS) +{ + XLogRecPtr start_lsn; + XLogRecPtr end_lsn = InvalidXLogRecPtr; + bool stats_per_record; + + start_lsn = PG_GETARG_LSN(0); + stats_per_record = PG_GETARG_BOOL(1); + + end_lsn = ValidateInputLSNs(true, start_lsn, end_lsn); + + GetWalStats(fcinfo, start_lsn, end_lsn, stats_per_record); + + PG_RETURN_VOID(); +} diff --git a/contrib/pg_walinspect/pg_walinspect.control b/contrib/pg_walinspect/pg_walinspect.control new file mode 100644 index 0000000000..017e56a2bb --- /dev/null +++ b/contrib/pg_walinspect/pg_walinspect.control @@ -0,0 +1,5 @@ +# pg_walinspect extension +comment = 'functions to inspect contents of PostgreSQL Write-Ahead Log' +default_version = '1.0' +module_pathname = '$libdir/pg_walinspect' +relocatable = true diff --git a/contrib/pg_walinspect/sql/pg_walinspect.sql b/contrib/pg_walinspect/sql/pg_walinspect.sql new file mode 100644 index 0000000000..1a9fd09e7b --- /dev/null +++ b/contrib/pg_walinspect/sql/pg_walinspect.sql @@ -0,0 +1,120 @@ +CREATE EXTENSION pg_walinspect; + +CREATE TABLE sample_tbl(col1 int, col2 int); + +-- Make sure checkpoints don't interfere with the test. +SELECT lsn as wal_lsn1 FROM + pg_create_physical_replication_slot('regress_pg_walinspect_slot', true, false) + \gset + +INSERT INTO sample_tbl SELECT * FROM generate_series(1, 2); + +SELECT pg_current_wal_lsn() AS wal_lsn2 \gset + +INSERT INTO sample_tbl SELECT * FROM generate_series(1, 2); + +-- =================================================================== +-- Tests for input validation +-- =================================================================== + +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info(:'wal_lsn2', :'wal_lsn1'); -- ERROR + +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats(:'wal_lsn2', :'wal_lsn1'); -- ERROR + +-- =================================================================== +-- Tests for all function executions +-- =================================================================== + +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_record_info(:'wal_lsn1'); + +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2'); + +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_records_info_till_end_of_wal(:'wal_lsn1'); + +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats(:'wal_lsn1', :'wal_lsn2'); + +SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats_till_end_of_wal(:'wal_lsn1'); + +-- =================================================================== +-- Test for filtering out WAL records of a particular table +-- =================================================================== + +SELECT oid AS sample_tbl_oid FROM pg_class WHERE relname = 'sample_tbl' \gset + +SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2') + WHERE block_ref LIKE concat('%', :'sample_tbl_oid', '%') AND resource_manager = 'Heap'; + +-- =================================================================== +-- Test for filtering out WAL records based on resource_manager and +-- record_type +-- =================================================================== + +SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2') + WHERE resource_manager = 'Heap' AND record_type = 'INSERT'; + +-- =================================================================== +-- Tests for permissions +-- =================================================================== +CREATE ROLE regress_pg_walinspect; + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- no + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- no + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- no + +-- Functions accessible by users with role pg_read_server_files + +GRANT pg_read_server_files TO regress_pg_walinspect; + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes + +REVOKE pg_read_server_files FROM regress_pg_walinspect; + +-- Superuser can grant execute to other users +GRANT EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn) + TO regress_pg_walinspect; + +GRANT EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn) + TO regress_pg_walinspect; + +GRANT EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean) + TO regress_pg_walinspect; + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_records_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes + +SELECT has_function_privilege('regress_pg_walinspect', + 'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes + +REVOKE EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn) + FROM regress_pg_walinspect; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn) + FROM regress_pg_walinspect; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean) + FROM regress_pg_walinspect; + +-- =================================================================== +-- Clean up +-- =================================================================== + +DROP ROLE regress_pg_walinspect; + +SELECT pg_drop_replication_slot('regress_pg_walinspect_slot'); + +DROP TABLE sample_tbl; diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index 1e42ce1a7f..4e7b87a42f 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -131,6 +131,7 @@ CREATE EXTENSION module_name; &pgsurgery; &pgtrgm; &pgvisibility; + &pgwalinspect; &postgres-fdw; &seg; &sepgsql; diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 7dea670969..1e82cb2d3d 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -148,6 +148,7 @@ + diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 29c4376886..5047e090db 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -29205,7 +29205,7 @@ postgres=# SELECT * FROM pg_walfile_name_offset((pg_backup_stop()).lsn); - + pg_logical_emit_message diff --git a/doc/src/sgml/pgwalinspect.sgml b/doc/src/sgml/pgwalinspect.sgml new file mode 100644 index 0000000000..44010a5b0a --- /dev/null +++ b/doc/src/sgml/pgwalinspect.sgml @@ -0,0 +1,275 @@ + + + + pg_walinspect + + + pg_walinspect + + + + The pg_walinspect module provides SQL functions that + allow you to inspect the contents of write-ahead log of + a running PostgreSQL database cluster at a low + level, which is useful for debugging or analytical or reporting or + educational purposes. It is similar to , but + accessible through SQL rather than a separate utility. + + + + All the functions of this module will provide the WAL information using the + current server's timeline ID. + + + + All the functions of this module will try to find the first valid WAL record + that is at or after the given in_lsn or + start_lsn and will emit error if no such record + is available. Similarly, the end_lsn must be + available, and if it falls in the middle of a record, the entire record must + be available. + + + + + Some functions, such as pg_logical_emit_message, + return the LSN after the record just + inserted. Therefore, if you pass that LSN as + in_lsn or start_lsn + to one of these functions, it will return the next + record. + + + + By default, use of these functions is restricted to superusers and members of + the pg_read_server_files role. Access may be granted by + superusers to others using GRANT. + + + + General Functions + + + + + + pg_get_wal_record_info(in_lsn pg_lsn, + start_lsn OUT pg_lsn, + end_lsn OUT pg_lsn, + prev_lsn OUT pg_lsn, + xid OUT xid, + resource_manager OUT text, + record_type OUT text, + record_length OUT int4, + main_data_length OUT int4, + fpi_length OUT int4, + description OUT text, + block_ref OUT text) + + + + + + Gets WAL record information of a given LSN. If the given LSN isn't + at the start of a WAL record, it gives the information of the next + available valid WAL record; or an error if no such record is found. + + + + + + + + pg_get_wal_records_info(start_lsn pg_lsn, + end_lsn pg_lsn, + start_lsn OUT pg_lsn, + end_lsn OUT pg_lsn, + prev_lsn OUT pg_lsn, + xid OUT xid, + resource_manager OUT text, + record_type OUT text, + record_length OUT int4, + main_data_length OUT int4, + fpi_length OUT int4, + description OUT text, + block_ref OUT text) + returns setof record + + + + + + Gets information of all the valid WAL records between + start_lsn and end_lsn. + Returns one row per WAL record. If start_lsn + or end_lsn are not yet available, the + function will raise an error. For example, usage of the function is as + follows: + +postgres=# select start_lsn, end_lsn, prev_lsn, xid, resource_manager, record_type, record_length, main_data_length, fpi_length, description from pg_get_wal_records_info('0/14F9A30', '0/15011D7'); + start_lsn | end_lsn | prev_lsn | xid | resource_manager | record_type | record_length | main_data_length | fpi_length | description +-----------+-----------+-----------+-----+------------------+--------------+---------------+------------------+------------+--------------------- + 0/14FA118 | 0/14FB4B0 | 0/14F9958 | 725 | Btree | INSERT_LEAF | 5013 | 2 | 4960 | off 246 + 0/14FB4B0 | 0/14FD050 | 0/14FA118 | 725 | Btree | INSERT_LEAF | 7045 | 2 | 6992 | off 130 + 0/14FD050 | 0/14FD0A8 | 0/14FB4B0 | 725 | Heap2 | MULTI_INSERT | 85 | 6 | 0 | 1 tuples flags 0x02 + 0/14FD0A8 | 0/14FD0F0 | 0/14FD050 | 725 | Btree | INSERT_LEAF | 72 | 2 | 0 | off 155 + 0/14FD0F0 | 0/14FD138 | 0/14FD0A8 | 725 | Btree | INSERT_LEAF | 72 | 2 | 0 | off 134 + 0/14FD138 | 0/14FD210 | 0/14FD0F0 | 725 | Heap | INSERT | 211 | 3 | 0 | off 11 flags 0x00 + 0/14FD210 | 0/14FD250 | 0/14FD138 | 725 | Btree | INSERT_LEAF | 64 | 2 | 0 | off 246 + 0/14FD250 | 0/14FF260 | 0/14FD210 | 725 | Btree | INSERT_LEAF | 8181 | 2 | 8128 | off 47 + 0/14FF260 | 0/14FF2B8 | 0/14FD250 | 725 | Heap2 | MULTI_INSERT | 85 | 6 | 0 | 1 tuples flags 0x02 + 0/14FF2B8 | 0/14FF300 | 0/14FF260 | 725 | Btree | INSERT_LEAF | 72 | 2 | 0 | off 155 + 0/14FF300 | 0/15008D8 | 0/14FF2B8 | 725 | Btree | INSERT_LEAF | 5565 | 2 | 5512 | off 106 + 0/15008D8 | 0/1500C48 | 0/14FF300 | 725 | Heap | INSERT | 874 | 3 | 820 | off 2 flags 0x01 +(12 rows) + + + + + + + + + pg_get_wal_records_info_till_end_of_wal(start_lsn pg_lsn, + start_lsn OUT pg_lsn, + end_lsn OUT pg_lsn, + prev_lsn OUT pg_lsn, + xid OUT xid, + resource_manager OUT text, + record_type OUT text, + record_length OUT int4, + main_data_length OUT int4, + fpi_length OUT int4, + description OUT text, + block_ref OUT text) + returns setof record + + + + + + This function is same as pg_get_wal_records_info() + except that it gets information of all the valid WAL records from + start_lsn till the end of WAL. + + + + + + + + pg_get_wal_stats(start_lsn pg_lsn, + end_lsn pg_lsn, + per_record boolean DEFAULT false, + "resource_manager/record_type" OUT text, + count OUT int8, + count_percentage OUT float4, + record_length OUT int8, + record_length_percentage OUT float4, + fpi_length OUT int8, + fpi_length_percentage OUT float4, + combined_size OUT int8, + combined_size_percentage OUT float4) + returns setof record + + + + + + Gets statistics of all the valid WAL records between + start_lsn and + end_lsn. By default, it returns one row per + resource_manager type. When + per_record is set to true, + it returns one row per record_type. + If start_lsn + or end_lsn are not yet available, the + function will raise an error. For example, usage of the function is as + follows: + +postgres=# select * from pg_get_wal_stats('0/12FBA30', '0/15011D7') where count > 0; + resource_manager/record_type | count | count_percentage | record_size | record_size_percentage | fpi_size | fpi_size_percentage | combined_size | combined_size_percentage +------------------------------+-------+------------------+-------------+------------------------+----------+---------------------+---------------+-------------------------- + XLOG | 10 | 0.10871929 | 796 | 0.052369177 | 352 | 0.061031006 | 1148 | 0.054751817 + Transaction | 187 | 2.0330508 | 62773 | 4.1298623 | 0 | 0 | 62773 | 2.9938467 + Storage | 13 | 0.14133507 | 546 | 0.035921574 | 0 | 0 | 546 | 0.0260405 + Database | 2 | 0.021743858 | 84 | 0.005526396 | 0 | 0 | 84 | 0.0040062307 + Standby | 218 | 2.3700805 | 15908 | 1.0465941 | 0 | 0 | 15908 | 0.75870377 + Heap2 | 1897 | 20.624048 | 383916 | 25.257998 | 364472 | 63.193447 | 748388 | 35.693035 + Heap | 1318 | 14.329202 | 621390 | 40.88151 | 139660 | 24.214746 | 761050 | 36.29693 + Btree | 5553 | 60.37182 | 434565 | 28.590216 | 72272 | 12.530776 | 506837 | 24.17269 +(8 rows) + + +With per_record passed as true: + + +postgres=# select * from pg_get_wal_stats('0/14AFC30', '0/15011D7', true) where count > 0; + resource_manager/record_type | count | count_percentage | record_size | record_size_percentage | fpi_size | fpi_size_percentage | combined_size | combined_size_percentage +------------------------------+-------+------------------+-------------+------------------------+----------+---------------------+---------------+-------------------------- + XLOG/CHECKPOINT_SHUTDOWN | 1 | 0.32894737 | 114 | 0.22891566 | 0 | 0 | 114 | 0.03534489 + XLOG/CHECKPOINT_ONLINE | 4 | 1.3157895 | 456 | 0.91566265 | 0 | 0 | 456 | 0.14137957 + XLOG/NEXTOID | 1 | 0.32894737 | 30 | 0.060240965 | 0 | 0 | 30 | 0.009301287 + Transaction/COMMIT | 9 | 2.9605262 | 1173 | 2.3554218 | 0 | 0 | 1173 | 0.36368033 + Storage/CREATE | 1 | 0.32894737 | 42 | 0.084337346 | 0 | 0 | 42 | 0.0130218025 + Database/CREATE_FILE_COPY | 2 | 0.65789473 | 84 | 0.16867469 | 0 | 0 | 84 | 0.026043605 + Standby/RUNNING_XACTS | 6 | 1.9736842 | 316 | 0.6345382 | 0 | 0 | 316 | 0.09797356 + Standby/INVALIDATIONS | 45 | 14.802631 | 4018 | 8.068274 | 0 | 0 | 4018 | 1.2457525 + Heap2/PRUNE | 4 | 1.3157895 | 270 | 0.5421687 | 0 | 0 | 270 | 0.08371159 + Heap2/FREEZE_PAGE | 27 | 8.881579 | 20904 | 41.975903 | 0 | 0 | 20904 | 6.481137 + Heap2/VISIBLE | 29 | 9.539474 | 1756 | 3.5261045 | 73728 | 27.032736 | 75484 | 23.403278 + Heap2/MULTI_INSERT | 13 | 4.2763157 | 1049 | 2.1064258 | 12216 | 4.479057 | 13265 | 4.112719 + Heap/INSERT | 19 | 6.25 | 2324 | 4.6666665 | 43884 | 16.090284 | 46208 | 14.326463 + Heap/UPDATE | 7 | 2.3026316 | 511 | 1.0261045 | 54340 | 19.924028 | 54851 | 17.006165 + Heap/HOT_UPDATE | 11 | 3.618421 | 1134 | 2.2771084 | 468 | 0.17159452 | 1602 | 0.49668875 + Heap/LOCK | 8 | 2.631579 | 432 | 0.8674699 | 0 | 0 | 432 | 0.13393854 + Heap/INPLACE | 45 | 14.802631 | 9123 | 18.319277 | 16076 | 5.894345 | 25199 | 7.8127713 + Heap/UPDATE+INIT | 1 | 0.32894737 | 817 | 1.6405623 | 0 | 0 | 817 | 0.25330505 + Btree/INSERT_LEAF | 70 | 23.026316 | 5183 | 10.407631 | 72024 | 26.407955 | 77207 | 23.937483 + Btree/DEDUP | 1 | 0.32894737 | 64 | 0.12851405 | 0 | 0 | 64 | 0.019842746 +(20 rows) + + + + + + + + + pg_get_wal_stats_till_end_of_wal(start_lsn pg_lsn, + per_record boolean DEFAULT false, + "resource_manager/record_type" OUT text, + count OUT int8, + count_percentage OUT float4, + record_length OUT int8, + record_length_percentage OUT float4, + fpi_length OUT int8, + fpi_length_percentage OUT float4, + combined_size OUT int8, + combined_size_percentage OUT float4) + returns setof record + + + + + + This function is same as pg_get_wal_stats() except + that it gets statistics of all the valid WAL records from + start_lsn till end of WAL. + + + + + + + + + Author + + + Bharath Rupireddy bharath.rupireddyforpostgres@gmail.com + + + + diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index e7452af679..dff1e7685e 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -200,3 +200,133 @@ xlog_identify(uint8 info) return id; } + +/* + * Returns a string giving information about all the blocks in an + * XLogRecord. + */ +void +XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty, + bool detailed_format, StringInfo buf, + uint32 *fpi_len) +{ + int block_id; + + Assert(record != NULL); + + if (detailed_format && pretty) + appendStringInfoChar(buf, '\n'); + + for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++) + { + RelFileNode rnode = {InvalidOid, InvalidOid, InvalidOid}; + ForkNumber forknum = InvalidForkNumber; + BlockNumber blk = InvalidBlockNumber; + + if (!XLogRecHasBlockRef(record, block_id)) + continue; + + XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk); + + if (detailed_format) + { + /* Get block references in detailed format. */ + + if (pretty) + appendStringInfoChar(buf, '\t'); + else if (block_id > 0) + appendStringInfoChar(buf, ' '); + + appendStringInfo(buf, + "blkref #%d: rel %u/%u/%u fork %s blk %u", + block_id, + rnode.spcNode, rnode.dbNode, rnode.relNode, + forkNames[forknum], + blk); + + if (XLogRecHasBlockImage(record, block_id)) + { + uint8 bimg_info = XLogRecGetBlock(record, block_id)->bimg_info; + + /* Calculate the amount of FPI data in the record. */ + if (fpi_len) + *fpi_len += XLogRecGetBlock(record, block_id)->bimg_len; + + if (BKPIMAGE_COMPRESSED(bimg_info)) + { + const char *method; + + if ((bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0) + method = "pglz"; + else if ((bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0) + method = "lz4"; + else if ((bimg_info & BKPIMAGE_COMPRESS_ZSTD) != 0) + method = "zstd"; + else + method = "unknown"; + + appendStringInfo(buf, + " (FPW%s); hole: offset: %u, length: %u, " + "compression saved: %u, method: %s", + XLogRecBlockImageApply(record, block_id) ? + "" : " for WAL verification", + XLogRecGetBlock(record, block_id)->hole_offset, + XLogRecGetBlock(record, block_id)->hole_length, + BLCKSZ - + XLogRecGetBlock(record, block_id)->hole_length - + XLogRecGetBlock(record, block_id)->bimg_len, + method); + } + else + { + appendStringInfo(buf, + " (FPW%s); hole: offset: %u, length: %u", + XLogRecBlockImageApply(record, block_id) ? + "" : " for WAL verification", + XLogRecGetBlock(record, block_id)->hole_offset, + XLogRecGetBlock(record, block_id)->hole_length); + } + } + + if (pretty) + appendStringInfoChar(buf, '\n'); + } + else + { + /* Get block references in short format. */ + + if (forknum != MAIN_FORKNUM) + { + appendStringInfo(buf, + ", blkref #%d: rel %u/%u/%u fork %s blk %u", + block_id, + rnode.spcNode, rnode.dbNode, rnode.relNode, + forkNames[forknum], + blk); + } + else + { + appendStringInfo(buf, + ", blkref #%d: rel %u/%u/%u blk %u", + block_id, + rnode.spcNode, rnode.dbNode, rnode.relNode, + blk); + } + + if (XLogRecHasBlockImage(record, block_id)) + { + /* Calculate the amount of FPI data in the record. */ + if (fpi_len) + *fpi_len += XLogRecGetBlock(record, block_id)->bimg_len; + + if (XLogRecBlockImageApply(record, block_id)) + appendStringInfo(buf, " FPW"); + else + appendStringInfo(buf, " FPW for WAL verification"); + } + } + } + + if (!detailed_format && pretty) + appendStringInfoChar(buf, '\n'); +} diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile index 8c17c88dfc..3e5444a6f7 100644 --- a/src/backend/access/transam/Makefile +++ b/src/backend/access/transam/Makefile @@ -34,6 +34,7 @@ OBJS = \ xlogprefetcher.o \ xlogreader.o \ xlogrecovery.o \ + xlogstats.o \ xlogutils.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 5862d9dc75..a5f1a648d3 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -1320,13 +1320,6 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, return true; } -#ifdef FRONTEND -/* - * Functions that are currently not needed in the backend, but are better - * implemented inside xlogreader.c because of the internal facilities available - * here. - */ - /* * Find the first record with an lsn >= RecPtr. * @@ -1447,8 +1440,6 @@ err: return InvalidXLogRecPtr; } -#endif /* FRONTEND */ - /* * Helper function to ease writing of XLogRoutine->page_read callbacks. * If this function is used, caller must supply a segment_open callback in diff --git a/src/backend/access/transam/xlogstats.c b/src/backend/access/transam/xlogstats.c new file mode 100644 index 0000000000..aff3069ecb --- /dev/null +++ b/src/backend/access/transam/xlogstats.c @@ -0,0 +1,93 @@ +/*------------------------------------------------------------------------- + * + * xlogstats.c + * Functions for WAL Statitstics + * + * Copyright (c) 2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/access/transam/xlogstats.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/xlogreader.h" +#include "access/xlogstats.h" + +/* + * Calculate the size of a record, split into !FPI and FPI parts. + */ +void +XLogRecGetLen(XLogReaderState *record, uint32 *rec_len, + uint32 *fpi_len) +{ + int block_id; + + /* + * Calculate the amount of FPI data in the record. + * + * XXX: We peek into xlogreader's private decoded backup blocks for the + * bimg_len indicating the length of FPI data. + */ + *fpi_len = 0; + for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++) + { + if (XLogRecHasBlockImage(record, block_id)) + *fpi_len += XLogRecGetBlock(record, block_id)->bimg_len; + } + + /* + * Calculate the length of the record as the total length - the length of + * all the block images. + */ + *rec_len = XLogRecGetTotalLen(record) - *fpi_len; +} + +/* + * Store per-rmgr and per-record statistics for a given record. + */ +void +XLogRecStoreStats(XLogStats *stats, XLogReaderState *record) +{ + RmgrId rmid; + uint8 recid; + uint32 rec_len; + uint32 fpi_len; + + Assert(stats != NULL && record != NULL); + + stats->count++; + + rmid = XLogRecGetRmid(record); + + XLogRecGetLen(record, &rec_len, &fpi_len); + + /* Update per-rmgr statistics */ + + stats->rmgr_stats[rmid].count++; + stats->rmgr_stats[rmid].rec_len += rec_len; + stats->rmgr_stats[rmid].fpi_len += fpi_len; + + /* + * Update per-record statistics, where the record is identified by a + * combination of the RmgrId and the four bits of the xl_info field that + * are the rmgr's domain (resulting in sixteen possible entries per + * RmgrId). + */ + + recid = XLogRecGetInfo(record) >> 4; + + /* + * XACT records need to be handled differently. Those records use the + * first bit of those four bits for an optional flag variable and the + * following three bits for the opcode. We filter opcode out of xl_info + * and use it as the identifier of the record. + */ + if (rmid == RM_XACT_ID) + recid &= 0x07; + + stats->record_stats[rmid][recid].count++; + stats->record_stats[rmid][recid].rec_len += rec_len; + stats->record_stats[rmid][recid].fpi_len += fpi_len; +} diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index bb2d3ec991..b5d34c61e6 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -80,6 +80,10 @@ typedef struct xl_invalid_page static HTAB *invalid_page_tab = NULL; +static int +read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, + char *cur_page, bool wait_for_wal); /* Report a reference to an invalid page */ static void @@ -870,6 +874,31 @@ wal_segment_close(XLogReaderState *state) int read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page) +{ + return read_local_xlog_page_guts(state, targetPagePtr, reqLen, + targetRecPtr, cur_page, true); +} + +/* + * Same as read_local_xlog_page except that it doesn't wait for future WAL + * to be available. + */ +int +read_local_xlog_page_no_wait(XLogReaderState *state, XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, + char *cur_page) +{ + return read_local_xlog_page_guts(state, targetPagePtr, reqLen, + targetRecPtr, cur_page, false); +} + +/* + * Implementation of read_local_xlog_page and its no wait version. + */ +static int +read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, + char *cur_page, bool wait_for_wal) { XLogRecPtr read_upto, loc; @@ -925,6 +954,10 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, if (loc <= read_upto) break; + /* If asked, let's not wait for future WAL. */ + if (!wait_for_wal) + break; + CHECK_FOR_INTERRUPTS(); pg_usleep(1000L); } diff --git a/src/bin/pg_waldump/.gitignore b/src/bin/pg_waldump/.gitignore index 3be00a8b61..dabb6e34b6 100644 --- a/src/bin/pg_waldump/.gitignore +++ b/src/bin/pg_waldump/.gitignore @@ -23,6 +23,7 @@ /xactdesc.c /xlogdesc.c /xlogreader.c +/xlogstat.c # Generated by test suite /tmp_check/ diff --git a/src/bin/pg_waldump/Makefile b/src/bin/pg_waldump/Makefile index 9f333d0c8a..d6459e17c7 100644 --- a/src/bin/pg_waldump/Makefile +++ b/src/bin/pg_waldump/Makefile @@ -13,7 +13,8 @@ OBJS = \ compat.o \ pg_waldump.o \ rmgrdesc.o \ - xlogreader.o + xlogreader.o \ + xlogstats.o override CPPFLAGS := -DFRONTEND $(CPPFLAGS) @@ -29,6 +30,9 @@ pg_waldump: $(OBJS) | submake-libpgport xlogreader.c: % : $(top_srcdir)/src/backend/access/transam/% rm -f $@ && $(LN_S) $< . +xlogstats.c: % : $(top_srcdir)/src/backend/access/transam/% + rm -f $@ && $(LN_S) $< . + $(RMGRDESCSOURCES): % : $(top_srcdir)/src/backend/access/rmgrdesc/% rm -f $@ && $(LN_S) $< . @@ -42,7 +46,7 @@ uninstall: rm -f '$(DESTDIR)$(bindir)/pg_waldump$(X)' clean distclean maintainer-clean: - rm -f pg_waldump$(X) $(OBJS) $(RMGRDESCSOURCES) xlogreader.c + rm -f pg_waldump$(X) $(OBJS) $(RMGRDESCSOURCES) xlogreader.c xlogstats.c rm -rf tmp_check check: diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 30ca7684bd..8bf6899d67 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -21,11 +21,17 @@ #include "access/xlog_internal.h" #include "access/xlogreader.h" #include "access/xlogrecord.h" +#include "access/xlogstats.h" #include "common/fe_memutils.h" #include "common/logging.h" #include "getopt_long.h" #include "rmgrdesc.h" +/* + * NOTE: For any code change or issue fix here, it is highly recommended to + * give a thought about doing the same in pg_walinspect contrib module as well. + */ + static const char *progname; static int WalSegSz; @@ -66,24 +72,6 @@ typedef struct XLogDumpConfig bool filter_by_fpw; } XLogDumpConfig; -typedef struct Stats -{ - uint64 count; - uint64 rec_len; - uint64 fpi_len; -} Stats; - -#define MAX_XLINFO_TYPES 16 - -typedef struct XLogDumpStats -{ - uint64 count; - XLogRecPtr startptr; - XLogRecPtr endptr; - Stats rmgr_stats[RM_MAX_ID + 1]; - Stats record_stats[RM_MAX_ID + 1][MAX_XLINFO_TYPES]; -} XLogDumpStats; - #define fatal_error(...) do { pg_log_fatal(__VA_ARGS__); exit(EXIT_FAILURE); } while(0) /* @@ -453,81 +441,6 @@ XLogRecordHasFPW(XLogReaderState *record) return false; } -/* - * Calculate the size of a record, split into !FPI and FPI parts. - */ -static void -XLogDumpRecordLen(XLogReaderState *record, uint32 *rec_len, uint32 *fpi_len) -{ - int block_id; - - /* - * Calculate the amount of FPI data in the record. - * - * XXX: We peek into xlogreader's private decoded backup blocks for the - * bimg_len indicating the length of FPI data. - */ - *fpi_len = 0; - for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++) - { - if (XLogRecHasBlockImage(record, block_id)) - *fpi_len += XLogRecGetBlock(record, block_id)->bimg_len; - } - - /* - * Calculate the length of the record as the total length - the length of - * all the block images. - */ - *rec_len = XLogRecGetTotalLen(record) - *fpi_len; -} - -/* - * Store per-rmgr and per-record statistics for a given record. - */ -static void -XLogDumpCountRecord(XLogDumpConfig *config, XLogDumpStats *stats, - XLogReaderState *record) -{ - RmgrId rmid; - uint8 recid; - uint32 rec_len; - uint32 fpi_len; - - stats->count++; - - rmid = XLogRecGetRmid(record); - - XLogDumpRecordLen(record, &rec_len, &fpi_len); - - /* Update per-rmgr statistics */ - - stats->rmgr_stats[rmid].count++; - stats->rmgr_stats[rmid].rec_len += rec_len; - stats->rmgr_stats[rmid].fpi_len += fpi_len; - - /* - * Update per-record statistics, where the record is identified by a - * combination of the RmgrId and the four bits of the xl_info field that - * are the rmgr's domain (resulting in sixteen possible entries per - * RmgrId). - */ - - recid = XLogRecGetInfo(record) >> 4; - - /* - * XACT records need to be handled differently. Those records use the - * first bit of those four bits for an optional flag variable and the - * following three bits for the opcode. We filter opcode out of xl_info - * and use it as the identifier of the record. - */ - if (rmid == RM_XACT_ID) - recid &= 0x07; - - stats->record_stats[rmid][recid].count++; - stats->record_stats[rmid][recid].rec_len += rec_len; - stats->record_stats[rmid][recid].fpi_len += fpi_len; -} - /* * Print a record to stdout */ @@ -538,15 +451,11 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record) const RmgrDescData *desc = GetRmgrDesc(XLogRecGetRmid(record)); uint32 rec_len; uint32 fpi_len; - RelFileNode rnode; - ForkNumber forknum; - BlockNumber blk; - int block_id; uint8 info = XLogRecGetInfo(record); XLogRecPtr xl_prev = XLogRecGetPrev(record); StringInfoData s; - XLogDumpRecordLen(record, &rec_len, &fpi_len); + XLogRecGetLen(record, &rec_len, &fpi_len); printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ", desc->rm_name, @@ -564,93 +473,11 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record) initStringInfo(&s); desc->rm_desc(&s, record); printf("%s", s.data); + + resetStringInfo(&s); + XLogRecGetBlockRefInfo(record, true, config->bkp_details, &s, NULL); + printf("%s", s.data); pfree(s.data); - - if (!config->bkp_details) - { - /* print block references (short format) */ - for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++) - { - if (!XLogRecHasBlockRef(record, block_id)) - continue; - - XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk); - if (forknum != MAIN_FORKNUM) - printf(", blkref #%d: rel %u/%u/%u fork %s blk %u", - block_id, - rnode.spcNode, rnode.dbNode, rnode.relNode, - forkNames[forknum], - blk); - else - printf(", blkref #%d: rel %u/%u/%u blk %u", - block_id, - rnode.spcNode, rnode.dbNode, rnode.relNode, - blk); - if (XLogRecHasBlockImage(record, block_id)) - { - if (XLogRecBlockImageApply(record, block_id)) - printf(" FPW"); - else - printf(" FPW for WAL verification"); - } - } - putchar('\n'); - } - else - { - /* print block references (detailed format) */ - putchar('\n'); - for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++) - { - if (!XLogRecHasBlockRef(record, block_id)) - continue; - - XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk); - printf("\tblkref #%d: rel %u/%u/%u fork %s blk %u", - block_id, - rnode.spcNode, rnode.dbNode, rnode.relNode, - forkNames[forknum], - blk); - if (XLogRecHasBlockImage(record, block_id)) - { - uint8 bimg_info = XLogRecGetBlock(record, block_id)->bimg_info; - - if (BKPIMAGE_COMPRESSED(bimg_info)) - { - const char *method; - - if ((bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0) - method = "pglz"; - else if ((bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0) - method = "lz4"; - else if ((bimg_info & BKPIMAGE_COMPRESS_ZSTD) != 0) - method = "zstd"; - else - method = "unknown"; - - printf(" (FPW%s); hole: offset: %u, length: %u, " - "compression saved: %u, method: %s", - XLogRecBlockImageApply(record, block_id) ? - "" : " for WAL verification", - XLogRecGetBlock(record, block_id)->hole_offset, - XLogRecGetBlock(record, block_id)->hole_length, - BLCKSZ - - XLogRecGetBlock(record, block_id)->hole_length - - XLogRecGetBlock(record, block_id)->bimg_len, - method); - } - else - { - printf(" (FPW%s); hole: offset: %u, length: %u", - XLogRecBlockImageApply(record, block_id) ? - "" : " for WAL verification", - XLogRecGetBlock(record, block_id)->hole_offset, - XLogRecGetBlock(record, block_id)->hole_length); - } - } - putchar('\n'); - } - } } /* @@ -698,7 +525,7 @@ XLogDumpStatsRow(const char *name, * Display summary statistics about the records seen so far. */ static void -XLogDumpDisplayStats(XLogDumpConfig *config, XLogDumpStats *stats) +XLogDumpDisplayStats(XLogDumpConfig *config, XLogStats *stats) { int ri, rj; @@ -722,6 +549,9 @@ XLogDumpDisplayStats(XLogDumpConfig *config, XLogDumpStats *stats) for (ri = 0; ri <= RM_MAX_ID; ri++) { + if (!RmgrIdIsValid(ri)) + continue; + total_count += stats->rmgr_stats[ri].count; total_rec_len += stats->rmgr_stats[ri].rec_len; total_fpi_len += stats->rmgr_stats[ri].fpi_len; @@ -867,7 +697,7 @@ main(int argc, char **argv) XLogReaderState *xlogreader_state; XLogDumpPrivate private; XLogDumpConfig config; - XLogDumpStats stats; + XLogStats stats; XLogRecord *record; XLogRecPtr first_record; char *waldir = NULL; @@ -921,7 +751,7 @@ main(int argc, char **argv) memset(&private, 0, sizeof(XLogDumpPrivate)); memset(&config, 0, sizeof(XLogDumpConfig)); - memset(&stats, 0, sizeof(XLogDumpStats)); + memset(&stats, 0, sizeof(XLogStats)); private.timeline = 1; private.startptr = InvalidXLogRecPtr; @@ -1319,7 +1149,7 @@ main(int argc, char **argv) { if (config.stats == true) { - XLogDumpCountRecord(&config, &stats, xlogreader_state); + XLogRecStoreStats(&stats, xlogreader_state); stats.endptr = xlogreader_state->EndRecPtr; } else diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index e302bd102c..5e1e3446ae 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -31,7 +31,7 @@ extern XLogRecPtr XactLastRecEnd; extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd; /* these variables are GUC parameters related to XLOG */ -extern int wal_segment_size; +extern PGDLLIMPORT int wal_segment_size; extern int min_wal_size_mb; extern int max_wal_size_mb; extern int wal_keep_size_mb; diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index d9df7f295d..750f634120 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -320,7 +320,7 @@ typedef struct RmgrData struct XLogRecordBuffer *buf); } RmgrData; -extern RmgrData RmgrTable[]; +extern PGDLLIMPORT RmgrData RmgrTable[]; extern void RmgrStartup(void); extern void RmgrCleanup(void); extern void RmgrNotFound(RmgrId rmid); @@ -350,6 +350,10 @@ extern XLogRecPtr RequestXLogSwitch(bool mark_unimportant); extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli); +extern void XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty, + bool detailed_format, StringInfo buf, + uint32 *fpi_len); + /* * Exported for the functions in timeline.c and xlogarchive.c. Only valid * in the startup process. diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index d8eb857611..727e9fe971 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -344,9 +344,7 @@ extern void XLogReaderSetDecodeBuffer(XLogReaderState *state, /* Position the XLogReader to given record */ extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr); -#ifdef FRONTEND extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr); -#endif /* FRONTEND */ /* Return values from XLogPageReadCB. */ typedef enum XLogPageReadResult diff --git a/src/include/access/xlogstats.h b/src/include/access/xlogstats.h new file mode 100644 index 0000000000..453bb1f491 --- /dev/null +++ b/src/include/access/xlogstats.h @@ -0,0 +1,40 @@ +/*------------------------------------------------------------------------- + * + * xlogstats.h + * Definitions for WAL Statitstics + * + * Copyright (c) 2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/access/xlogstats.h + * + *------------------------------------------------------------------------- + */ +#ifndef XLOGSTATS_H +#define XLOGSTATS_H + +#define MAX_XLINFO_TYPES 16 + +typedef struct XLogRecStats +{ + uint64 count; + uint64 rec_len; + uint64 fpi_len; +} XLogRecStats; + +typedef struct XLogStats +{ + uint64 count; +#ifdef FRONTEND + XLogRecPtr startptr; + XLogRecPtr endptr; +#endif + XLogRecStats rmgr_stats[RM_MAX_ID + 1]; + XLogRecStats record_stats[RM_MAX_ID + 1][MAX_XLINFO_TYPES]; +} XLogStats; + +extern void XLogRecGetLen(XLogReaderState *record, uint32 *rec_len, + uint32 *fpi_len); +extern void XLogRecStoreStats(XLogStats *stats, XLogReaderState *record); + +#endif /* XLOGSTATS_H */ diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h index ff40f96e42..3746e31e40 100644 --- a/src/include/access/xlogutils.h +++ b/src/include/access/xlogutils.h @@ -93,6 +93,10 @@ extern void FreeFakeRelcacheEntry(Relation fakerel); extern int read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page); +extern int read_local_xlog_page_no_wait(XLogReaderState *state, + XLogRecPtr targetPagePtr, int reqLen, + XLogRecPtr targetRecPtr, + char *cur_page); extern void wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p);