pg_walinspect: pg_get_wal_fpi_info() -> pg_get_wal_block_info()

This commit reworks pg_get_wal_fpi_info() to become aware of all the
block information that can be attached to a record rather than just its
full-page writes:
- Addition of the block id as assigned by XLogRegisterBuffer(),
XLogRegisterBlock() or XLogRegisterBufData().
- Addition of the block data, as bytea, or NULL if none.  The length of
the block data can be guessed with length(), so there is no need to
store its length in a separate field.
- Addition of the full-page image length, as counted without a hole or
even compressed.
- Modification of the handling of the full-page image data.  This is
still a bytea, but it could become NULL if none is assigned to a block.
- Addition of the full-page image flags, tracking if a page is stored
with a hole, if it needs to be applied and the type of compression
applied to it, as of all the BKPIMAGE_* values in xlogrecord.h.

The information of each block is returned as one single record, with the
record's ReadRecPtr included to be able to join the block information
with the existing pg_get_wal_records_info().  Note that it is perfectly
possible for a block to hold both data and full-page image.

Thanks also to Kyotaro Horiguchi and Matthias van de Meent for the
discussion.

This commit uses some of the work proposed by Melanie, though it has
been largely redesigned and rewritten by me.  Bharath has helped in
refining a bit the whole.

Reported-by: Melanie Plageman
Author: Michael Paquier, Melanie Plageman, Bharath Rupireddy
Discussion: https://postgr.es/m/CAAKRu_bORebdZmcV8V4cZBzU8M_C6tDDdbiPhCZ6i-iuSXW9TA@mail.gmail.com
This commit is contained in:
Michael Paquier 2023-03-10 10:09:07 +09:00
parent 8da2ec31ee
commit 9ecb134a93
5 changed files with 174 additions and 86 deletions

View File

@ -74,17 +74,28 @@ SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2'
(1 row)
-- ===================================================================
-- Tests to get full page image (FPI) from WAL record
-- Tests to get block information from WAL record
-- ===================================================================
-- Update table to generate some block data
SELECT pg_current_wal_lsn() AS wal_lsn3 \gset
-- Force FPI on the next update.
CHECKPOINT;
-- Update table to generate an FPI.
UPDATE sample_tbl SET col1 = col1 * 100 WHERE col1 = 1;
UPDATE sample_tbl SET col1 = col1 + 1 WHERE col1 = 1;
SELECT pg_current_wal_lsn() AS wal_lsn4 \gset
-- Check if we get block data from WAL record.
SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_block_info(:'wal_lsn3', :'wal_lsn4')
WHERE relfilenode = :'sample_tbl_oid' AND blockdata IS NOT NULL;
ok
----
t
(1 row)
-- Force full-page image on the next update.
SELECT pg_current_wal_lsn() AS wal_lsn5 \gset
CHECKPOINT;
UPDATE sample_tbl SET col1 = col1 + 1 WHERE col1 = 2;
SELECT pg_current_wal_lsn() AS wal_lsn6 \gset
-- Check if we get FPI from WAL record.
SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_fpi_info(:'wal_lsn3', :'wal_lsn4')
WHERE relfilenode = :'sample_tbl_oid';
SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_block_info(:'wal_lsn5', :'wal_lsn6')
WHERE relfilenode = :'sample_tbl_oid' AND fpi IS NOT NULL;
ok
----
t
@ -116,7 +127,7 @@ SELECT has_function_privilege('regress_pg_walinspect',
(1 row)
SELECT has_function_privilege('regress_pg_walinspect',
'pg_get_wal_fpi_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- no
'pg_get_wal_block_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- no
has_function_privilege
------------------------
f
@ -146,7 +157,7 @@ SELECT has_function_privilege('regress_pg_walinspect',
(1 row)
SELECT has_function_privilege('regress_pg_walinspect',
'pg_get_wal_fpi_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
'pg_get_wal_block_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
has_function_privilege
------------------------
t
@ -160,7 +171,7 @@ GRANT EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn)
TO regress_pg_walinspect;
GRANT EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean)
TO regress_pg_walinspect;
GRANT EXECUTE ON FUNCTION pg_get_wal_fpi_info(pg_lsn, pg_lsn)
GRANT EXECUTE ON FUNCTION pg_get_wal_block_info(pg_lsn, pg_lsn)
TO regress_pg_walinspect;
SELECT has_function_privilege('regress_pg_walinspect',
'pg_get_wal_record_info(pg_lsn)', 'EXECUTE'); -- yes
@ -184,7 +195,7 @@ SELECT has_function_privilege('regress_pg_walinspect',
(1 row)
SELECT has_function_privilege('regress_pg_walinspect',
'pg_get_wal_fpi_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
'pg_get_wal_block_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
has_function_privilege
------------------------
t
@ -196,7 +207,7 @@ REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn)
FROM regress_pg_walinspect;
REVOKE EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean)
FROM regress_pg_walinspect;
REVOKE EXECUTE ON FUNCTION pg_get_wal_fpi_info(pg_lsn, pg_lsn)
REVOKE EXECUTE ON FUNCTION pg_get_wal_block_info(pg_lsn, pg_lsn)
FROM regress_pg_walinspect;
-- ===================================================================
-- Clean up

View File

@ -4,21 +4,25 @@
\echo Use "ALTER EXTENSION pg_walinspect UPDATE TO '1.1'" to load this file. \quit
--
-- pg_get_wal_fpi_info()
-- pg_get_wal_block_info()
--
CREATE FUNCTION pg_get_wal_fpi_info(IN start_lsn pg_lsn,
CREATE FUNCTION pg_get_wal_block_info(IN start_lsn pg_lsn,
IN end_lsn pg_lsn,
OUT lsn pg_lsn,
OUT blockid int2,
OUT reltablespace oid,
OUT reldatabase oid,
OUT relfilenode oid,
OUT relblocknumber int8,
OUT forkname text,
OUT fpi bytea
OUT blockdata bytea,
OUT fpi bytea,
OUT fpilen int4,
OUT fpiinfo text[]
)
RETURNS SETOF record
AS 'MODULE_PATHNAME', 'pg_get_wal_fpi_info'
AS 'MODULE_PATHNAME', 'pg_get_wal_block_info'
LANGUAGE C STRICT PARALLEL SAFE;
REVOKE EXECUTE ON FUNCTION pg_get_wal_fpi_info(pg_lsn, pg_lsn) FROM PUBLIC;
GRANT EXECUTE ON FUNCTION pg_get_wal_fpi_info(pg_lsn, pg_lsn) TO pg_read_server_files;
REVOKE EXECUTE ON FUNCTION pg_get_wal_block_info(pg_lsn, pg_lsn) FROM PUBLIC;
GRANT EXECUTE ON FUNCTION pg_get_wal_block_info(pg_lsn, pg_lsn) TO pg_read_server_files;

View File

@ -20,6 +20,7 @@
#include "access/xlogutils.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/pg_lsn.h"
@ -30,7 +31,7 @@
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(pg_get_wal_fpi_info);
PG_FUNCTION_INFO_V1(pg_get_wal_block_info);
PG_FUNCTION_INFO_V1(pg_get_wal_record_info);
PG_FUNCTION_INFO_V1(pg_get_wal_records_info);
PG_FUNCTION_INFO_V1(pg_get_wal_records_info_till_end_of_wal);
@ -56,7 +57,7 @@ static void FillXLogStatsRow(const char *name, uint64 n, uint64 total_count,
Datum *values, bool *nulls, uint32 ncols);
static void GetWalStats(FunctionCallInfo fcinfo, XLogRecPtr start_lsn,
XLogRecPtr end_lsn, bool stats_per_record);
static void GetWALFPIInfo(FunctionCallInfo fcinfo, XLogReaderState *record);
static void GetWALBlockInfo(FunctionCallInfo fcinfo, XLogReaderState *record);
/*
* Check if the given LSN is in future. Also, return the LSN up to which the
@ -221,49 +222,40 @@ GetWALRecordInfo(XLogReaderState *record, Datum *values,
/*
* Store a set of full page images from a single record.
* Store a set of block information from a single record (FPI and block
* information).
*/
static void
GetWALFPIInfo(FunctionCallInfo fcinfo, XLogReaderState *record)
GetWALBlockInfo(FunctionCallInfo fcinfo, XLogReaderState *record)
{
#define PG_GET_WAL_FPI_INFO_COLS 7
#define PG_GET_WAL_BLOCK_INFO_COLS 11
int block_id;
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
{
PGAlignedBlock buf;
Page page;
bytea *raw_page;
BlockNumber blk;
DecodedBkpBlock *blk;
BlockNumber blkno;
RelFileLocator rnode;
ForkNumber fork;
Datum values[PG_GET_WAL_FPI_INFO_COLS] = {0};
bool nulls[PG_GET_WAL_FPI_INFO_COLS] = {0};
Datum values[PG_GET_WAL_BLOCK_INFO_COLS] = {0};
bool nulls[PG_GET_WAL_BLOCK_INFO_COLS] = {0};
int i = 0;
if (!XLogRecHasBlockRef(record, block_id))
continue;
if (!XLogRecHasBlockImage(record, block_id))
continue;
blk = XLogRecGetBlock(record, block_id);
page = (Page) buf.data;
if (!RestoreBlockImage(record, block_id, page))
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg_internal("%s", record->errormsg_buf)));
/* Full page exists, so let's save it. */
(void) XLogRecGetBlockTagExtended(record, block_id,
&rnode, &fork, &blk, NULL);
&rnode, &fork, &blkno, NULL);
values[i++] = LSNGetDatum(record->ReadRecPtr);
values[i++] = ObjectIdGetDatum(rnode.spcOid);
values[i++] = ObjectIdGetDatum(rnode.dbOid);
values[i++] = ObjectIdGetDatum(rnode.relNumber);
values[i++] = Int64GetDatum((int64) blk);
values[i++] = Int16GetDatum(block_id);
values[i++] = ObjectIdGetDatum(blk->rlocator.spcOid);
values[i++] = ObjectIdGetDatum(blk->rlocator.dbOid);
values[i++] = ObjectIdGetDatum(blk->rlocator.relNumber);
values[i++] = Int64GetDatum((int64) blkno);
if (fork >= 0 && fork <= MAX_FORKNUM)
values[i++] = CStringGetTextDatum(forkNames[fork]);
@ -272,34 +264,102 @@ GetWALFPIInfo(FunctionCallInfo fcinfo, XLogReaderState *record)
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg_internal("invalid fork number: %u", fork)));
/* Initialize bytea buffer to copy the FPI to. */
raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ);
SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ);
/* Block data */
if (blk->has_data)
{
bytea *raw_data;
/* Take a verbatim copy of the FPI. */
memcpy(VARDATA(raw_page), page, BLCKSZ);
/* Initialize bytea buffer to copy the data to */
raw_data = (bytea *) palloc(blk->data_len + VARHDRSZ);
SET_VARSIZE(raw_data, blk->data_len + VARHDRSZ);
values[i++] = PointerGetDatum(raw_page);
/* Copy the data */
memcpy(VARDATA(raw_data), blk->data, blk->data_len);
values[i++] = PointerGetDatum(raw_data);
}
else
{
/* No data, so set this field to NULL */
nulls[i++] = true;
}
Assert(i == PG_GET_WAL_FPI_INFO_COLS);
if (blk->has_image)
{
PGAlignedBlock buf;
Page page;
bytea *raw_page;
int bitcnt;
int cnt = 0;
Datum *flags;
ArrayType *a;
page = (Page) buf.data;
/* Full page image exists, so let's save it */
if (!RestoreBlockImage(record, block_id, page))
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg_internal("%s", record->errormsg_buf)));
/* Initialize bytea buffer to copy the FPI to */
raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ);
SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ);
/* Take a verbatim copy of the FPI */
memcpy(VARDATA(raw_page), page, BLCKSZ);
values[i++] = PointerGetDatum(raw_page);
values[i++] = UInt32GetDatum(blk->bimg_len);
/* FPI flags */
bitcnt = pg_popcount((const char *) &blk->bimg_info,
sizeof(uint8));
/* Build set of raw flags */
flags = (Datum *) palloc0(sizeof(Datum) * bitcnt);
if ((blk->bimg_info & BKPIMAGE_HAS_HOLE) != 0)
flags[cnt++] = CStringGetTextDatum("HAS_HOLE");
if (blk->apply_image)
flags[cnt++] = CStringGetTextDatum("APPLY");
if ((blk->bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0)
flags[cnt++] = CStringGetTextDatum("COMPRESS_PGLZ");
if ((blk->bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0)
flags[cnt++] = CStringGetTextDatum("COMPRESS_LZ4");
if ((blk->bimg_info & BKPIMAGE_COMPRESS_ZSTD) != 0)
flags[cnt++] = CStringGetTextDatum("COMPRESS_ZSTD");
Assert(cnt <= bitcnt);
a = construct_array_builtin(flags, cnt, TEXTOID);
values[i++] = PointerGetDatum(a);
}
else
{
/* No full page image, so store NULLs for all its fields */
memset(&nulls[i], true, 3 * sizeof(bool));
i += 3;
}
Assert(i == PG_GET_WAL_BLOCK_INFO_COLS);
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
values, nulls);
}
#undef PG_GET_WAL_FPI_INFO_COLS
#undef PG_GET_WAL_FPI_BLOCK_COLS
}
/*
* Get full page images with their relation information for all the WAL
* records between start and end LSNs. Decompression is applied to the
* blocks, if necessary.
* Get information about all the blocks saved in WAL records between start
* and end LSNs. This produces information about the full page images with
* their relation information, and the data saved in each block associated
* to a record. Decompression is applied to the full page images, if
* necessary.
*
* This function emits an error if a future start or end WAL LSN i.e. WAL LSN
* the database system doesn't know about is specified.
*/
Datum
pg_get_wal_fpi_info(PG_FUNCTION_ARGS)
pg_get_wal_block_info(PG_FUNCTION_ARGS)
{
XLogRecPtr start_lsn;
XLogRecPtr end_lsn;
@ -317,7 +377,7 @@ pg_get_wal_fpi_info(PG_FUNCTION_ARGS)
xlogreader = InitXLogReaderState(start_lsn);
tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
"pg_get_wal_fpi_info temporary cxt",
"pg_get_block_fpi_info temporary cxt",
ALLOCSET_DEFAULT_SIZES);
while (ReadNextXLogRecord(xlogreader) &&
@ -326,7 +386,7 @@ pg_get_wal_fpi_info(PG_FUNCTION_ARGS)
/* Use the tmp context so we can clean up after each tuple is done */
old_cxt = MemoryContextSwitchTo(tmp_cxt);
GetWALFPIInfo(fcinfo, xlogreader);
GetWALBlockInfo(fcinfo, xlogreader);
/* clean up and switch back */
MemoryContextSwitchTo(old_cxt);

View File

@ -53,20 +53,25 @@ SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2'
WHERE resource_manager = 'Heap' AND record_type = 'INSERT';
-- ===================================================================
-- Tests to get full page image (FPI) from WAL record
-- Tests to get block information from WAL record
-- ===================================================================
-- Update table to generate some block data
SELECT pg_current_wal_lsn() AS wal_lsn3 \gset
-- Force FPI on the next update.
CHECKPOINT;
-- Update table to generate an FPI.
UPDATE sample_tbl SET col1 = col1 * 100 WHERE col1 = 1;
UPDATE sample_tbl SET col1 = col1 + 1 WHERE col1 = 1;
SELECT pg_current_wal_lsn() AS wal_lsn4 \gset
-- Check if we get block data from WAL record.
SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_block_info(:'wal_lsn3', :'wal_lsn4')
WHERE relfilenode = :'sample_tbl_oid' AND blockdata IS NOT NULL;
-- Force full-page image on the next update.
SELECT pg_current_wal_lsn() AS wal_lsn5 \gset
CHECKPOINT;
UPDATE sample_tbl SET col1 = col1 + 1 WHERE col1 = 2;
SELECT pg_current_wal_lsn() AS wal_lsn6 \gset
-- Check if we get FPI from WAL record.
SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_fpi_info(:'wal_lsn3', :'wal_lsn4')
WHERE relfilenode = :'sample_tbl_oid';
SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_block_info(:'wal_lsn5', :'wal_lsn6')
WHERE relfilenode = :'sample_tbl_oid' AND fpi IS NOT NULL;
-- ===================================================================
-- Tests for permissions
@ -83,7 +88,7 @@ SELECT has_function_privilege('regress_pg_walinspect',
'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- no
SELECT has_function_privilege('regress_pg_walinspect',
'pg_get_wal_fpi_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- no
'pg_get_wal_block_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- no
-- Functions accessible by users with role pg_read_server_files
@ -99,7 +104,7 @@ SELECT has_function_privilege('regress_pg_walinspect',
'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes
SELECT has_function_privilege('regress_pg_walinspect',
'pg_get_wal_fpi_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
'pg_get_wal_block_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
REVOKE pg_read_server_files FROM regress_pg_walinspect;
@ -113,7 +118,7 @@ GRANT EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn)
GRANT EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean)
TO regress_pg_walinspect;
GRANT EXECUTE ON FUNCTION pg_get_wal_fpi_info(pg_lsn, pg_lsn)
GRANT EXECUTE ON FUNCTION pg_get_wal_block_info(pg_lsn, pg_lsn)
TO regress_pg_walinspect;
SELECT has_function_privilege('regress_pg_walinspect',
@ -126,7 +131,7 @@ SELECT has_function_privilege('regress_pg_walinspect',
'pg_get_wal_stats(pg_lsn, pg_lsn, boolean) ', 'EXECUTE'); -- yes
SELECT has_function_privilege('regress_pg_walinspect',
'pg_get_wal_fpi_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
'pg_get_wal_block_info(pg_lsn, pg_lsn) ', 'EXECUTE'); -- yes
REVOKE EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn)
FROM regress_pg_walinspect;
@ -137,7 +142,7 @@ REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn)
REVOKE EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn, boolean)
FROM regress_pg_walinspect;
REVOKE EXECUTE ON FUNCTION pg_get_wal_fpi_info(pg_lsn, pg_lsn)
REVOKE EXECUTE ON FUNCTION pg_get_wal_block_info(pg_lsn, pg_lsn)
FROM regress_pg_walinspect;
-- ===================================================================

View File

@ -190,31 +190,39 @@ combined_size_percentage | 2.8634072910530795
<varlistentry>
<term>
<function>pg_get_wal_fpi_info(start_lsn pg_lsn, end_lsn pg_lsn) returns setof record</function>
<function>pg_get_wal_block_info(start_lsn pg_lsn, end_lsn pg_lsn) returns setof record</function>
</term>
<listitem>
<para>
Gets a copy of full page images as <type>bytea</type> values (after
applying decompression when necessary) and their information associated
with all the valid WAL records between
Gets a copy of the block information stored in WAL records. This includes
copies of the block data (<literal>NULL</literal> if none) and full page
images as <type>bytea</type> values (after
applying decompression when necessary, or <literal>NULL</literal> if none)
and their information associated with all the valid WAL records between
<replaceable>start_lsn</replaceable> and
<replaceable>end_lsn</replaceable>. Returns one row per full page image.
If <replaceable>start_lsn</replaceable> or
<replaceable>end_lsn</replaceable>. Returns one row per block registered
in a WAL record. If <replaceable>start_lsn</replaceable> or
<replaceable>end_lsn</replaceable> are not yet available, the function
will raise an error. For example:
<screen>
postgres=# SELECT lsn, reltablespace, reldatabase, relfilenode, relblocknumber,
forkname, substring(fpi for 24) as fpi_trimmed
FROM pg_get_wal_fpi_info('0/1801690', '0/1825C60');
postgres=# SELECT lsn, blockid, reltablespace, reldatabase, relfilenode,
relblocknumber, forkname,
substring(blockdata for 24) as block_trimmed,
substring(fpi for 24) as fpi_trimmed, fpilen, fpiinfo
FROM pg_get_wal_block_info('0/1871080', '0/1871440');
-[ RECORD 1 ]--+---------------------------------------------------
lsn | 0/1807E20
lsn | 0/18712F8
blockid | 0
reltablespace | 1663
reldatabase | 5
relfilenode | 16396
relblocknumber | 43
reldatabase | 16384
relfilenode | 16392
relblocknumber | 0
forkname | main
fpi_trimmed | \x00000000b89e660100000000a003c0030020042000000000
block_trimmed | \x02800128180164000000
fpi_trimmed | \x0000000050108701000000002c00601f00200420e0020000
fpilen | 204
fpiinfo | {HAS_HOLE,APPLY}
</screen>
</para>
</listitem>