diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 4f8058d8b1..b804eb8b5e 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -2982,6 +2982,33 @@ description | Waiting for a newly initialized WAL file to reach durable storage + + + restartpoints_timed bigint + + + Number of restartpoints scheduled due to timeout or after a failed attempt to perform one + + + + + + restartpoints_req bigint + + + Number of requested restartpoints + + + + + + restartpoints_done bigint + + + Number of restartpoints that have been performed + + + write_time double precision diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml index 2ed4eb659d..05e2a8f8be 100644 --- a/doc/src/sgml/wal.sgml +++ b/doc/src/sgml/wal.sgml @@ -655,14 +655,41 @@ directory. Restartpoints can't be performed more frequently than checkpoints on the primary because restartpoints can only be performed at checkpoint records. - A restartpoint is triggered when a checkpoint record is reached if at - least checkpoint_timeout seconds have passed since the last - restartpoint, or if WAL size is about to exceed - max_wal_size. However, because of limitations on when a - restartpoint can be performed, max_wal_size is often exceeded - during recovery, by up to one checkpoint cycle's worth of WAL. + A restartpoint can be triggered either on a schedule or by an external request. + The restartpoints_timed counter in the + pg_stat_checkpointer + view counts the former, while the restartpoints_req + counter counts the latter. + A restartpoint is triggered by schedule when a checkpoint record is reached + if at least checkpoint_timeout seconds have passed since + the last performed restartpoint or when the previous attempt to perform + the restartpoint has failed. In the latter case, the next restartpoint + will be scheduled in 15 seconds. 
+ A restartpoint is triggered by request for reasons similar to those for a checkpoint, + but most commonly when WAL size is about to exceed max_wal_size. + However, because of limitations on when a restartpoint can be performed, + max_wal_size is often exceeded during recovery, + by up to one checkpoint cycle's worth of WAL. (max_wal_size is never a hard limit anyway, so you should always leave plenty of headroom to avoid running out of disk space.) + The restartpoints_done counter in the + pg_stat_checkpointer + view counts the restartpoints that have actually been performed. + + + + In some cases, when the WAL size on the primary increases quickly, + for instance during a massive INSERT, + the restartpoints_req counter on the standby + may show a sharp increase. + This occurs because requests to create a new restartpoint due to increased + WAL consumption cannot be performed until a safe checkpoint record + since the last restartpoint has been replayed on the standby. + This behavior is normal and does not lead to an increase in system resource + consumption. + Only the restartpoints_done + counter among the restartpoint-related ones indicates that noticeable system + resources have been spent. 
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 11d18ed9dd..058fc47c91 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1141,6 +1141,9 @@ CREATE VIEW pg_stat_checkpointer AS SELECT pg_stat_get_checkpointer_num_timed() AS num_timed, pg_stat_get_checkpointer_num_requested() AS num_requested, + pg_stat_get_checkpointer_restartpoints_timed() AS restartpoints_timed, + pg_stat_get_checkpointer_restartpoints_requested() AS restartpoints_req, + pg_stat_get_checkpointer_restartpoints_performed() AS restartpoints_done, pg_stat_get_checkpointer_write_time() AS write_time, pg_stat_get_checkpointer_sync_time() AS sync_time, pg_stat_get_checkpointer_buffers_written() AS buffers_written, diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index dc2da5a2cd..67ecb177e7 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -340,6 +340,8 @@ CheckpointerMain(void) pg_time_t now; int elapsed_secs; int cur_timeout; + bool chkpt_or_rstpt_requested = false; + bool chkpt_or_rstpt_timed = false; /* Clear any already-pending wakeups */ ResetLatch(MyLatch); @@ -358,7 +360,7 @@ CheckpointerMain(void) if (((volatile CheckpointerShmemStruct *) CheckpointerShmem)->ckpt_flags) { do_checkpoint = true; - PendingCheckpointerStats.num_requested++; + chkpt_or_rstpt_requested = true; } /* @@ -372,7 +374,7 @@ CheckpointerMain(void) if (elapsed_secs >= CheckPointTimeout) { if (!do_checkpoint) - PendingCheckpointerStats.num_timed++; + chkpt_or_rstpt_timed = true; do_checkpoint = true; flags |= CHECKPOINT_CAUSE_TIME; } @@ -408,6 +410,24 @@ CheckpointerMain(void) if (flags & CHECKPOINT_END_OF_RECOVERY) do_restartpoint = false; + if (chkpt_or_rstpt_timed) + { + chkpt_or_rstpt_timed = false; + if (do_restartpoint) + PendingCheckpointerStats.restartpoints_timed++; + else + PendingCheckpointerStats.num_timed++; + } + + if 
(chkpt_or_rstpt_requested) + { + chkpt_or_rstpt_requested = false; + if (do_restartpoint) + PendingCheckpointerStats.restartpoints_requested++; + else + PendingCheckpointerStats.num_requested++; + } + /* * We will warn if (a) too soon since last checkpoint (whatever * caused it) and (b) somebody set the CHECKPOINT_CAUSE_XLOG flag @@ -471,6 +491,9 @@ CheckpointerMain(void) * checkpoints happen at a predictable spacing. */ last_checkpoint_time = now; + + if (do_restartpoint) + PendingCheckpointerStats.restartpoints_performed++; } else { diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c index 301a0bc7bd..6ee258f240 100644 --- a/src/backend/utils/activity/pgstat_checkpointer.c +++ b/src/backend/utils/activity/pgstat_checkpointer.c @@ -49,6 +49,9 @@ pgstat_report_checkpointer(void) #define CHECKPOINTER_ACC(fld) stats_shmem->stats.fld += PendingCheckpointerStats.fld CHECKPOINTER_ACC(num_timed); CHECKPOINTER_ACC(num_requested); + CHECKPOINTER_ACC(restartpoints_timed); + CHECKPOINTER_ACC(restartpoints_requested); + CHECKPOINTER_ACC(restartpoints_performed); CHECKPOINTER_ACC(write_time); CHECKPOINTER_ACC(sync_time); CHECKPOINTER_ACC(buffers_written); @@ -116,6 +119,9 @@ pgstat_checkpointer_snapshot_cb(void) #define CHECKPOINTER_COMP(fld) pgStatLocal.snapshot.checkpointer.fld -= reset.fld; CHECKPOINTER_COMP(num_timed); CHECKPOINTER_COMP(num_requested); + CHECKPOINTER_COMP(restartpoints_timed); + CHECKPOINTER_COMP(restartpoints_requested); + CHECKPOINTER_COMP(restartpoints_performed); CHECKPOINTER_COMP(write_time); CHECKPOINTER_COMP(sync_time); CHECKPOINTER_COMP(buffers_written); diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 0cea320c00..e65cbf41e9 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1193,6 +1193,24 @@ pg_stat_get_checkpointer_num_requested(PG_FUNCTION_ARGS) 
PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->num_requested); } +Datum +pg_stat_get_checkpointer_restartpoints_timed(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->restartpoints_timed); +} + +Datum +pg_stat_get_checkpointer_restartpoints_requested(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->restartpoints_requested); +} + +Datum +pg_stat_get_checkpointer_restartpoints_performed(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->restartpoints_performed); +} + Datum pg_stat_get_checkpointer_buffers_written(PG_FUNCTION_ARGS) { diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index ae1bee42a9..2fd601add0 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202312211 +#define CATALOG_VERSION_NO 202312251 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index b8b26c263d..9052f5262a 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5721,6 +5721,21 @@ proname => 'pg_stat_get_checkpointer_num_requested', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_checkpointer_num_requested' }, +{ oid => '8743', + descr => 'statistics: number of timed restartpoints started by the checkpointer', + proname => 'pg_stat_get_checkpointer_restartpoints_timed', provolatile => 's', + proparallel => 'r', prorettype => 'int8', proargtypes => '', + prosrc => 'pg_stat_get_checkpointer_restartpoints_timed' }, +{ oid => '8744', + descr => 'statistics: number of backend requested restartpoints started by the checkpointer', + proname => 'pg_stat_get_checkpointer_restartpoints_requested', provolatile => 's', + proparallel => 'r', prorettype => 'int8', proargtypes => '', + prosrc => 'pg_stat_get_checkpointer_restartpoints_requested' }, +{ oid => '8745', + descr => 
'statistics: number of backend performed restartpoints', + proname => 'pg_stat_get_checkpointer_restartpoints_performed', provolatile => 's', + proparallel => 'r', prorettype => 'int8', proargtypes => '', + prosrc => 'pg_stat_get_checkpointer_restartpoints_performed' }, { oid => '2771', descr => 'statistics: number of buffers written by the checkpointer', proname => 'pg_stat_get_checkpointer_buffers_written', provolatile => 's', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index fc93d0d731..ab91b3b367 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -262,6 +262,9 @@ typedef struct PgStat_CheckpointerStats { PgStat_Counter num_timed; PgStat_Counter num_requested; + PgStat_Counter restartpoints_timed; + PgStat_Counter restartpoints_requested; + PgStat_Counter restartpoints_performed; PgStat_Counter write_time; /* times in milliseconds */ PgStat_Counter sync_time; PgStat_Counter buffers_written; diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 05070393b9..f645e8486b 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1822,6 +1822,9 @@ pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_buf_written_clean() AS buffers_cle pg_stat_get_bgwriter_stat_reset_time() AS stats_reset; pg_stat_checkpointer| SELECT pg_stat_get_checkpointer_num_timed() AS num_timed, pg_stat_get_checkpointer_num_requested() AS num_requested, + pg_stat_get_checkpointer_restartpoints_timed() AS restartpoints_timed, + pg_stat_get_checkpointer_restartpoints_requested() AS restartpoints_req, + pg_stat_get_checkpointer_restartpoints_performed() AS restartpoints_done, pg_stat_get_checkpointer_write_time() AS write_time, pg_stat_get_checkpointer_sync_time() AS sync_time, pg_stat_get_checkpointer_buffers_written() AS buffers_written,