/* ----------
 * pgstat.c
 *
 * Infrastructure for the cumulative statistics system.
 *
 * The cumulative statistics system accumulates statistics for different kinds
 * of objects. Some kinds of statistics are collected for a fixed number of
 * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of
 * statistics are collected for a varying number of objects
 * (e.g. relations). See PgStat_KindInfo for a list of currently handled
 * statistics.
 *
 * Statistics are loaded from the filesystem during startup (by the startup
 * process), unless preceded by a crash, in which case all stats are
 * discarded. They are written out by the checkpointer process just before
 * shutting down, except when shutting down in immediate mode.
 *
 * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
 *
 * Statistics for variable-numbered objects are stored in dynamic shared
 * memory and can be found via a dshash hashtable. The statistics counters are
 * not part of the dshash entry (PgStatShared_HashEntry) directly, but are
 * separately allocated (PgStatShared_HashEntry->body). The separate
 * allocation allows different kinds of statistics to be stored in the same
 * hashtable without wasting space in PgStatShared_HashEntry.
 *
 * Variable-numbered stats are addressed by PgStat_HashKey while running. It
 * is not possible to have statistics for an object that cannot be addressed
 * that way at runtime. A wider identifier can be used when serializing to
 * disk (used for replication slot stats).
 *
 * To avoid contention on the shared hashtable, each backend has a
 * backend-local hashtable (pgStatEntryRefHash) in front of the shared
 * hashtable, containing references (PgStat_EntryRef) to shared hashtable
 * entries. The shared hashtable only needs to be accessed when no prior
 * reference is found in the local hashtable. Besides pointing to the
 * shared hashtable entry (PgStatShared_HashEntry), PgStat_EntryRef also
 * contains a pointer to the shared statistics data, as a process-local
 * address, to reduce access costs.
 *
 * The names for structs stored in shared memory are prefixed with
 * PgStatShared instead of PgStat. Each stats entry in shared memory is
 * protected by a dedicated lwlock.
 *
 * Most stats updates are first accumulated locally in each process as pending
 * entries, then later flushed to shared memory (just after commit, or by
 * idle-timeout). This practically eliminates contention on individual stats
 * entries. For most kinds of variable-numbered statistics, pending data is
 * stored in PgStat_EntryRef->pending. All entries with pending data are in
 * the pgStatPending list. Pending statistics updates are flushed out by
 * pgstat_report_stat().
 *
 * The behavior of different kinds of statistics is determined by the kind's
 * entry in pgstat_kind_infos, see PgStat_KindInfo for details.
 *
 * The consistency of read accesses to statistics can be configured using the
 * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the
 * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or
 * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT, statistics are stored in
 * pgStatLocal.snapshot.
 *
 * To keep things manageable, stats handling is split across several
 * files. Infrastructure pieces are in:
 * - pgstat.c - this file, to tie it all together
 * - pgstat_shmem.c - nearly everything dealing with shared memory, including
 *   the maintenance of hashtable entries
 * - pgstat_xact.c - transactional integration, including the transactional
 *   creation and dropping of stats entries
 *
 * Each statistics kind is handled in a dedicated file:
 * - pgstat_archiver.c
 * - pgstat_bgwriter.c
 * - pgstat_checkpointer.c
 * - pgstat_database.c
 * - pgstat_function.c
 * - pgstat_relation.c
 * - pgstat_slru.c
 * - pgstat_subscription.c
 * - pgstat_wal.c
 *
 * Whenever possible, infrastructure files should not contain code related to
 * specific kinds of stats.
 *
 *
 * Copyright (c) 2001-2022, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/postmaster/pgstat.c
 * ----------
 */

#include "postgres.h"

#include <unistd.h>

#include "access/transam.h"
#include "access/xact.h"
#include "lib/dshash.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pg_shmem.h"
#include "storage/shmem.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/pgstat_internal.h"
#include "utils/timestamp.h"


/* ----------
 * Timer definitions.
 *
 * In milliseconds.
 * ----------
 */

/* minimum interval between non-forced stats flushes */
#define PGSTAT_MIN_INTERVAL			1000
/* how long until blocking on flushing pending stats updates */
#define PGSTAT_MAX_INTERVAL			60000
/* when to call pgstat_report_stat() again, even when idle */
#define PGSTAT_IDLE_INTERVAL		10000
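
/*
 * These intervals govern pgstat_report_stat() (see below). A sketch of the
 * expected calling pattern, loosely modeled on the idle loop in
 * tcop/postgres.c (illustrative only; details there may differ):
 *
 *		long	stats_timeout = pgstat_report_stat(false);
 *
 *		if (stats_timeout > 0)
 *			enable_timeout_after(IDLE_STATS_UPDATE_TIMEOUT, stats_timeout);
 *
 * i.e. a non-zero return value is the suggested number of milliseconds after
 * which the caller should call pgstat_report_stat(true) again.
 */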


/* ----------
 * Initial size hints for the hash tables used in statistics.
 * ----------
 */

#define PGSTAT_SNAPSHOT_HASH_SIZE	512


/* hash table entry for statistics snapshots */
typedef struct PgStat_SnapshotEntry
{
	PgStat_HashKey key;
	char		status;			/* for simplehash use */
	void	   *data;			/* the stats data itself */
} PgStat_SnapshotEntry;


/* ----------
 * Backend-local Hash Table Definitions
 * ----------
 */

/* for stats snapshot entries */
#define SH_PREFIX pgstat_snapshot
#define SH_ELEMENT_TYPE PgStat_SnapshotEntry
#define SH_KEY_TYPE PgStat_HashKey
#define SH_KEY key
#define SH_HASH_KEY(tb, key) \
	pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL)
#define SH_EQUAL(tb, a, b) \
	pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
#define SH_SCOPE static inline
#define SH_DEFINE
#define SH_DECLARE
#include "lib/simplehash.h"


/* ----------
 * Local function forward declarations
 * ----------
 */

static void pgstat_write_statsfile(void);
static void pgstat_read_statsfile(void);

static void pgstat_reset_after_failure(TimestampTz ts);

static bool pgstat_flush_pending_entries(bool nowait);

static void pgstat_prep_snapshot(void);
static void pgstat_build_snapshot(void);
static void pgstat_build_snapshot_fixed(PgStat_Kind kind);

static inline bool pgstat_is_kind_valid(int ikind);


/* ----------
 * GUC parameters
 * ----------
 */

bool		pgstat_track_counts = false;
int			pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
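
/*
 * The stats_fetch_consistency GUC maps onto pgstat_fetch_consistency. An
 * illustrative SQL session (value names per config.sgml):
 *
 *		SET stats_fetch_consistency = 'none';		-- re-fetch on each access
 *		SET stats_fetch_consistency = 'cache';		-- cache entries once read
 *		SET stats_fetch_consistency = 'snapshot';	-- snapshot all stats at once
 *
 * See pgstat_fetch_entry() below for how the three modes behave.
 */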


/* ----------
 * state shared with pgstat_*.c
 * ----------
 */

PgStat_LocalState pgStatLocal;


/* ----------
 * Local data
 *
 * NB: There should be only variables related to stats infrastructure here,
 * not for specific kinds of stats.
 * ----------
 */

/*
 * Memory contexts containing the pgStatEntryRefHash table, the
 * PgStat_EntryRef entries, and pending data respectively. Mostly to make it
 * easier to track / attribute memory usage.
 */

static MemoryContext pgStatPendingContext = NULL;

/*
 * Backend local list of PgStat_EntryRef with unflushed pending stats.
 *
 * Newly pending entries should only ever be added to the end of the list,
 * otherwise pgstat_flush_pending_entries() might not see them immediately.
 */
static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
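
/*
 * A sketch of the append discipline mentioned above (cf.
 * pgstat_prep_pending_entry(); illustrative only):
 *
 *		if (entry_ref->pending == NULL)
 *		{
 *			entry_ref->pending =
 *				MemoryContextAllocZero(pgStatPendingContext,
 *									   kind_info->pending_size);
 *			dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
 *		}
 *
 * pgstat_flush_pending_entries() walks the list from the head, so appending
 * at the tail ensures it sees entries added while a flush is in progress.
 */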


/*
 * For assertions that check pgstat is not used before initialization / after
 * shutdown.
 */
#ifdef USE_ASSERT_CHECKING
static bool pgstat_is_initialized = false;
static bool pgstat_is_shutdown = false;
#endif


/*
 * The different kinds of statistics.
 *
 * If reasonably possible, handling specific to one kind of stats should go
 * through this abstraction, rather than making more of pgstat.c aware.
 *
 * See comments for struct PgStat_KindInfo for details about the individual
 * fields.
 *
 * XXX: It'd be nicer to define this outside of this file. But there doesn't
 * seem to be a great way of doing that, given the split across multiple
 * files.
 */
static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = {

	/* stats kinds for variable-numbered objects */

	[PGSTAT_KIND_DATABASE] = {
		.name = "database",

		.fixed_amount = false,
		/* so pg_stat_database entries can be seen in all databases */
		.accessed_across_databases = true,

		.shared_size = sizeof(PgStatShared_Database),
		.shared_data_off = offsetof(PgStatShared_Database, stats),
		.shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats),
		.pending_size = sizeof(PgStat_StatDBEntry),

		.flush_pending_cb = pgstat_database_flush_cb,
		.reset_timestamp_cb = pgstat_database_reset_timestamp_cb,
	},

	[PGSTAT_KIND_RELATION] = {
		.name = "relation",

		.fixed_amount = false,

		.shared_size = sizeof(PgStatShared_Relation),
		.shared_data_off = offsetof(PgStatShared_Relation, stats),
		.shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats),
		.pending_size = sizeof(PgStat_TableStatus),

		.flush_pending_cb = pgstat_relation_flush_cb,
		.delete_pending_cb = pgstat_relation_delete_pending_cb,
	},

	[PGSTAT_KIND_FUNCTION] = {
		.name = "function",

		.fixed_amount = false,

		.shared_size = sizeof(PgStatShared_Function),
		.shared_data_off = offsetof(PgStatShared_Function, stats),
		.shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats),
		.pending_size = sizeof(PgStat_BackendFunctionEntry),

		.flush_pending_cb = pgstat_function_flush_cb,
	},

	[PGSTAT_KIND_REPLSLOT] = {
		.name = "replslot",

		.fixed_amount = false,

		.accessed_across_databases = true,
		.named_on_disk = true,

		.shared_size = sizeof(PgStatShared_ReplSlot),
		.shared_data_off = offsetof(PgStatShared_ReplSlot, stats),
		.shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats),

		.reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb,
		.to_serialized_name = pgstat_replslot_to_serialized_name_cb,
		.from_serialized_name = pgstat_replslot_from_serialized_name_cb,
	},

	[PGSTAT_KIND_SUBSCRIPTION] = {
		.name = "subscription",

		.fixed_amount = false,
		/* so pg_stat_subscription_stats entries can be seen in all databases */
		.accessed_across_databases = true,

		.shared_size = sizeof(PgStatShared_Subscription),
		.shared_data_off = offsetof(PgStatShared_Subscription, stats),
		.shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats),
		.pending_size = sizeof(PgStat_BackendSubEntry),

		.flush_pending_cb = pgstat_subscription_flush_cb,
		.reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb,
	},


	/* stats for fixed-numbered (mostly 1) objects */

	[PGSTAT_KIND_ARCHIVER] = {
		.name = "archiver",

		.fixed_amount = true,

		.reset_all_cb = pgstat_archiver_reset_all_cb,
		.snapshot_cb = pgstat_archiver_snapshot_cb,
	},

	[PGSTAT_KIND_BGWRITER] = {
		.name = "bgwriter",

		.fixed_amount = true,

		.reset_all_cb = pgstat_bgwriter_reset_all_cb,
		.snapshot_cb = pgstat_bgwriter_snapshot_cb,
	},

	[PGSTAT_KIND_CHECKPOINTER] = {
		.name = "checkpointer",

		.fixed_amount = true,

		.reset_all_cb = pgstat_checkpointer_reset_all_cb,
		.snapshot_cb = pgstat_checkpointer_snapshot_cb,
	},

	[PGSTAT_KIND_SLRU] = {
		.name = "slru",

		.fixed_amount = true,

		.reset_all_cb = pgstat_slru_reset_all_cb,
		.snapshot_cb = pgstat_slru_snapshot_cb,
	},

	[PGSTAT_KIND_WAL] = {
		.name = "wal",

		.fixed_amount = true,

		.reset_all_cb = pgstat_wal_reset_all_cb,
		.snapshot_cb = pgstat_wal_snapshot_cb,
	},
};
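
/*
 * The shared_data_off/shared_data_len fields above let infrastructure code
 * locate a kind's stats payload inside its shared entry without knowing the
 * concrete struct. A minimal sketch of that access pattern (cf.
 * pgstat_get_entry_data() in pgstat_internal.h; the function name
 * example_entry_data is hypothetical, for illustration only):
 *
 *		static inline void *
 *		example_entry_data(PgStat_Kind kind, PgStatShared_Common *entry)
 *		{
 *			size_t		off = pgstat_get_kind_info(kind)->shared_data_off;
 *
 *			return ((char *) entry) + off;
 *		}
 *
 * This is what allows pgstat_fetch_entry() below to memcpy shared_data_len
 * bytes for any variable-numbered kind.
 */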


/* ------------------------------------------------------------
 * Functions managing the state of the stats system for all backends.
 * ------------------------------------------------------------
 */

/*
 * Read on-disk stats into memory at server start.
 *
 * Should only be called by the startup process or in single user mode.
 */
void
pgstat_restore_stats(void)
{
	pgstat_read_statsfile();
}

/*
 * Remove the stats file. This is currently used only if WAL recovery is
 * needed after a crash.
 *
 * Should only be called by the startup process or in single user mode.
 */
void
pgstat_discard_stats(void)
{
	int			ret;

	/* NB: this needs to be done even in single user mode */

	ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
	if (ret != 0)
	{
		if (errno == ENOENT)
			elog(DEBUG2,
				 "didn't need to unlink permanent stats file \"%s\" - didn't exist",
				 PGSTAT_STAT_PERMANENT_FILENAME);
		else
			ereport(LOG,
					(errcode_for_file_access(),
					 errmsg("could not unlink permanent statistics file \"%s\": %m",
							PGSTAT_STAT_PERMANENT_FILENAME)));
	}
	else
	{
		ereport(DEBUG2,
				(errcode_for_file_access(),
				 errmsg("unlinked permanent statistics file \"%s\"",
						PGSTAT_STAT_PERMANENT_FILENAME)));
	}
}

/*
 * pgstat_before_server_shutdown() needs to be called by exactly one process
 * during regular server shutdowns. Otherwise all stats will be lost.
 *
 * We currently only write out stats for proc_exit(0). We might want to change
 * that at some point... But right now pgstat_discard_stats() would be called
 * during the start after a disorderly shutdown, anyway.
 */
void
pgstat_before_server_shutdown(int code, Datum arg)
{
	Assert(pgStatLocal.shmem != NULL);
	Assert(!pgStatLocal.shmem->is_shutdown);

	/*
	 * Stats should only be reported after pgstat_initialize() and before
	 * pgstat_shutdown(). This is a convenient point to catch most violations
	 * of this rule.
	 */
	Assert(pgstat_is_initialized && !pgstat_is_shutdown);

	/* flush out our own pending changes before writing out */
	pgstat_report_stat(true);

	/*
	 * Only write out file during normal shutdown. Don't even signal that
	 * we've shut down during irregular shutdowns, because the shutdown
	 * sequence isn't coordinated to ensure this backend shuts down last.
	 */
	if (code == 0)
	{
		pgStatLocal.shmem->is_shutdown = true;
		pgstat_write_statsfile();
	}
}


/* ------------------------------------------------------------
 * Backend initialization / shutdown functions
 * ------------------------------------------------------------
 */

/*
 * Shut down a single backend's statistics reporting at process exit.
 *
 * Flush out any remaining statistics counts. Without this, operations
 * triggered during backend exit (such as temp table deletions) won't be
 * counted.
 */
static void
pgstat_shutdown_hook(int code, Datum arg)
{
	Assert(!pgstat_is_shutdown);
	Assert(IsUnderPostmaster || !IsPostmasterEnvironment);

	/*
	 * If we got as far as discovering our own database ID, we can flush out
	 * what we did so far. Otherwise, we'd be reporting an invalid database
	 * ID, so forget it. (This means that accesses to pg_database during
	 * failed backend starts might never get counted.)
	 */
	if (OidIsValid(MyDatabaseId))
		pgstat_report_disconnect(MyDatabaseId);

	pgstat_report_stat(true);

	/* there shouldn't be any pending changes left */
	Assert(dlist_is_empty(&pgStatPending));
	dlist_init(&pgStatPending);

	pgstat_detach_shmem();

#ifdef USE_ASSERT_CHECKING
	pgstat_is_shutdown = true;
#endif
}

/*
 * Initialize pgstats state, and set up our on-proc-exit hook. Called from
 * BaseInit().
 *
 * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
 */
void
pgstat_initialize(void)
{
	Assert(!pgstat_is_initialized);

	pgstat_attach_shmem();

	pgstat_init_wal();

	/* Set up a process-exit hook to clean up */
	before_shmem_exit(pgstat_shutdown_hook, 0);

#ifdef USE_ASSERT_CHECKING
	pgstat_is_initialized = true;
#endif
}


/* ------------------------------------------------------------
 * Public functions used by backends follow
 * ------------------------------------------------------------
 */

/*
 * Must be called by processes that perform DML: tcop/postgres.c, logical
 * receiver processes, SPI workers, etc. to flush pending statistics updates
 * to shared memory.
 *
 * Unless called with 'force', pending stats updates are flushed at most once
 * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not
 * block on lock acquisition, except if stats updates have been pending for
 * longer than PGSTAT_MAX_INTERVAL (60000ms).
 *
 * Whenever pending stats updates remain at the end of pgstat_report_stat(),
 * a suggested idle timeout is returned. Currently this is always
 * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up
 * a timeout after which to call pgstat_report_stat(true), but are not
 * required to do so.
 *
 * Note that this is called only when not within a transaction, so it is fair
 * to use transaction stop time as an approximation of current time.
 */
long
pgstat_report_stat(bool force)
{
	static TimestampTz pending_since = 0;
	static TimestampTz last_flush = 0;
	bool		partial_flush;
	TimestampTz now;
	bool		nowait;

	pgstat_assert_is_up();
	Assert(!IsTransactionBlock());

	/* Don't expend a clock check if nothing to do */
	if (dlist_is_empty(&pgStatPending) &&
		!have_slrustats &&
		!pgstat_have_pending_wal())
	{
		Assert(pending_since == 0);
		return 0;
	}

	/*
	 * There should never be stats to report once stats are shut down. Can't
	 * assert that before the checks above, as there is an unconditional
	 * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least
	 * the process that ran pgstat_before_server_shutdown() will still call.
	 */
	Assert(!pgStatLocal.shmem->is_shutdown);

	now = GetCurrentTransactionStopTimestamp();

	if (!force)
	{
		if (pending_since > 0 &&
			TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL))
		{
			/* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */
			force = true;
		}
		else if (last_flush > 0 &&
				 !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
		{
			/* don't flush too frequently */
			if (pending_since == 0)
				pending_since = now;

			return PGSTAT_IDLE_INTERVAL;
		}
	}

	pgstat_update_dbstats(now);

	/* don't wait for lock acquisition when !force */
	nowait = !force;

	partial_flush = false;

	/* flush database / relation / function / ... stats */
	partial_flush |= pgstat_flush_pending_entries(nowait);

	/* flush wal stats */
	partial_flush |= pgstat_flush_wal(nowait);

	/* flush SLRU stats */
	partial_flush |= pgstat_slru_flush(nowait);

	last_flush = now;

	/*
	 * If some of the pending stats could not be flushed due to lock
	 * contention, let the caller know when to retry.
	 */
	if (partial_flush)
	{
		/* force should have prevented us from getting here */
		Assert(!force);

		/* remember since when stats have been pending */
		if (pending_since == 0)
			pending_since = now;

		return PGSTAT_IDLE_INTERVAL;
	}

	pending_since = 0;

	return 0;
}

/*
 * Only for use by pgstat_reset_counters()
 */
static bool
match_db_entries(PgStatShared_HashEntry *entry, Datum match_data)
{
	return entry->key.dboid == DatumGetObjectId(MyDatabaseId);
}

/*
 * Reset counters for our database.
 *
 * Permission checking for this function is managed through the normal
 * GRANT system.
 */
void
pgstat_reset_counters(void)
{
	TimestampTz ts = GetCurrentTimestamp();

	pgstat_reset_matching_entries(match_db_entries,
								  ObjectIdGetDatum(MyDatabaseId),
								  ts);
}

/*
 * Reset a single variable-numbered entry.
 *
 * If the stats kind is within a database, also reset the database's
 * stat_reset_timestamp.
 *
 * Permission checking for this function is managed through the normal
 * GRANT system.
 */
void
pgstat_reset(PgStat_Kind kind, Oid dboid, Oid objoid)
{
	const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
	TimestampTz ts = GetCurrentTimestamp();

	/* not needed atm, and doesn't make sense with the current signature */
	Assert(!pgstat_get_kind_info(kind)->fixed_amount);

	/* reset the "single counter" */
	pgstat_reset_entry(kind, dboid, objoid, ts);

	if (!kind_info->accessed_across_databases)
		pgstat_reset_database_timestamp(dboid, ts);
}

/*
 * Reset stats for all entries of a kind.
 *
 * Permission checking for this function is managed through the normal
 * GRANT system.
 */
void
pgstat_reset_of_kind(PgStat_Kind kind)
{
	const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
	TimestampTz ts = GetCurrentTimestamp();

	if (kind_info->fixed_amount)
		kind_info->reset_all_cb(ts);
	else
		pgstat_reset_entries_of_kind(kind, ts);
}


/* ------------------------------------------------------------
 * Fetching of stats
 * ------------------------------------------------------------
 */

/*
 * Discard any data collected in the current transaction. Any subsequent
 * request will cause new snapshots to be read.
 *
 * This is also invoked during transaction commit or abort to discard
 * the no-longer-wanted snapshot.
 */
void
pgstat_clear_snapshot(void)
{
	pgstat_assert_is_up();

	memset(&pgStatLocal.snapshot.fixed_valid, 0,
		   sizeof(pgStatLocal.snapshot.fixed_valid));
	pgStatLocal.snapshot.stats = NULL;
	pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE;

	/* Release memory, if any was allocated */
	if (pgStatLocal.snapshot.context)
	{
		MemoryContextDelete(pgStatLocal.snapshot.context);

		/* Reset variables */
		pgStatLocal.snapshot.context = NULL;
	}

	/*
	 * Historically the backend_status.c facilities lived in this file, and
	 * were reset with the same function. For now keep it that way, and
	 * forward the reset request.
	 */
	pgstat_clear_backend_activity_snapshot();
}
|
2008-05-15 02:17:41 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
void *
|
|
|
|
pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
|
|
|
|
{
|
|
|
|
PgStat_HashKey key;
|
|
|
|
PgStat_EntryRef *entry_ref;
|
|
|
|
void *stats_data;
|
|
|
|
const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
|
2008-05-15 02:17:41 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
/* should be called from backends */
|
|
|
|
Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
|
|
|
|
AssertArg(!kind_info->fixed_amount);
|
2008-05-15 02:17:41 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
pgstat_prep_snapshot();
|
2008-05-15 02:17:41 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
key.kind = kind;
|
|
|
|
key.dboid = dboid;
|
|
|
|
key.objoid = objoid;
|
2008-05-15 02:17:41 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
/* if we need to build a full snapshot, do so */
|
|
|
|
if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
|
|
|
|
pgstat_build_snapshot();
|
2008-12-08 16:44:54 +01:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
/* if caching is desired, look up in cache */
|
|
|
|
if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
|
|
|
|
{
|
|
|
|
PgStat_SnapshotEntry *entry = NULL;
|
2008-12-08 16:44:54 +01:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key);
|
2008-12-08 16:44:54 +01:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
if (entry)
|
|
|
|
return entry->data;
|
2008-05-15 02:17:41 +02:00
|
|
|
|
|
|
|
/*
|
2022-04-07 06:29:46 +02:00
|
|
|
* If we built a full snapshot and the key is not in
|
|
|
|
* pgStatLocal.snapshot.stats, there are no matching stats.
|
2008-05-15 02:17:41 +02:00
|
|
|
*/
|
2022-04-07 06:29:46 +02:00
|
|
|
if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
|
|
|
|
return NULL;
|
|
|
|
}
|
2008-05-15 02:17:41 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
pgStatLocal.snapshot.mode = pgstat_fetch_consistency;
|
2008-05-15 02:17:41 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
|
2007-01-12 00:06:03 +01:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
if (entry_ref == NULL || entry_ref->shared_entry->dropped)
|
2007-01-12 00:06:03 +01:00
|
|
|
{
|
2022-04-07 06:29:46 +02:00
|
|
|
/* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */
|
|
|
|
if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE)
|
|
|
|
{
|
|
|
|
PgStat_SnapshotEntry *entry = NULL;
|
|
|
|
bool found;
|

            entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
            Assert(!found);
            entry->data = NULL;
        }

        return NULL;
    }

    /*
     * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE,
     * otherwise we could quickly end up with a fair bit of memory used due
     * to repeated accesses.
     */
    if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
        stats_data = palloc(kind_info->shared_data_len);
    else
        stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context,
                                        kind_info->shared_data_len);
    memcpy(stats_data,
           pgstat_get_entry_data(kind, entry_ref->shared_stats),
           kind_info->shared_data_len);

    if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE)
    {
        PgStat_SnapshotEntry *entry = NULL;
        bool        found;

        entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found);
        entry->data = stats_data;
    }

    return stats_data;
}
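
/*
 * Usage sketch (illustrative, not part of this file): kind-specific fetch
 * functions are expected to be thin casting wrappers around
 * pgstat_fetch_entry(); e.g. a database-stats accessor could look like this
 * (the wrapper name is hypothetical):
 *
 *     PgStat_StatDBEntry *
 *     pgstat_fetch_stat_dbentry(Oid dboid)
 *     {
 *         return (PgStat_StatDBEntry *)
 *             pgstat_fetch_entry(PGSTAT_KIND_DATABASE, dboid, InvalidOid);
 *     }
 */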

/*
 * If a stats snapshot has been taken, return the timestamp at which that was
 * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to
 * false.
 */
TimestampTz
pgstat_get_stat_snapshot_timestamp(bool *have_snapshot)
{
    if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
    {
        *have_snapshot = true;
        return pgStatLocal.snapshot.snapshot_timestamp;
    }

    *have_snapshot = false;

    return 0;
}
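
/*
 * Usage sketch (hypothetical caller, e.g. a SQL-callable stats function):
 *
 *     bool        have_snapshot;
 *     TimestampTz ts = pgstat_get_stat_snapshot_timestamp(&have_snapshot);
 *
 * If have_snapshot is true, ts is the instant at which
 * pgstat_build_snapshot() ran; otherwise ts is 0 and no snapshot-consistency
 * guarantee exists.
 */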

/*
 * Ensure snapshot for fixed-numbered 'kind' exists.
 *
 * Typically used by the pgstat_fetch_* functions for a kind of stats, before
 * massaging the data into the desired format.
 */
void
pgstat_snapshot_fixed(PgStat_Kind kind)
{
    AssertArg(pgstat_is_kind_valid(kind));
    AssertArg(pgstat_get_kind_info(kind)->fixed_amount);

    if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
        pgstat_build_snapshot();
    else
        pgstat_build_snapshot_fixed(kind);

    Assert(pgStatLocal.snapshot.fixed_valid[kind]);
}
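
/*
 * Usage sketch (illustrative, assuming PGSTAT_KIND_ARCHIVER's snapshot lives
 * in pgStatLocal.snapshot.archiver, as used elsewhere in this file):
 *
 *     PgStat_ArchiverStats *
 *     pgstat_fetch_stat_archiver(void)
 *     {
 *         pgstat_snapshot_fixed(PGSTAT_KIND_ARCHIVER);
 *         return &pgStatLocal.snapshot.archiver;
 *     }
 */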

static void
pgstat_prep_snapshot(void)
{
    if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE ||
        pgStatLocal.snapshot.stats != NULL)
        return;

    if (!pgStatLocal.snapshot.context)
        pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext,
                                                             "PgStat Snapshot",
                                                             ALLOCSET_SMALL_SIZES);

    pgStatLocal.snapshot.stats =
        pgstat_snapshot_create(pgStatLocal.snapshot.context,
                               PGSTAT_SNAPSHOT_HASH_SIZE,
                               NULL);
}

static void
pgstat_build_snapshot(void)
{
    dshash_seq_status hstat;
    PgStatShared_HashEntry *p;

    /* should only be called when we need a snapshot */
    Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT);

    /* snapshot already built */
    if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT)
        return;

    pgstat_prep_snapshot();

    Assert(pgStatLocal.snapshot.stats->members == 0);

    pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp();

    /*
     * Snapshot all variable stats.
     */
    dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
    while ((p = dshash_seq_next(&hstat)) != NULL)
    {
        PgStat_Kind kind = p->key.kind;
        const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
        bool        found;
        PgStat_SnapshotEntry *entry;
        PgStatShared_Common *stats_data;

        /*
         * Check if the stats object should be included in the snapshot.
         * Unless the stats kind can be accessed from all databases (e.g.,
         * database stats themselves), we only include stats for the current
         * database or objects not associated with a database (e.g. shared
         * relations).
         */
        if (p->key.dboid != MyDatabaseId &&
            p->key.dboid != InvalidOid &&
            !kind_info->accessed_across_databases)
            continue;

        if (p->dropped)
            continue;

        Assert(pg_atomic_read_u32(&p->refcount) > 0);

        stats_data = dsa_get_address(pgStatLocal.dsa, p->body);
        Assert(stats_data);

        entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found);
        Assert(!found);

        entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context,
                                         kind_info->shared_size);
        memcpy(entry->data,
               pgstat_get_entry_data(kind, stats_data),
               kind_info->shared_size);
    }
    dshash_seq_term(&hstat);

    /*
     * Build snapshot of all fixed-numbered stats.
     */
    for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
    {
        const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);

        if (!kind_info->fixed_amount)
        {
            Assert(kind_info->snapshot_cb == NULL);
            continue;
        }

        pgstat_build_snapshot_fixed(kind);
    }

    pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT;
}

static void
pgstat_build_snapshot_fixed(PgStat_Kind kind)
{
    const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);

    Assert(kind_info->fixed_amount);
    Assert(kind_info->snapshot_cb != NULL);

    if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE)
    {
        /* rebuild every time */
        pgStatLocal.snapshot.fixed_valid[kind] = false;
    }
    else if (pgStatLocal.snapshot.fixed_valid[kind])
    {
        /* in snapshot mode we shouldn't get called again */
        Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE);
        return;
    }

    Assert(!pgStatLocal.snapshot.fixed_valid[kind]);

    kind_info->snapshot_cb();

    Assert(!pgStatLocal.snapshot.fixed_valid[kind]);
    pgStatLocal.snapshot.fixed_valid[kind] = true;
}


/* ------------------------------------------------------------
 * Backend-local pending stats infrastructure
 * ------------------------------------------------------------
 */

/*
 * Returns the appropriate PgStat_EntryRef, preparing it to receive pending
 * stats if not already done.
 *
 * If created_entry is non-NULL, it'll be set to true if the entry is newly
 * created, false otherwise.
 */
PgStat_EntryRef *
pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid, bool *created_entry)
{
    PgStat_EntryRef *entry_ref;

    /* need to be able to flush out */
    Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL);

    if (unlikely(!pgStatPendingContext))
    {
        pgStatPendingContext =
            AllocSetContextCreate(CacheMemoryContext,
                                  "PgStat Pending",
                                  ALLOCSET_SMALL_SIZES);
    }

    entry_ref = pgstat_get_entry_ref(kind, dboid, objoid,
                                     true, created_entry);

    if (entry_ref->pending == NULL)
    {
        size_t      entrysize = pgstat_get_kind_info(kind)->pending_size;

        Assert(entrysize != (size_t) -1);

        entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize);
        dlist_push_tail(&pgStatPending, &entry_ref->pending_node);
    }

    return entry_ref;
}
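
/*
 * Usage sketch (hypothetical count-accumulation path for relation stats;
 * the variables and the PgStat_TableStatus-style pending layout are shown
 * for illustration only):
 *
 *     PgStat_EntryRef *entry_ref;
 *     PgStat_TableStatus *pending;
 *
 *     entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_RELATION,
 *                                           dboid, relid, NULL);
 *     pending = entry_ref->pending;
 *     pending->t_counts.t_numscans++;    /* accumulated locally, flushed
 *                                         * later by pgstat_report_stat() */
 */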

/*
 * Return an existing stats entry, or NULL.
 *
 * This should only be used as a helper for pgstatfuncs.c - outside of that
 * it shouldn't be needed.
 */
PgStat_EntryRef *
pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
{
    PgStat_EntryRef *entry_ref;

    entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);

    if (entry_ref == NULL || entry_ref->pending == NULL)
        return NULL;

    return entry_ref;
}
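
/*
 * Delete the pending data for 'entry_ref': give the stats kind a chance to
 * clean up via its delete_pending_cb, free the pending data, and unlink the
 * entry from the pgStatPending list.
 */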
void
pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
{
    PgStat_Kind kind = entry_ref->shared_entry->key.kind;
    const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
    void       *pending_data = entry_ref->pending;

    Assert(pending_data != NULL);
    /* !fixed_amount stats should be handled explicitly */
    Assert(!pgstat_get_kind_info(kind)->fixed_amount);

    if (kind_info->delete_pending_cb)
        kind_info->delete_pending_cb(entry_ref);

    pfree(pending_data);
    entry_ref->pending = NULL;

    dlist_delete(&entry_ref->pending_node);
}

/*
 * Flush out pending stats for database objects (databases, relations,
 * functions).
 *
 * Returns true if at least one pending entry could not be flushed (only
 * possible with nowait = true), in which case the caller should retry later.
 */
static bool
pgstat_flush_pending_entries(bool nowait)
{
    bool        have_pending = false;
    dlist_node *cur = NULL;

    /*
     * Need to be a bit careful iterating over the list of pending entries.
     * Processing a pending entry may queue further pending entries to the
     * end of the list that we want to process, so a simple iteration won't
     * do. Further complicating matters is that we want to delete the
     * current entry in each iteration from the list if we flushed
     * successfully.
     *
     * So we just keep track of the next pointer in each loop iteration.
     */
    if (!dlist_is_empty(&pgStatPending))
        cur = dlist_head_node(&pgStatPending);

    while (cur)
    {
        PgStat_EntryRef *entry_ref =
        dlist_container(PgStat_EntryRef, pending_node, cur);
        PgStat_HashKey key = entry_ref->shared_entry->key;
        PgStat_Kind kind = key.kind;
        const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
        bool        did_flush;
        dlist_node *next;

        Assert(!kind_info->fixed_amount);
        Assert(kind_info->flush_pending_cb != NULL);

        /* flush the stats, if possible */
        did_flush = kind_info->flush_pending_cb(entry_ref, nowait);

        Assert(did_flush || nowait);

        /* determine next entry, before deleting the pending entry */
        if (dlist_has_next(&pgStatPending, cur))
            next = dlist_next_node(&pgStatPending, cur);
        else
            next = NULL;

        /* if successfully flushed, remove entry */
        if (did_flush)
            pgstat_delete_pending_entry(entry_ref);
        else
            have_pending = true;

        cur = next;
    }

    Assert(dlist_is_empty(&pgStatPending) == !have_pending);

    return have_pending;
}
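
/*
 * Call-pattern sketch: pgstat_report_stat() drives this as part of flushing
 * all pending statistics, along the lines of (illustrative):
 *
 *     partial_flush |= pgstat_flush_pending_entries(nowait);
 *
 * scheduling another flush attempt when anything could not be flushed.
 */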


/* ------------------------------------------------------------
 * Helper / infrastructure functions
 * ------------------------------------------------------------
 */

PgStat_Kind
pgstat_get_kind_from_str(char *kind_str)
{
    for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
    {
        if (pg_strcasecmp(kind_str, pgstat_kind_infos[kind].name) == 0)
            return kind;
    }

    ereport(ERROR,
            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
             errmsg("invalid statistics kind: \"%s\"", kind_str)));
    return PGSTAT_KIND_DATABASE;    /* avoid compiler warnings */
}
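
/*
 * E.g. pgstat_get_kind_from_str("bgwriter") returns PGSTAT_KIND_BGWRITER,
 * assuming that is the kind's registered name in pgstat_kind_infos; the
 * match is case-insensitive, and an unknown name raises an ERROR rather
 * than returning.
 */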

static inline bool
pgstat_is_kind_valid(int ikind)
{
    return ikind >= PGSTAT_KIND_FIRST_VALID && ikind <= PGSTAT_KIND_LAST;
}

const PgStat_KindInfo *
pgstat_get_kind_info(PgStat_Kind kind)
{
    AssertArg(pgstat_is_kind_valid(kind));

    return &pgstat_kind_infos[kind];
}

/*
 * Stats should only be reported after pgstat_initialize() and before
 * pgstat_shutdown(). This check is put in a few central places to catch
 * violations of this rule more easily.
 */
#ifdef USE_ASSERT_CHECKING
void
pgstat_assert_is_up(void)
{
    Assert(pgstat_is_initialized && !pgstat_is_shutdown);
}
#endif


/* ------------------------------------------------------------
 * reading and writing of on-disk stats file
 * ------------------------------------------------------------
 */
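
/*
 * On-disk layout sketch, as produced by pgstat_write_statsfile() and
 * consumed by pgstat_read_statsfile() below (informational summary derived
 * from those functions):
 *
 *     int32  format_id (PGSTAT_FILE_FORMAT_ID)
 *     fixed-numbered stats structs: archiver, bgwriter, checkpointer,
 *         SLRU, WAL, in that order
 *     per-entry records, each either
 *         'S' + PgStat_HashKey + entry data, or
 *         'N' + PgStat_Kind + NameData + entry data
 *     'E'    end-of-file marker
 */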

/* helpers for pgstat_write_statsfile() */
static void
write_chunk(FILE *fpout, void *ptr, size_t len)
{
    int         rc;

    rc = fwrite(ptr, len, 1, fpout);

    /* we'll check for errors with ferror once at the end */
    (void) rc;
}

#define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
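
/*
 * E.g. write_chunk_s(fpout, &format_id) expands to
 * write_chunk(fpout, &format_id, sizeof(format_id)), so exactly the size of
 * the pointed-to object is written.
 */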

/*
 * This function is called in the last process that is accessing the shared
 * stats so locking is not required.
 */
static void
pgstat_write_statsfile(void)
{
    FILE       *fpout;
    int32       format_id;
    const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE;
    const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
    dshash_seq_status hstat;
    PgStatShared_HashEntry *ps;

    pgstat_assert_is_up();

    /* we're shutting down, so it's ok to just override this */
    pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;

    elog(DEBUG2, "writing stats file \"%s\"", statfile);

    /*
     * Open the statistics temp file to write out the current values.
     */
    fpout = AllocateFile(tmpfile, PG_BINARY_W);
    if (fpout == NULL)
    {
        ereport(LOG,
                (errcode_for_file_access(),
                 errmsg("could not open temporary statistics file \"%s\": %m",
                        tmpfile)));
        return;
    }

    /*
     * Write the file header --- currently just a format ID.
     */
    format_id = PGSTAT_FILE_FORMAT_ID;
    write_chunk_s(fpout, &format_id);

    /*
     * XXX: The following could now be generalized to just iterate over
     * pgstat_kind_infos instead of knowing about the different kinds of
     * stats.
     */

    /*
     * Write archiver stats struct
     */
    pgstat_build_snapshot_fixed(PGSTAT_KIND_ARCHIVER);
    write_chunk_s(fpout, &pgStatLocal.snapshot.archiver);

    /*
     * Write bgwriter stats struct
     */
    pgstat_build_snapshot_fixed(PGSTAT_KIND_BGWRITER);
    write_chunk_s(fpout, &pgStatLocal.snapshot.bgwriter);

    /*
     * Write checkpointer stats struct
     */
    pgstat_build_snapshot_fixed(PGSTAT_KIND_CHECKPOINTER);
    write_chunk_s(fpout, &pgStatLocal.snapshot.checkpointer);

    /*
     * Write SLRU stats struct
     */
    pgstat_build_snapshot_fixed(PGSTAT_KIND_SLRU);
    write_chunk_s(fpout, &pgStatLocal.snapshot.slru);

    /*
     * Write WAL stats struct
     */
    pgstat_build_snapshot_fixed(PGSTAT_KIND_WAL);
    write_chunk_s(fpout, &pgStatLocal.snapshot.wal);

    /*
     * Walk through the stats entries
     */
    dshash_seq_init(&hstat, pgStatLocal.shared_hash, false);
    while ((ps = dshash_seq_next(&hstat)) != NULL)
    {
        PgStatShared_Common *shstats;
        const PgStat_KindInfo *kind_info = NULL;

        CHECK_FOR_INTERRUPTS();

        /* we may have some "dropped" entries not yet removed, skip them */
        Assert(!ps->dropped);
        if (ps->dropped)
            continue;

        shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body);

        kind_info = pgstat_get_kind_info(ps->key.kind);

        /* if not dropped the valid-entry refcount should exist */
        Assert(pg_atomic_read_u32(&ps->refcount) > 0);

        if (!kind_info->to_serialized_name)
        {
            /* normal stats entry, identified by PgStat_HashKey */
            fputc('S', fpout);
            write_chunk_s(fpout, &ps->key);
        }
        else
        {
            /* stats entry identified by name on disk (e.g. slots) */
            NameData    name;

            kind_info->to_serialized_name(shstats, &name);

            fputc('N', fpout);
            write_chunk_s(fpout, &ps->key.kind);
            write_chunk_s(fpout, &name);
        }

        /* Write the entry data, excluding the entry's header part */
        write_chunk(fpout,
                    pgstat_get_entry_data(ps->key.kind, shstats),
                    pgstat_get_entry_len(ps->key.kind));
    }
    dshash_seq_term(&hstat);

    /*
     * No more output to be done. Close the temp file and replace the old
     * pgstat.stat with it. The ferror() check replaces testing for error
     * after each individual fputc or fwrite (in write_chunk()) above.
     */
    fputc('E', fpout);

    if (ferror(fpout))
    {
        ereport(LOG,
                (errcode_for_file_access(),
                 errmsg("could not write temporary statistics file \"%s\": %m",
                        tmpfile)));
        FreeFile(fpout);
        unlink(tmpfile);
    }
    else if (FreeFile(fpout) < 0)
    {
        ereport(LOG,
                (errcode_for_file_access(),
                 errmsg("could not close temporary statistics file \"%s\": %m",
                        tmpfile)));
        unlink(tmpfile);
    }
    else if (rename(tmpfile, statfile) < 0)
    {
        ereport(LOG,
                (errcode_for_file_access(),
                 errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
                        tmpfile, statfile)));
        unlink(tmpfile);
    }
}

/* helpers for pgstat_read_statsfile() */
static bool
read_chunk(FILE *fpin, void *ptr, size_t len)
{
    return fread(ptr, 1, len, fpin) == len;
}

#define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr))
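
/*
 * Note that read_chunk() reads len items of size 1 (rather than one item of
 * size len), so fread()'s return value is the number of bytes actually read
 * and the == len comparison detects a short read near EOF cleanly.
 */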
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2022-04-04 21:14:34 +02:00
|
|
|
/*
|
2022-04-07 06:29:46 +02:00
|
|
|
* Reads in existing statistics file into the shared stats hash.
|
Avoid useless closely-spaced writes of statistics files.
The original intent in the stats collector was that we should not write out
stats data oftener than every PGSTAT_STAT_INTERVAL msec. Backends will not
make requests at all if they see the existing data is newer than that, and
the stats collector is supposed to disregard requests having a cutoff_time
older than its most recently written data, so that close-together requests
don't result in multiple writes. But the latter part of that got broken
in commit 187492b6c2e8cafc, so that if two backends concurrently decide
the existing stats are too old, the collector would write the data twice.
(In principle the collector's logic would still merge requests as long as
the second one arrives before we've actually written data ... but since
the message collection loop would write data immediately after processing
a single inquiry message, that never happened in practice, and in any case
the window in which it might work would be much shorter than
PGSTAT_STAT_INTERVAL.)
To fix, improve pgstat_recv_inquiry so that it checks whether the cutoff
time is too old, and doesn't add a request to the queue if so. This means
that we do not need DBWriteRequest.request_time, because the decision is
taken before making a queue entry. And that means that we don't really
need the DBWriteRequest data structure at all; an OID list of database
OIDs will serve and allow removal of some rather verbose and crufty code.
In passing, improve the comments in this area, which have been rather
neglected. Also change backend_read_statsfile so that it's not silently
relying on MyDatabaseId to have some particular value in the autovacuum
launcher process. It accidentally worked as desired because MyDatabaseId
is zero in that process; but that does not seem like a dependency we want,
especially with no documentation about it.
Although this patch is mine, it turns out I'd rediscovered a known bug,
for which Tomas Vondra had already submitted a patch that's functionally
equivalent to the non-cosmetic aspects of this patch. Thanks to Tomas
for reviewing this version.
Back-patch to 9.3 where the bug was introduced.
Prior-Discussion: <1718942738eb65c8407fcd864883f4c8@fuzzy.cz>
Patch: <4625.1464202586@sss.pgh.pa.us>
2016-05-31 21:54:46 +02:00
|
|
|
*
|
2022-04-07 06:29:46 +02:00
|
|
|
* This function is called in the only process that is accessing the shared
|
|
|
|
* stats so locking is not required.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2022-04-07 06:29:46 +02:00
|
|
|
static void
|
|
|
|
pgstat_read_statsfile(void)
|
2001-06-22 21:18:36 +02:00
|
|
|
{
|
|
|
|
FILE *fpin;
|
2005-07-14 07:13:45 +02:00
|
|
|
int32 format_id;
|
2001-06-22 21:18:36 +02:00
|
|
|
bool found;
|
2022-04-07 06:29:46 +02:00
|
|
|
const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME;
|
|
|
|
PgStat_ShmemControl *shmem = pgStatLocal.shmem;
|
|
|
|
TimestampTz ts = GetCurrentTimestamp();
|
2007-03-30 20:34:56 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
/* shouldn't be called from postmaster */
|
|
|
|
Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
|
2011-02-10 15:09:35 +01:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
elog(DEBUG2, "reading stats file \"%s\"", statfile);
|
Collect statistics about SLRU caches
There's a number of SLRU caches used to access important data like clog,
commit timestamps, multixact, asynchronous notifications, etc. Until now
we had no easy way to monitor these shared caches, compute hit ratios,
number of reads/writes etc.
This commit extends the statistics collector to track this information
for a predefined list of SLRUs, and also introduces a new system view
pg_stat_slru displaying the data.
The list of built-in SLRUs is fixed, but additional SLRUs may be defined
in extensions. Unfortunately, there's no suitable registry of SLRUs, so
this patch simply defines a fixed list of SLRUs with entries for the
built-in ones and one entry for all additional SLRUs. Extensions adding
their own SLRU are fairly rare, so this seems acceptable.
This patch only allows monitoring of SLRUs, not tuning. The SLRU sizes
are still fixed (hard-coded in the code) and it's not entirely clear
which of the SLRUs might need a GUC to tune size. In a way, allowing us
to determine that is one of the goals of this patch.
Bump catversion as the patch introduces new functions and system view.
Author: Tomas Vondra
Reviewed-by: Alvaro Herrera
Discussion: https://www.postgresql.org/message-id/flat/20200119143707.gyinppnigokesjok@development
2020-04-02 02:11:38 +02:00
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
/*
|
2013-02-18 21:56:08 +01:00
|
|
|
* Try to open the stats file. If it doesn't exist, the backends simply
|
2022-04-07 06:29:46 +02:00
|
|
|
* returns zero for anything and statistics simply starts from scratch
|
2001-06-22 21:18:36 +02:00
|
|
|
* with empty counters.
|
2010-03-12 23:19:19 +01:00
|
|
|
*
|
2022-04-07 06:29:46 +02:00
|
|
|
* ENOENT is a possibility if stats collection was previously disabled or
|
|
|
|
* has not yet written the stats file for the first time. Any other
|
|
|
|
* failure condition is suspicious.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2008-08-05 14:09:30 +02:00
|
|
|
if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
|
2010-03-12 23:19:19 +01:00
|
|
|
{
|
|
|
|
if (errno != ENOENT)
|
2022-04-07 06:29:46 +02:00
|
|
|
ereport(LOG,
|
2010-03-12 23:19:19 +01:00
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not open statistics file \"%s\": %m",
|
|
|
|
statfile)));
|
2022-04-07 06:29:46 +02:00
|
|
|
pgstat_reset_after_failure(ts);
|
|
|
|
return;
|
2010-03-12 23:19:19 +01:00
|
|
|
}
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2005-07-14 07:13:45 +02:00
|
|
|
/*
|
|
|
|
* Verify it's of the expected format.
|
|
|
|
*/
|
2022-04-07 06:29:46 +02:00
|
|
|
if (!read_chunk_s(fpin, &format_id) ||
|
2013-02-18 21:56:08 +01:00
|
|
|
format_id != PGSTAT_FILE_FORMAT_ID)
|
2022-04-07 06:29:46 +02:00
|
|
|
goto error;
|
2005-07-14 07:13:45 +02:00
|
|
|
|
2007-03-30 20:34:56 +02:00
|
|
|
/*
|
2022-04-07 06:29:46 +02:00
|
|
|
* XXX: The following could now be generalized to just iterate over
|
|
|
|
* pgstat_kind_infos instead of knowing about the different kinds of
|
|
|
|
* stats.
|
2007-03-30 20:34:56 +02:00
|
|
|
*/
|
|
|
|
|
Ignore old stats file timestamps when starting the stats collector.
The stats collector disregards inquiry messages that bear a cutoff_time
before when it last wrote the relevant stats file. That's fine, but at
startup when it reads the "permanent" stats files, it absorbed their
timestamps as if they were the times at which the corresponding temporary
stats files had been written. In reality, of course, there's no data
out there at all. This led to disregarding inquiry messages soon after
startup if the postmaster had been shut down and restarted within less
than PGSTAT_STAT_INTERVAL; which is a pretty common scenario, both for
testing and in the field. Requesting backends would hang for 10 seconds
and then report failure to read statistics, unless they got bailed out
by some other backend coming along and making a newer request within
that interval.
I came across this through investigating unexpected delays in the
src/test/recovery TAP tests: it manifests there because the autovacuum
launcher hangs for 10 seconds when it can't get statistics at startup,
thus preventing a second shutdown from occurring promptly. We might
want to do some things in the autovac code to make it less prone to
getting stuck that way, but this change is a good bug fix regardless.
In passing, also fix pgstat_read_statsfiles() to ensure that it
re-zeroes its global stats variables if they are corrupted by a
short read from the stats file. (Other reads in that function
go into temp variables, so that the issue doesn't arise.)
This has been broken since we created the separation between permanent
and temporary stats files in 8.4, so back-patch to all supported branches.
Discussion: https://postgr.es/m/16860.1498442626@sss.pgh.pa.us
2017-06-26 22:17:05 +02:00
|
|
|
/*
|
2022-04-07 06:29:46 +02:00
|
|
|
* Read archiver stats struct
|
Ignore old stats file timestamps when starting the stats collector.
The stats collector disregards inquiry messages that bear a cutoff_time
before when it last wrote the relevant stats file. That's fine, but at
startup when it reads the "permanent" stats files, it absorbed their
timestamps as if they were the times at which the corresponding temporary
stats files had been written. In reality, of course, there's no data
out there at all. This led to disregarding inquiry messages soon after
startup if the postmaster had been shut down and restarted within less
than PGSTAT_STAT_INTERVAL; which is a pretty common scenario, both for
testing and in the field. Requesting backends would hang for 10 seconds
and then report failure to read statistics, unless they got bailed out
by some other backend coming along and making a newer request within
that interval.
I came across this through investigating unexpected delays in the
src/test/recovery TAP tests: it manifests there because the autovacuum
launcher hangs for 10 seconds when it can't get statistics at startup,
thus preventing a second shutdown from occurring promptly. We might
want to do some things in the autovac code to make it less prone to
getting stuck that way, but this change is a good bug fix regardless.
In passing, also fix pgstat_read_statsfiles() to ensure that it
re-zeroes its global stats variables if they are corrupted by a
short read from the stats file. (Other reads in that function
go into temp variables, so that the issue doesn't arise.)
This has been broken since we created the separation between permanent
and temporary stats files in 8.4, so back-patch to all supported branches.
Discussion: https://postgr.es/m/16860.1498442626@sss.pgh.pa.us
2017-06-26 22:17:05 +02:00
|
|
|
*/
|
2022-04-07 06:29:46 +02:00
|
|
|
if (!read_chunk_s(fpin, &shmem->archiver.stats))
|
|
|
|
goto error;
|
Ignore old stats file timestamps when starting the stats collector.
The stats collector disregards inquiry messages that bear a cutoff_time
before when it last wrote the relevant stats file. That's fine, but at
startup when it reads the "permanent" stats files, it absorbed their
timestamps as if they were the times at which the corresponding temporary
stats files had been written. In reality, of course, there's no data
out there at all. This led to disregarding inquiry messages soon after
startup if the postmaster had been shut down and restarted within less
than PGSTAT_STAT_INTERVAL; which is a pretty common scenario, both for
testing and in the field. Requesting backends would hang for 10 seconds
and then report failure to read statistics, unless they got bailed out
by some other backend coming along and making a newer request within
that interval.
I came across this through investigating unexpected delays in the
src/test/recovery TAP tests: it manifests there because the autovacuum
launcher hangs for 10 seconds when it can't get statistics at startup,
thus preventing a second shutdown from occurring promptly. We might
want to do some things in the autovac code to make it less prone to
getting stuck that way, but this change is a good bug fix regardless.
In passing, also fix pgstat_read_statsfiles() to ensure that it
re-zeroes its global stats variables if they are corrupted by a
short read from the stats file. (Other reads in that function
go into temp variables, so that the issue doesn't arise.)
This has been broken since we created the separation between permanent
and temporary stats files in 8.4, so back-patch to all supported branches.
Discussion: https://postgr.es/m/16860.1498442626@sss.pgh.pa.us
2017-06-26 22:17:05 +02:00
|
|
|
|
2014-01-28 18:58:22 +01:00
|
|
|
/*
|
2022-04-07 06:29:46 +02:00
|
|
|
* Read bgwriter stats struct
|
2014-01-28 18:58:22 +01:00
|
|
|
*/
|
2022-04-07 06:29:46 +02:00
|
|
|
if (!read_chunk_s(fpin, &shmem->bgwriter.stats))
|
|
|
|
goto error;
|
2014-01-28 18:58:22 +01:00
|
|
|
|
2020-10-02 03:17:11 +02:00
|
|
|
/*
|
2022-04-07 06:29:46 +02:00
|
|
|
* Read checkpointer stats struct
|
2020-10-02 03:17:11 +02:00
|
|
|
*/
|
2022-04-07 06:29:46 +02:00
|
|
|
if (!read_chunk_s(fpin, &shmem->checkpointer.stats))
|
|
|
|
goto error;
|
2020-10-02 03:17:11 +02:00
|
|
|
|
Collect statistics about SLRU caches
There's a number of SLRU caches used to access important data like clog,
commit timestamps, multixact, asynchronous notifications, etc. Until now
we had no easy way to monitor these shared caches, compute hit ratios,
number of reads/writes etc.
This commit extends the statistics collector to track this information
for a predefined list of SLRUs, and also introduces a new system view
pg_stat_slru displaying the data.
The list of built-in SLRUs is fixed, but additional SLRUs may be defined
in extensions. Unfortunately, there's no suitable registry of SLRUs, so
this patch simply defines a fixed list of SLRUs with entries for the
built-in ones and one entry for all additional SLRUs. Extensions adding
their own SLRU are fairly rare, so this seems acceptable.
This patch only allows monitoring of SLRUs, not tuning. The SLRU sizes
are still fixed (hard-coded in the code) and it's not entirely clear
which of the SLRUs might need a GUC to tune size. In a way, allowing us
to determine that is one of the goals of this patch.
Bump catversion as the patch introduces new functions and system view.
Author: Tomas Vondra
Reviewed-by: Alvaro Herrera
Discussion: https://www.postgresql.org/message-id/flat/20200119143707.gyinppnigokesjok@development
2020-04-02 02:11:38 +02:00
|
|
|
/*
|
|
|
|
* Read SLRU stats struct
|
|
|
|
*/
|
2022-04-07 06:29:46 +02:00
|
|
|
if (!read_chunk_s(fpin, &shmem->slru.stats))
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Read WAL stats struct
|
|
|
|
*/
|
|
|
|
if (!read_chunk_s(fpin, &shmem->wal.stats))
|
|
|
|
goto error;
|
Collect statistics about SLRU caches
There's a number of SLRU caches used to access important data like clog,
commit timestamps, multixact, asynchronous notifications, etc. Until now
we had no easy way to monitor these shared caches, compute hit ratios,
number of reads/writes etc.
This commit extends the statistics collector to track this information
for a predefined list of SLRUs, and also introduces a new system view
pg_stat_slru displaying the data.
The list of built-in SLRUs is fixed, but additional SLRUs may be defined
in extensions. Unfortunately, there's no suitable registry of SLRUs, so
this patch simply defines a fixed list of SLRUs with entries for the
built-in ones and one entry for all additional SLRUs. Extensions adding
their own SLRU are fairly rare, so this seems acceptable.
This patch only allows monitoring of SLRUs, not tuning. The SLRU sizes
are still fixed (hard-coded in the code) and it's not entirely clear
which of the SLRUs might need a GUC to tune size. In a way, allowing us
to determine that is one of the goals of this patch.
Bump catversion as the patch introduces new functions and system view.
Author: Tomas Vondra
Reviewed-by: Alvaro Herrera
Discussion: https://www.postgresql.org/message-id/flat/20200119143707.gyinppnigokesjok@development
2020-04-02 02:11:38 +02:00
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
/*
|
2022-04-07 06:29:46 +02:00
|
|
|
* We found an existing statistics file. Read it and put all the hash
|
|
|
|
* table entries into place.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
|
|
|
for (;;)
|
|
|
|
{
|
2022-04-07 06:29:46 +02:00
|
|
|
char t = fgetc(fpin);
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
switch (t)
|
|
|
|
{
|
|
|
|
case 'S':
|
|
|
|
case 'N':
|
2005-07-29 21:30:09 +02:00
|
|
|
{
|
2022-04-07 06:29:46 +02:00
|
|
|
PgStat_HashKey key;
|
|
|
|
PgStatShared_HashEntry *p;
|
|
|
|
PgStatShared_Common *header;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
CHECK_FOR_INTERRUPTS();
|
2021-04-27 05:39:11 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
if (t == 'S')
|
2021-04-27 05:39:11 +02:00
|
|
|
{
|
2022-04-07 06:29:46 +02:00
|
|
|
/* normal stats entry, identified by PgStat_HashKey */
|
|
|
|
if (!read_chunk_s(fpin, &key))
|
|
|
|
goto error;
|
2021-04-27 05:39:11 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
if (!pgstat_is_kind_valid(key.kind))
|
|
|
|
goto error;
|
2021-04-27 05:39:11 +02:00
|
|
|
}
|
2022-04-07 06:29:46 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
/* stats entry identified by name on disk (e.g. replication slots) */
|
|
|
|
const PgStat_KindInfo *kind_info = NULL;
|
|
|
|
PgStat_Kind kind;
|
|
|
|
NameData name;
|
2021-04-27 05:39:11 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
if (!read_chunk_s(fpin, &kind))
|
|
|
|
goto error;
|
|
|
|
if (!read_chunk_s(fpin, &name))
|
|
|
|
goto error;
|
|
|
|
if (!pgstat_is_kind_valid(kind))
|
|
|
|
goto error;
|
2020-10-08 05:39:08 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
kind_info = pgstat_get_kind_info(kind);
|
2022-03-01 01:47:52 +01:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
if (!kind_info->from_serialized_name)
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
if (!kind_info->from_serialized_name(&name, &key))
|
|
|
|
{
|
|
|
|
/* skip over data for entry we don't care about */
|
|
|
|
if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0)
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
Assert(key.kind == kind);
|
2022-03-01 01:47:52 +01:00
|
|
|
}
|
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
/*
|
|
|
|
* This intentionally doesn't use pgstat_get_entry_ref() -
|
|
|
|
* putting all stats into the startup process's
|
|
|
|
* pgStatEntryRefHash would be wasted effort and memory.
|
|
|
|
*/
|
|
|
|
p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found);
|
|
|
|
|
|
|
|
/* don't allow duplicate entries */
|
|
|
|
if (found)
|
2022-03-01 01:47:52 +01:00
|
|
|
{
|
2022-04-07 06:29:46 +02:00
|
|
|
dshash_release_lock(pgStatLocal.shared_hash, p);
|
|
|
|
elog(WARNING, "found duplicate stats entry %d/%u/%u",
|
|
|
|
key.kind, key.dboid, key.objoid);
|
|
|
|
goto error;
|
2022-03-01 01:47:52 +01:00
|
|
|
}
|
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
header = pgstat_init_entry(key.kind, p);
|
|
|
|
dshash_release_lock(pgStatLocal.shared_hash, p);
|
|
|
|
|
|
|
|
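/*
 * The entry's stats data follows the key/name directly in the file;
 * read it straight into the freshly initialized shared entry.
 */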
if (!read_chunk(fpin,
|
|
|
|
pgstat_get_entry_data(key.kind, header),
|
|
|
|
pgstat_get_entry_len(key.kind)))
|
|
|
|
goto error;
|
2022-03-01 01:47:52 +01:00
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
2013-02-18 21:56:08 +01:00
|
|
|
case 'E':
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
default:
|
2022-04-07 06:29:46 +02:00
|
|
|
goto error;
|
2013-02-18 21:56:08 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
FreeFile(fpin);
|
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
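/*
 * Remove the file once its contents have been loaded: if we crash
 * before the next shutdown-time write, stats start out empty rather
 * than silently reverting to this file's stale contents.
 */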
elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
|
|
|
|
unlink(statfile);
|
2008-05-15 02:17:41 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
return;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
error:
|
|
|
|
ereport(LOG,
|
|
|
|
(errmsg("corrupted statistics file \"%s\"", statfile)));
|
2001-10-25 07:50:21 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
/* Set the current timestamp as reset timestamp */
|
|
|
|
pgstat_reset_after_failure(ts);
|
2007-02-08 00:11:30 +01:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
goto done;
|
2006-06-19 03:51:22 +02:00
|
|
|
}
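/*
 * Illustration of the 'N' branch above: a from_serialized_name callback
 * maps the NameData stored on disk back to a runtime PgStat_HashKey. A
 * hypothetical sketch for an invented kind - the kind constant and the
 * name-lookup helper are assumptions, not actual pgstat code:
 */
#ifdef NOT_USED
static bool
example_from_serialized_name(const NameData *name, PgStat_HashKey *key)
{
	/* hypothetical name-to-OID lookup for the object */
	Oid			objoid = example_lookup_oid(NameStr(*name));

	if (!OidIsValid(objoid))
		return false;			/* object is gone; caller skips its data */

	key->kind = PGSTAT_KIND_EXAMPLE;	/* hypothetical kind */
	key->dboid = InvalidOid;	/* not attached to a database */
	key->objoid = objoid;
	return true;
}
#endif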
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2022-04-04 21:14:34 +02:00
|
|
|
/*
|
2022-04-07 06:29:46 +02:00
|
|
|
* Helper to reset / drop stats after a failure while restoring stats from
|
|
|
|
* disk, possibly after part of the data has already been loaded.
|
2008-11-03 02:17:08 +01:00
|
|
|
*/
|
2022-04-07 06:29:46 +02:00
|
|
|
static void
|
|
|
|
pgstat_reset_after_failure(TimestampTz ts)
|
2008-11-03 02:17:08 +01:00
|
|
|
{
|
2022-04-07 06:29:46 +02:00
|
|
|
/* reset fixed-numbered stats */
|
|
|
|
for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++)
|
2008-11-03 02:17:08 +01:00
|
|
|
{
|
2022-04-07 06:29:46 +02:00
|
|
|
const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
|
2008-11-03 02:17:08 +01:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
if (!kind_info->fixed_amount)
|
|
|
|
continue;
|
2008-11-03 02:17:08 +01:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
kind_info->reset_all_cb(ts);
|
2014-01-28 18:58:22 +01:00
|
|
|
}
|
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
/* and drop variable-numbered ones */
|
|
|
|
pgstat_drop_all_entries();
|
2021-11-30 04:24:30 +01:00
|
|
|
}
|
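/*
 * Illustration of the callbacks used above: a fixed-numbered kind
 * provides fixed_amount and a reset_all_cb in its PgStat_KindInfo
 * entry. A hypothetical sketch - the kind's name and the callback body
 * are invented for illustration:
 */
#ifdef NOT_USED
static void
example_reset_all_cb(TimestampTz ts)
{
	/* zero the kind's shared struct and record ts as its reset time */
}

static const PgStat_KindInfo example_kind_info = {
	.name = "example",
	.fixed_amount = true,		/* reset via reset_all_cb, never dropped */
	.reset_all_cb = example_reset_all_cb,
};
#endif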