postgresql/src/include/pgstat.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1250 lines
35 KiB
C
Raw Normal View History

/* ----------
 * pgstat.h
 *
 * Definitions for the PostgreSQL cumulative statistics system.
 *
 * Copyright (c) 2001-2022, PostgreSQL Global Development Group
 *
 * src/include/pgstat.h
 * ----------
 */
#ifndef PGSTAT_H
#define PGSTAT_H
#include "datatype/timestamp.h"
#include "portability/instr_time.h"
#include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */
#include "replication/logicalproto.h"
#include "utils/backend_progress.h" /* for backward compatibility */
#include "utils/backend_status.h" /* for backward compatibility */
#include "utils/hsearch.h"
#include "utils/relcache.h"
#include "utils/wait_event.h" /* for backward compatibility */
/* ----------
 * Paths for the statistics files (relative to installation's $PGDATA).
 *
 * The permanent file name and the .tmp file name both spell out the
 * permanent directory name literally, so all three definitions must be
 * changed together if the directory is ever renamed.
 * ----------
 */
#define PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat"
#define PGSTAT_STAT_PERMANENT_FILENAME "pg_stat/global.stat"
#define PGSTAT_STAT_PERMANENT_TMPFILE "pg_stat/global.tmp"
/* Default directory to store temporary statistics data in */
#define PG_STAT_TMP_DIR "pg_stat_tmp"
/*
 * PgStat_Kind identifies the type of a statistics entry.
 *
 * Zero is reserved for the invalid kind so that zero-initialized data is
 * caught.  The numeric values are written out explicitly; they are exactly
 * the values the compiler would otherwise assign implicitly.
 */
typedef enum PgStat_Kind
{
    /* use 0 for INVALID, to catch zero-initialized data */
    PGSTAT_KIND_INVALID = 0,

    /* stats for variable-numbered objects */
    PGSTAT_KIND_DATABASE = 1,       /* database-wide statistics */
    PGSTAT_KIND_RELATION = 2,       /* per-table statistics */
    PGSTAT_KIND_FUNCTION = 3,       /* per-function statistics */
    PGSTAT_KIND_REPLSLOT = 4,       /* per-slot statistics */
    PGSTAT_KIND_SUBSCRIPTION = 5,   /* per-subscription statistics */

    /* stats for fixed-numbered objects */
    PGSTAT_KIND_ARCHIVER = 6,
    PGSTAT_KIND_BGWRITER = 7,
    PGSTAT_KIND_CHECKPOINTER = 8,
    PGSTAT_KIND_SLRU = 9,
    PGSTAT_KIND_WAL = 10,
} PgStat_Kind;

/*
 * Bounds of the valid kinds.  PGSTAT_NUM_KINDS is one more than the last
 * kind, i.e. it also counts the slot taken by PGSTAT_KIND_INVALID.
 */
#define PGSTAT_KIND_FIRST_VALID PGSTAT_KIND_DATABASE
#define PGSTAT_KIND_LAST PGSTAT_KIND_WAL
#define PGSTAT_NUM_KINDS (PGSTAT_KIND_LAST + 1)
/*
 * Settings of the track_functions GUC variable.
 *
 * The order of these values is significant, so each one is pinned to the
 * number it has always had.
 */
typedef enum TrackFunctionsLevel
{
    TRACK_FUNC_OFF = 0,
    TRACK_FUNC_PL = 1,
    TRACK_FUNC_ALL = 2
} TrackFunctionsLevel;
/*
 * Reasons a session ended.  DISCONNECT_NOT_YET marks a session that is
 * still active.
 */
typedef enum SessionEndType
{
    DISCONNECT_NOT_YET = 0,     /* still active */
    DISCONNECT_NORMAL = 1,
    DISCONNECT_CLIENT_EOF = 2,
    DISCONNECT_FATAL = 3,
    DISCONNECT_KILLED = 4
} SessionEndType;
/* ----------
 * The data type used for counters.
 *
 * This is an alias for int64.  Some fields declared with this type hold
 * deltas rather than monotonically increasing counts, and those can be
 * negative.
 * ----------
 */
typedef int64 PgStat_Counter;
/* ------------------------------------------------------------
* Structures kept in backend local memory while accumulating counts
* ------------------------------------------------------------
*/
/* ----------
 * PgStat_FunctionCounts    Per-function counts accumulated by a backend
 *
 * Restrict this struct to genuine event counters: it is compared against
 * all-zeroes with memcmp to find out whether there is anything to transmit.
 *
 * The time counters are held in instr_time format here.  They are converted
 * to microseconds, in PgStat_Counter format, when pending statistics are
 * flushed out.
 * ----------
 */
typedef struct PgStat_FunctionCounts
{
    PgStat_Counter f_numcalls;      /* event counter (calls, going by the name) */
    instr_time  f_total_time;       /* time counter, instr_time format */
    instr_time  f_self_time;        /* time counter, instr_time format */
} PgStat_FunctionCounts;
/* ----------
 * PgStat_BackendFunctionEntry  One entry of a backend's per-function
 *                              hash table
 * ----------
 */
typedef struct PgStat_BackendFunctionEntry
{
    Oid         f_id;               /* identifies the function; presumably the
                                     * hash key -- confirm against the hash
                                     * table setup */
    PgStat_FunctionCounts f_counts; /* counts accumulated for that function */
} PgStat_BackendFunctionEntry;
/*
 * PgStat_FunctionCallUsage holds the working state used while accumulating
 * timing statistics for a single function call.
 */
typedef struct PgStat_FunctionCallUsage
{
    /*
     * Pointer to the counts in the function's hashtable entry; that entry
     * must still exist when the call exits.  NULL means the current call is
     * not being tracked.
     */
    PgStat_FunctionCounts *fs;

    /* Total time already charged to the function when the call started */
    instr_time  save_f_total_time;

    /* Backend-wide total time when the call started */
    instr_time  save_total;

    /* System clock reading when the call started */
    instr_time  f_start;
} PgStat_FunctionCallUsage;
/* ----------
 * PgStat_TableCounts   The actual per-table counts kept by a backend
 *
 * This struct should contain only actual event counters, because we memcmp
 * it against zeroes to detect whether there are any counts to transmit.
 * It is a component of PgStat_TableStatus (within-backend state) and
 * PgStat_TableEntry (the transmitted message format).
 *
 * Note: for a table, tuples_returned is the number of tuples successfully
 * fetched by heap_getnext, while tuples_fetched is the number of tuples
 * successfully fetched by heap_fetch under the control of bitmap indexscans.
 * For an index, tuples_returned is the number of index entries returned by
 * the index AM, while tuples_fetched is the number of tuples successfully
 * fetched by heap_fetch under the control of simple indexscans for this index.
 *
 * tuples_inserted/updated/deleted/hot_updated count attempted actions,
 * regardless of whether the transaction committed.  delta_live_tuples,
 * delta_dead_tuples, and changed_tuples are set depending on commit or abort.
 * Note that delta_live_tuples and delta_dead_tuples can be negative!
 * ----------
 */
typedef struct PgStat_TableCounts
{
    PgStat_Counter t_numscans;

    /* see the header comment for the table vs. index meaning of these two */
    PgStat_Counter t_tuples_returned;
    PgStat_Counter t_tuples_fetched;

    /* attempted actions, counted whether or not the transaction committed */
    PgStat_Counter t_tuples_inserted;
    PgStat_Counter t_tuples_updated;
    PgStat_Counter t_tuples_deleted;
    PgStat_Counter t_tuples_hot_updated;

    /*
     * NOTE(review): this is a flag rather than a counter, and it is followed
     * by padding that the memcmp-against-zeroes test also reads; presumably
     * instances are always fully zeroed before use -- confirm.
     */
    bool        t_truncdropped;

    /* set depending on commit or abort; the two deltas can be negative */
    PgStat_Counter t_delta_live_tuples;
    PgStat_Counter t_delta_dead_tuples;
    PgStat_Counter t_changed_tuples;

    PgStat_Counter t_blocks_fetched;
    PgStat_Counter t_blocks_hit;
} PgStat_TableCounts;
/* ----------
 * PgStat_TableStatus   Per-table status within a backend
 *
 * Many of the event counters are nontransactional, ie, we count events
 * in committed and aborted transactions alike.  For these, we just count
 * directly in the PgStat_TableStatus.  However, delta_live_tuples,
 * delta_dead_tuples, and changed_tuples must be derived from event counts
 * with awareness of whether the transaction or subtransaction committed or
 * aborted.  Hence, we also keep a stack of per-(sub)transaction status
 * records for every table modified in the current transaction.  At commit
 * or abort, we propagate tuples_inserted/updated/deleted up to the
 * parent subtransaction level, or out to the parent PgStat_TableStatus,
 * as appropriate.
 * ----------
 */
typedef struct PgStat_TableStatus
{
    Oid         t_id;           /* table's OID */
    bool        t_shared;       /* is it a shared catalog? */
    struct PgStat_TableXactStatus *trans;   /* lowest subxact's counts */
    PgStat_TableCounts t_counts;    /* event counts to be sent */
} PgStat_TableStatus;
/* ----------
* PgStat_TableXactStatus Per-table, per-subtransaction status
* ----------
*/
typedef struct PgStat_TableXactStatus
{
PgStat_Counter tuples_inserted; /* tuples inserted in (sub)xact */
Revise pgstat's tracking of tuple changes to improve the reliability of decisions about when to auto-analyze. The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples, where all three of these numbers could be bad estimates from ANALYZE itself. Even worse, in the presence of a steady flow of HOT updates and matching HOT-tuple reclamations, auto-analyze might never trigger at all, even if all three numbers are exactly right, because n_dead_tuples could hold steady. To fix, replace last_anl_tuples with an accurately tracked count of the total number of committed tuple inserts + updates + deletes since the last ANALYZE on the table. This can still be compared to the same threshold as before, but it's much more trustworthy than the old computation. Tracking this requires one more intra-transaction counter per modified table within backends, but no additional memory space in the stats collector. There probably isn't any measurable speed difference; if anything it might be a bit faster than before, since I was able to eliminate some per-tuple arithmetic operations in favor of adding sums once per (sub)transaction. Also, simplify the logic around pgstat vacuum and analyze reporting messages by not trying to fold VACUUM ANALYZE into a single pgstat message. The original thought behind this patch was to allow scheduling of analyzes on parent tables by artificially inflating their changes_since_analyze count. I've left that for a separate patch since this change seems to stand on its own merit.
2009-12-30 21:32:14 +01:00
PgStat_Counter tuples_updated; /* tuples updated in (sub)xact */
PgStat_Counter tuples_deleted; /* tuples deleted in (sub)xact */
bool truncdropped; /* relation truncated/dropped in this
* (sub)xact */
/* tuples i/u/d prior to truncate/drop */
PgStat_Counter inserted_pre_truncdrop;
PgStat_Counter updated_pre_truncdrop;
PgStat_Counter deleted_pre_truncdrop;
int nest_level; /* subtransaction nest level */
/* links to other structs for same relation: */
struct PgStat_TableXactStatus *upper; /* next higher subxact if any */
PgStat_TableStatus *parent; /* per-table status */
/* structs of same subxact level are linked here: */
struct PgStat_TableXactStatus *next; /* next of same subxact */
} PgStat_TableXactStatus;
/* ------------------------------------------------------------
* Message formats follow
* ------------------------------------------------------------
*/
/* ----------
 * StatMsgType  The types of backend -> collector messages
 *
 * The numeric values are written out explicitly; they are exactly the values
 * implicit numbering would produce.
 * ----------
 */
typedef enum StatMsgType
{
    PGSTAT_MTYPE_DUMMY = 0,
    PGSTAT_MTYPE_INQUIRY = 1,
    PGSTAT_MTYPE_TABSTAT = 2,
    PGSTAT_MTYPE_TABPURGE = 3,
    PGSTAT_MTYPE_DROPDB = 4,
    PGSTAT_MTYPE_RESETCOUNTER = 5,
    PGSTAT_MTYPE_RESETSHAREDCOUNTER = 6,
    PGSTAT_MTYPE_RESETSINGLECOUNTER = 7,
    PGSTAT_MTYPE_RESETSLRUCOUNTER = 8,
    PGSTAT_MTYPE_RESETREPLSLOTCOUNTER = 9,
    PGSTAT_MTYPE_RESETSUBCOUNTER = 10,
    PGSTAT_MTYPE_AUTOVAC_START = 11,
    PGSTAT_MTYPE_VACUUM = 12,
    PGSTAT_MTYPE_ANALYZE = 13,
    PGSTAT_MTYPE_ARCHIVER = 14,
    PGSTAT_MTYPE_BGWRITER = 15,
    PGSTAT_MTYPE_CHECKPOINTER = 16,
    PGSTAT_MTYPE_WAL = 17,
    PGSTAT_MTYPE_SLRU = 18,
    PGSTAT_MTYPE_FUNCSTAT = 19,
    PGSTAT_MTYPE_FUNCPURGE = 20,
    PGSTAT_MTYPE_RECOVERYCONFLICT = 21,
    PGSTAT_MTYPE_TEMPFILE = 22,
    PGSTAT_MTYPE_DEADLOCK = 23,
    PGSTAT_MTYPE_CHECKSUMFAILURE = 24,
    PGSTAT_MTYPE_REPLSLOT = 25,
    PGSTAT_MTYPE_CONNECT = 26,
    PGSTAT_MTYPE_DISCONNECT = 27,
    PGSTAT_MTYPE_SUBSCRIPTIONDROP = 28,
    PGSTAT_MTYPE_SUBSCRIPTIONERROR = 29,
} StatMsgType;
/* ----------
 * PgStat_MsgHdr    Header common to every message
 *
 * Each message struct in this file starts with one of these as its first
 * member (m_hdr).
 * ----------
 */
typedef struct PgStat_MsgHdr
{
    StatMsgType m_type;     /* which kind of message this is */
    int         m_size;     /* message size; presumably in bytes and
                             * including this header -- confirm at the
                             * sending site */
} PgStat_MsgHdr;
/* ----------
 * Space available in a message. This will keep the UDP packets below 1K,
 * which should fit unfragmented into the MTU of the loopback interface.
 * (Larger values of PGSTAT_MAX_MSG_SIZE would work for that on most
 * platforms, but we're being conservative here.)
 *
 * PGSTAT_MSG_PAYLOAD is what remains of PGSTAT_MAX_MSG_SIZE once the common
 * PgStat_MsgHdr has been subtracted. Because of the sizeof operand, the
 * expression has an unsigned (size_t) type.
 * ----------
 */
#define PGSTAT_MAX_MSG_SIZE 1000
#define PGSTAT_MSG_PAYLOAD (PGSTAT_MAX_MSG_SIZE - sizeof(PgStat_MsgHdr))
/* ----------
 * PgStat_MsgDummy  A dummy message that the collector ignores
 *
 * It consists of nothing but the common header.
 * ----------
 */
typedef struct PgStat_MsgDummy
{
    PgStat_MsgHdr m_hdr;    /* common message header */
} PgStat_MsgDummy;
/* ----------
* PgStat_MsgInquiry Sent by a backend to ask the collector
Avoid useless closely-spaced writes of statistics files. The original intent in the stats collector was that we should not write out stats data oftener than every PGSTAT_STAT_INTERVAL msec. Backends will not make requests at all if they see the existing data is newer than that, and the stats collector is supposed to disregard requests having a cutoff_time older than its most recently written data, so that close-together requests don't result in multiple writes. But the latter part of that got broken in commit 187492b6c2e8cafc, so that if two backends concurrently decide the existing stats are too old, the collector would write the data twice. (In principle the collector's logic would still merge requests as long as the second one arrives before we've actually written data ... but since the message collection loop would write data immediately after processing a single inquiry message, that never happened in practice, and in any case the window in which it might work would be much shorter than PGSTAT_STAT_INTERVAL.) To fix, improve pgstat_recv_inquiry so that it checks whether the cutoff time is too old, and doesn't add a request to the queue if so. This means that we do not need DBWriteRequest.request_time, because the decision is taken before making a queue entry. And that means that we don't really need the DBWriteRequest data structure at all; an OID list of database OIDs will serve and allow removal of some rather verbose and crufty code. In passing, improve the comments in this area, which have been rather neglected. Also change backend_read_statsfile so that it's not silently relying on MyDatabaseId to have some particular value in the autovacuum launcher process. It accidentally worked as desired because MyDatabaseId is zero in that process; but that does not seem like a dependency we want, especially with no documentation about it. 
Although this patch is mine, it turns out I'd rediscovered a known bug, for which Tomas Vondra had already submitted a patch that's functionally equivalent to the non-cosmetic aspects of this patch. Thanks to Tomas for reviewing this version. Back-patch to 9.3 where the bug was introduced. Prior-Discussion: <1718942738eb65c8407fcd864883f4c8@fuzzy.cz> Patch: <4625.1464202586@sss.pgh.pa.us>
2016-05-31 21:54:46 +02:00
* to write the stats file(s).
*
* Ordinarily, an inquiry message prompts writing of the global stats file,
* the stats file for shared catalogs, and the stats file for the specified
* database. If databaseid is InvalidOid, only the first two are written.
*
* New file(s) will be written only if the existing file has a timestamp
* older than the specified cutoff_time; this prevents duplicated effort
* when multiple requests arrive at nearly the same time, assuming that
* backends send requests with cutoff_times a little bit in the past.
*
* clock_time should be the requestor's current local time; the collector
* uses this to check for the system clock going backward, but it has no
* effect unless that occurs. We assume clock_time >= cutoff_time, though.
* ----------
*/
typedef struct PgStat_MsgInquiry
{
PgStat_MsgHdr m_hdr;
Fix stats collector to recover nicely when system clock goes backwards. Formerly, if the system clock went backwards, the stats collector would fail to update the stats file any more until the clock reading again exceeds whatever timestamp was last written into the stats file. Such glitches in the clock's behavior are not terribly unlikely on machines not using NTP. Such a scenario has been observed to cause regression test failures in the buildfarm, and it could have bad effects on the behavior of autovacuum, so it seems prudent to install some defenses. We could directly detect the clock going backwards by adding GetCurrentTimestamp calls in the stats collector's main loop, but that would hurt performance on platforms where GetCurrentTimestamp is expensive. To minimize the performance hit in normal cases, adopt a more complicated scheme wherein backends check for clock skew when reading the stats file, and if they see it, signal the stats collector by sending an extra stats inquiry message. The stats collector does an extra GetCurrentTimestamp only when it receives an inquiry with an apparently out-of-order timestamp. To avoid unnecessary GetCurrentTimestamp calls, expand the inquiry messages to carry the backend's current clock reading as well as its stats cutoff time. The latter, being intentionally slightly in-the-past, would trigger more clock rechecks than we need if it were used for this purpose. We might want to backpatch this change at some point, but let's let it shake out in the buildfarm for awhile first.
2012-06-17 23:11:07 +02:00
TimestampTz clock_time; /* observed local clock time */
TimestampTz cutoff_time; /* minimum acceptable file timestamp */
Avoid useless closely-spaced writes of statistics files. The original intent in the stats collector was that we should not write out stats data oftener than every PGSTAT_STAT_INTERVAL msec. Backends will not make requests at all if they see the existing data is newer than that, and the stats collector is supposed to disregard requests having a cutoff_time older than its most recently written data, so that close-together requests don't result in multiple writes. But the latter part of that got broken in commit 187492b6c2e8cafc, so that if two backends concurrently decide the existing stats are too old, the collector would write the data twice. (In principle the collector's logic would still merge requests as long as the second one arrives before we've actually written data ... but since the message collection loop would write data immediately after processing a single inquiry message, that never happened in practice, and in any case the window in which it might work would be much shorter than PGSTAT_STAT_INTERVAL.) To fix, improve pgstat_recv_inquiry so that it checks whether the cutoff time is too old, and doesn't add a request to the queue if so. This means that we do not need DBWriteRequest.request_time, because the decision is taken before making a queue entry. And that means that we don't really need the DBWriteRequest data structure at all; an OID list of database OIDs will serve and allow removal of some rather verbose and crufty code. In passing, improve the comments in this area, which have been rather neglected. Also change backend_read_statsfile so that it's not silently relying on MyDatabaseId to have some particular value in the autovacuum launcher process. It accidentally worked as desired because MyDatabaseId is zero in that process; but that does not seem like a dependency we want, especially with no documentation about it. 
Although this patch is mine, it turns out I'd rediscovered a known bug, for which Tomas Vondra had already submitted a patch that's functionally equivalent to the non-cosmetic aspects of this patch. Thanks to Tomas for reviewing this version. Back-patch to 9.3 where the bug was introduced. Prior-Discussion: <1718942738eb65c8407fcd864883f4c8@fuzzy.cz> Patch: <4625.1464202586@sss.pgh.pa.us>
2016-05-31 21:54:46 +02:00
Oid databaseid; /* requested DB (InvalidOid => shared only) */
} PgStat_MsgInquiry;
/* ----------
 * PgStat_TableEntry    Information about one table inside a MsgTabstat
 * ----------
 */
typedef struct PgStat_TableEntry
{
    Oid         t_id;               /* identifies the table (presumably its
                                     * OID, as in PgStat_TableStatus) */
    PgStat_TableCounts t_counts;    /* the counts reported for that table */
} PgStat_TableEntry;
/* ----------
 * PgStat_MsgTabstat    Sent by the backend to report table
 *                      and buffer access statistics.
 *
 * PGSTAT_NUM_TABENTRIES is the number of PgStat_TableEntry items that fit in
 * the payload after the fixed fields that precede m_entry: one Oid, three
 * ints and five PgStat_Counters.  Keep the macro in step with the struct
 * whenever a fixed field is added or removed.
 * ----------
 */
#define PGSTAT_NUM_TABENTRIES \
    ((PGSTAT_MSG_PAYLOAD - \
      (sizeof(Oid) + 3 * sizeof(int) + 5 * sizeof(PgStat_Counter))) \
     / sizeof(PgStat_TableEntry))

typedef struct PgStat_MsgTabstat
{
    PgStat_MsgHdr m_hdr;            /* common message header */
    Oid         m_databaseid;
    int         m_nentries;         /* presumably the number of m_entry slots
                                     * in use -- confirm at the sender */
    int         m_xact_commit;
    int         m_xact_rollback;
    PgStat_Counter m_block_read_time;   /* times in microseconds */
    PgStat_Counter m_block_write_time;
    PgStat_Counter m_session_time;
    PgStat_Counter m_active_time;
    PgStat_Counter m_idle_in_xact_time;
    PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES];
} PgStat_MsgTabstat;
/* ----------
 * PgStat_MsgTabpurge   Sent by the backend to tell the collector
 *                      about dead tables.
 *
 * PGSTAT_NUM_TABPURGE is the number of Oids that fit in the payload after
 * the fixed fields that precede m_tableid: one Oid and one int.
 * ----------
 */
#define PGSTAT_NUM_TABPURGE \
    ((PGSTAT_MSG_PAYLOAD - (sizeof(Oid) + sizeof(int))) \
     / sizeof(Oid))

typedef struct PgStat_MsgTabpurge
{
    PgStat_MsgHdr m_hdr;            /* common message header */
    Oid         m_databaseid;
    int         m_nentries;         /* presumably the number of m_tableid
                                     * slots in use -- confirm at the sender */
    Oid         m_tableid[PGSTAT_NUM_TABPURGE];
} PgStat_MsgTabpurge;
/* ----------
 * PgStat_MsgDropdb     Sent by the backend to tell the collector
 *                      about a dropped database
 * ----------
 */
typedef struct PgStat_MsgDropdb
{
    PgStat_MsgHdr m_hdr;        /* common message header */
    Oid         m_databaseid;   /* the database the message is about */
} PgStat_MsgDropdb;
/* ----------
 * PgStat_MsgResetcounter   Sent by the backend to tell the collector
 *                          to reset counters
 * ----------
 */
typedef struct PgStat_MsgResetcounter
{
    PgStat_MsgHdr m_hdr;        /* common message header */
    Oid         m_databaseid;   /* presumably the database whose counters are
                                 * to be reset -- confirm at the receiver */
} PgStat_MsgResetcounter;
/* ----------
 * PgStat_MsgResetsharedcounter Sent by the backend to tell the collector
 *                              to reset a shared counter
 * ----------
 */
typedef struct PgStat_MsgResetsharedcounter
{
    PgStat_MsgHdr m_hdr;        /* common message header */
    PgStat_Kind m_resettarget;  /* statistics kind selecting what to reset */
} PgStat_MsgResetsharedcounter;
/* ----------
 * PgStat_MsgResetsinglecounter Sent by the backend to tell the collector
 *                              to reset a single counter
 *
 * The target is described by a database, a statistics kind and an object.
 * ----------
 */
typedef struct PgStat_MsgResetsinglecounter
{
    PgStat_MsgHdr m_hdr;        /* common message header */
    Oid         m_databaseid;
    PgStat_Kind m_resettype;    /* statistics kind of the object */
    Oid         m_objectid;
} PgStat_MsgResetsinglecounter;
/* ----------
 * PgStat_MsgResetslrucounter   Sent by the backend to tell the collector
 *                              to reset a SLRU counter
 * ----------
 */
typedef struct PgStat_MsgResetslrucounter
{
    PgStat_MsgHdr m_hdr;        /* common message header */
    int         m_index;        /* presumably selects which SLRU to reset;
                                 * the encoding is not visible here */
} PgStat_MsgResetslrucounter;
/* ----------
 * PgStat_MsgResetreplslotcounter   Sent by the backend to tell the collector
 *                                  to reset replication slot counter(s)
 * ----------
 */
typedef struct PgStat_MsgResetreplslotcounter
{
    PgStat_MsgHdr m_hdr;        /* common message header */
    NameData    m_slotname;     /* name of a replication slot */
    bool        clearall;       /* presumably true means reset every slot
                                 * rather than only m_slotname -- confirm at
                                 * the receiver */
} PgStat_MsgResetreplslotcounter;
/* ----------
 * PgStat_MsgResetsubcounter Sent by the backend to tell the collector
 *								to reset subscription counter(s)
 *
 * A single Oid selects the target; InvalidOid acts as the "all
 * subscriptions" wildcard.
 * ----------
 */
typedef struct PgStat_MsgResetsubcounter
{
	PgStat_MsgHdr m_hdr;
	Oid			m_subid;		/* InvalidOid means reset all subscription
								 * stats */
} PgStat_MsgResetsubcounter;
/* ----------
 * PgStat_MsgAutovacStart		Sent by the autovacuum daemon to signal
 *								that a database is going to be processed
 *
 * Carries the database Oid and a timestamp.
 * NOTE(review): m_start_time is presumably the time processing starts --
 * confirm where the sender takes the timestamp.
 * ----------
 */
typedef struct PgStat_MsgAutovacStart
{
	PgStat_MsgHdr m_hdr;
	Oid			m_databaseid;
	TimestampTz m_start_time;
} PgStat_MsgAutovacStart;
/* ----------
 * PgStat_MsgVacuum				Sent by the backend or autovacuum daemon
 *								after VACUUM
 *
 * Carries the target table (database + table Oid), a flag telling whether
 * the sender set m_autovacuum, a timestamp, and live/dead tuple counters.
 * ----------
 */
typedef struct PgStat_MsgVacuum
{
	PgStat_MsgHdr m_hdr;
	Oid			m_databaseid;
	Oid			m_tableoid;
	bool		m_autovacuum;	/* presumably true when sent by autovacuum
								 * -- confirm in the sender */
	TimestampTz m_vacuumtime;
	PgStat_Counter m_live_tuples;
	PgStat_Counter m_dead_tuples;
} PgStat_MsgVacuum;
/* ----------
 * PgStat_MsgAnalyze			Sent by the backend or autovacuum daemon
 *								after ANALYZE
 *
 * Same shape as PgStat_MsgVacuum plus m_resetcounter.
 * NOTE(review): m_resetcounter presumably carries the "resetcounter"
 * argument of pgstat_report_analyze() declared in this header -- confirm.
 * ----------
 */
typedef struct PgStat_MsgAnalyze
{
	PgStat_MsgHdr m_hdr;
	Oid			m_databaseid;
	Oid			m_tableoid;
	bool		m_autovacuum;
	bool		m_resetcounter;
	TimestampTz m_analyzetime;
	PgStat_Counter m_live_tuples;
	PgStat_Counter m_dead_tuples;
} PgStat_MsgAnalyze;
/* ----------
 * PgStat_MsgArchiver			Sent by the archiver to update statistics.
 *
 * m_xlog holds up to MAX_XFN_CHARS characters plus one extra byte
 * (presumably the terminating NUL -- confirm in the sender).
 * ----------
 */
typedef struct PgStat_MsgArchiver
{
	PgStat_MsgHdr m_hdr;
	bool		m_failed;		/* Failed attempt */
	char		m_xlog[MAX_XFN_CHARS + 1];
	TimestampTz m_timestamp;
} PgStat_MsgArchiver;
/* ----------
 * PgStat_MsgBgWriter			Sent by the bgwriter to update statistics.
 *
 * This type is also used for the PendingBgWriterStats variable declared
 * later in this header.  The three counters have the same names (minus the
 * m_ prefix) as those in PgStat_BgWriterStats.
 * ----------
 */
typedef struct PgStat_MsgBgWriter
{
	PgStat_MsgHdr m_hdr;
	PgStat_Counter m_buf_written_clean;
	PgStat_Counter m_maxwritten_clean;
	PgStat_Counter m_buf_alloc;
} PgStat_MsgBgWriter;
/* ----------
 * PgStat_MsgCheckpointer	Sent by the checkpointer to update statistics.
 *
 * This type is also used for the PendingCheckpointerStats variable declared
 * later in this header.  The counters have the same names (minus the m_
 * prefix) as those in PgStat_CheckpointerStats.
 * ----------
 */
typedef struct PgStat_MsgCheckpointer
{
	PgStat_MsgHdr m_hdr;
	PgStat_Counter m_timed_checkpoints;
	PgStat_Counter m_requested_checkpoints;
	PgStat_Counter m_buf_written_checkpoints;
	PgStat_Counter m_buf_written_backend;
	PgStat_Counter m_buf_fsync_backend;
	PgStat_Counter m_checkpoint_write_time; /* times in milliseconds */
	PgStat_Counter m_checkpoint_sync_time;
} PgStat_MsgCheckpointer;
/* ----------
 * PgStat_MsgWal			Sent by backends and background processes to
 *							update WAL statistics.
 *
 * This type is also used for the WalStats variable declared later in this
 * header.  m_wal_bytes is the only field declared as uint64 rather than
 * PgStat_Counter.
 * ----------
 */
typedef struct PgStat_MsgWal
{
	PgStat_MsgHdr m_hdr;
	PgStat_Counter m_wal_records;
	PgStat_Counter m_wal_fpi;
	uint64		m_wal_bytes;
	PgStat_Counter m_wal_buffers_full;
	PgStat_Counter m_wal_write;
	PgStat_Counter m_wal_sync;
	PgStat_Counter m_wal_write_time;	/* time spent writing wal records in
										 * microseconds */
	PgStat_Counter m_wal_sync_time; /* time spent syncing wal records in
									 * microseconds */
} PgStat_MsgWal;
/* ----------
 * PgStat_MsgSLRU			Sent by a backend to update SLRU statistics.
 *
 * Note that m_index is declared as PgStat_Counter here, whereas
 * PgStat_MsgResetslrucounter declares its m_index as int.  The remaining
 * counters have the same names (minus the m_ prefix) as those in
 * PgStat_SLRUStats.
 * ----------
 */
typedef struct PgStat_MsgSLRU
{
	PgStat_MsgHdr m_hdr;
	PgStat_Counter m_index;
	PgStat_Counter m_blocks_zeroed;
	PgStat_Counter m_blocks_hit;
	PgStat_Counter m_blocks_read;
	PgStat_Counter m_blocks_written;
	PgStat_Counter m_blocks_exists;
	PgStat_Counter m_flush;
	PgStat_Counter m_truncate;
} PgStat_MsgSLRU;
/* ----------
 * PgStat_MsgReplSlot	Sent by a backend or a wal sender to update replication
 *						slot statistics.
 *
 * The eight counters have the same names (minus the m_ prefix) as those in
 * PgStat_StatReplSlotEntry.
 * NOTE(review): m_create / m_drop presumably mark messages that announce
 * slot creation or removal rather than a counter update -- confirm.
 * ----------
 */
typedef struct PgStat_MsgReplSlot
{
	PgStat_MsgHdr m_hdr;
	NameData	m_slotname;
	bool		m_create;
	bool		m_drop;
	PgStat_Counter m_spill_txns;
	PgStat_Counter m_spill_count;
	PgStat_Counter m_spill_bytes;
	PgStat_Counter m_stream_txns;
	PgStat_Counter m_stream_count;
	PgStat_Counter m_stream_bytes;
	PgStat_Counter m_total_txns;
	PgStat_Counter m_total_bytes;
} PgStat_MsgReplSlot;
/* ----------
 * PgStat_MsgSubscriptionDrop	Sent by the backend and autovacuum to tell the
 *								collector about the dead subscription.
 *
 * The payload is the Oid of the subscription.
 * ----------
 */
typedef struct PgStat_MsgSubscriptionDrop
{
	PgStat_MsgHdr m_hdr;
	Oid			m_subid;
} PgStat_MsgSubscriptionDrop;
/* ----------
 * PgStat_MsgSubscriptionError	Sent by the apply worker or the table sync
 *								worker to report an error on the subscription.
 *
 * The fields have the same types as the (subid, is_apply_error) arguments of
 * pgstat_report_subscription_error() declared in this header.
 * NOTE(review): m_is_apply_error presumably distinguishes apply-worker
 * errors (true) from table-sync errors (false) -- confirm.
 * ----------
 */
typedef struct PgStat_MsgSubscriptionError
{
	PgStat_MsgHdr m_hdr;
	Oid			m_subid;
	bool		m_is_apply_error;
} PgStat_MsgSubscriptionError;
/* ----------
 * PgStat_MsgRecoveryConflict	Sent by the backend upon recovery conflict
 *
 * NOTE(review): m_reason is a plain int, like the "reason" argument of
 * pgstat_report_recovery_conflict(); the set of valid values is not visible
 * in this header -- check the caller.
 * ----------
 */
typedef struct PgStat_MsgRecoveryConflict
{
	PgStat_MsgHdr m_hdr;
	Oid			m_databaseid;
	int			m_reason;
} PgStat_MsgRecoveryConflict;
/* ----------
 * PgStat_MsgTempFile	Sent by the backend upon creating a temp file
 *
 * Carries the database Oid and a file size stored as size_t.
 * NOTE(review): the unit of m_filesize is presumably bytes -- confirm.
 * ----------
 */
typedef struct PgStat_MsgTempFile
{
	PgStat_MsgHdr m_hdr;
	Oid			m_databaseid;
	size_t		m_filesize;
} PgStat_MsgTempFile;
/* ----------
 * PgStat_FunctionEntry			Per-function info in a MsgFuncstat
 *
 * This is the element type of PgStat_MsgFuncstat.m_entry[]; it has no
 * message header of its own.
 * ----------
 */
typedef struct PgStat_FunctionEntry
{
	Oid			f_id;
	PgStat_Counter f_numcalls;
	PgStat_Counter f_total_time;	/* times in microseconds */
	PgStat_Counter f_self_time;
} PgStat_FunctionEntry;
/* ----------
 * PgStat_MsgFuncstat			Sent by the backend to report function
 *								usage statistics.
 *
 * PGSTAT_NUM_FUNCENTRIES is the capacity of m_entry[]: the number of
 * PgStat_FunctionEntry structs that fit into PGSTAT_MSG_PAYLOAD after
 * subtracting the sizes of m_databaseid (Oid) and m_nentries (int).
 * ----------
 */
#define PGSTAT_NUM_FUNCENTRIES	\
	((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int))  \
	 / sizeof(PgStat_FunctionEntry))

typedef struct PgStat_MsgFuncstat
{
	PgStat_MsgHdr m_hdr;
	Oid			m_databaseid;
	int			m_nentries;		/* presumably the number of m_entry[] slots
								 * in use -- confirm in the sender */
	PgStat_FunctionEntry m_entry[PGSTAT_NUM_FUNCENTRIES];
} PgStat_MsgFuncstat;
/* ----------
 * PgStat_MsgFuncpurge			Sent by the backend to tell the collector
 *								about dead functions.
 *
 * PGSTAT_NUM_FUNCPURGE is the capacity of m_functionid[]; it is computed
 * with the same formula as PGSTAT_NUM_TABPURGE (payload minus one Oid and
 * one int, divided by sizeof(Oid)).
 * ----------
 */
#define PGSTAT_NUM_FUNCPURGE  \
	((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int))  \
	 / sizeof(Oid))

typedef struct PgStat_MsgFuncpurge
{
	PgStat_MsgHdr m_hdr;
	Oid			m_databaseid;
	int			m_nentries;		/* presumably the number of m_functionid[]
								 * slots in use -- confirm in the sender */
	Oid			m_functionid[PGSTAT_NUM_FUNCPURGE];
} PgStat_MsgFuncpurge;
/* ----------
 * PgStat_MsgDeadlock			Sent by the backend to tell the collector
 *								about a deadlock that occurred.
 *
 * The payload is a single database Oid; the message carries no count.
 * ----------
 */
typedef struct PgStat_MsgDeadlock
{
	PgStat_MsgHdr m_hdr;
	Oid			m_databaseid;
} PgStat_MsgDeadlock;
/* ----------
 * PgStat_MsgChecksumFailure	Sent by the backend to tell the collector
 *								about checksum failures noticed.
 *
 * m_databaseid and m_failurecount have the same types as the
 * (dboid, failurecount) arguments of
 * pgstat_report_checksum_failures_in_db() declared in this header.
 * ----------
 */
typedef struct PgStat_MsgChecksumFailure
{
	PgStat_MsgHdr m_hdr;
	Oid			m_databaseid;
	int			m_failurecount;
	TimestampTz m_failure_time;
} PgStat_MsgChecksumFailure;
/* ----------
 * PgStat_MsgConnect			Sent by the backend upon connection
 *								establishment
 *
 * The payload is a single database Oid.
 * ----------
 */
typedef struct PgStat_MsgConnect
{
	PgStat_MsgHdr m_hdr;
	Oid			m_databaseid;
} PgStat_MsgConnect;
/* ----------
 * PgStat_MsgDisconnect			Sent by the backend when disconnecting
 *
 * m_cause has the same type (SessionEndType) as the pgStatSessionEndCause
 * variable declared later in this header.
 * NOTE(review): presumably it is filled from that variable -- confirm.
 * ----------
 */
typedef struct PgStat_MsgDisconnect
{
	PgStat_MsgHdr m_hdr;
	Oid			m_databaseid;
	SessionEndType m_cause;
} PgStat_MsgDisconnect;
/* ----------
 * PgStat_Msg					Union over all possible messages.
 *
 * Every member is a message struct (or the bare header), so
 * sizeof(PgStat_Msg) is at least the size of the largest message.
 * A new message struct needs a member here to be covered by the union.
 * ----------
 */
typedef union PgStat_Msg
{
	PgStat_MsgHdr msg_hdr;
	PgStat_MsgDummy msg_dummy;
	PgStat_MsgInquiry msg_inquiry;
	PgStat_MsgTabstat msg_tabstat;
	PgStat_MsgTabpurge msg_tabpurge;
	PgStat_MsgDropdb msg_dropdb;
	PgStat_MsgResetcounter msg_resetcounter;
	PgStat_MsgResetsharedcounter msg_resetsharedcounter;
	PgStat_MsgResetsinglecounter msg_resetsinglecounter;
	PgStat_MsgResetslrucounter msg_resetslrucounter;
	PgStat_MsgResetreplslotcounter msg_resetreplslotcounter;
	PgStat_MsgResetsubcounter msg_resetsubcounter;
	PgStat_MsgAutovacStart msg_autovacuum_start;
	PgStat_MsgVacuum msg_vacuum;
	PgStat_MsgAnalyze msg_analyze;
	PgStat_MsgArchiver msg_archiver;
	PgStat_MsgBgWriter msg_bgwriter;
	PgStat_MsgCheckpointer msg_checkpointer;
	PgStat_MsgWal msg_wal;
	PgStat_MsgSLRU msg_slru;
	PgStat_MsgFuncstat msg_funcstat;
	PgStat_MsgFuncpurge msg_funcpurge;
	PgStat_MsgRecoveryConflict msg_recoveryconflict;
	PgStat_MsgDeadlock msg_deadlock;
	PgStat_MsgTempFile msg_tempfile;
	PgStat_MsgChecksumFailure msg_checksumfailure;
	PgStat_MsgReplSlot msg_replslot;
	PgStat_MsgConnect msg_connect;
	PgStat_MsgDisconnect msg_disconnect;
	PgStat_MsgSubscriptionError msg_subscriptionerror;
	PgStat_MsgSubscriptionDrop msg_subscriptiondrop;
} PgStat_Msg;
/* ------------------------------------------------------------
 * Statistics collector data structures follow
 *
 * PGSTAT_FILE_FORMAT_ID should be changed whenever any of these
 * data structures change.
 * NOTE(review): presumably the ID is stored in the stats file and checked
 * when the file is read back -- confirm in the reader/writer code.
 * ------------------------------------------------------------
 */

#define PGSTAT_FILE_FORMAT_ID	0x01A5BCA6
/*
 * Archiver statistics, as returned by pgstat_fetch_stat_archiver().
 *
 * Both WAL file name buffers hold MAX_XFN_CHARS characters plus one extra
 * byte, the same size as PgStat_MsgArchiver.m_xlog.
 */
typedef struct PgStat_ArchiverStats
{
	PgStat_Counter archived_count;	/* archival successes */
	char		last_archived_wal[MAX_XFN_CHARS + 1];	/* last WAL file
														 * archived */
	TimestampTz last_archived_timestamp;	/* last archival success time */
	PgStat_Counter failed_count;	/* failed archival attempts */
	char		last_failed_wal[MAX_XFN_CHARS + 1]; /* WAL file involved in
													 * last failure */
	TimestampTz last_failed_timestamp;	/* last archival failure time */
	TimestampTz stat_reset_timestamp;
} PgStat_ArchiverStats;
/*
 * Background writer statistics, as returned by pgstat_fetch_stat_bgwriter()
 * and embedded in PgStat_GlobalStats.
 *
 * The counters have the same names (plus an m_ prefix) in
 * PgStat_MsgBgWriter.
 */
typedef struct PgStat_BgWriterStats
{
	PgStat_Counter buf_written_clean;
	PgStat_Counter maxwritten_clean;
	PgStat_Counter buf_alloc;
	TimestampTz stat_reset_timestamp;
} PgStat_BgWriterStats;
/*
 * Checkpointer statistics, as returned by pgstat_fetch_stat_checkpointer()
 * and embedded in PgStat_GlobalStats.
 *
 * The counters have the same names (plus an m_ prefix) in
 * PgStat_MsgCheckpointer, though they are declared in a different order.
 */
typedef struct PgStat_CheckpointerStats
{
	TimestampTz stats_timestamp;	/* time of stats file update */
	PgStat_Counter timed_checkpoints;
	PgStat_Counter requested_checkpoints;
	PgStat_Counter checkpoint_write_time;	/* times in milliseconds */
	PgStat_Counter checkpoint_sync_time;
	PgStat_Counter buf_written_checkpoints;
	PgStat_Counter buf_written_backend;
	PgStat_Counter buf_fsync_backend;
} PgStat_CheckpointerStats;
/*
 * Per-database statistics entry, as returned by pgstat_fetch_stat_dbentry().
 *
 * Member order matters: everything up to (but not including) the two HTAB
 * pointers is what gets written to the stats file, per the comment on those
 * pointers below.
 */
typedef struct PgStat_StatDBEntry
{
	Oid			databaseid;
	PgStat_Counter n_xact_commit;
	PgStat_Counter n_xact_rollback;
	PgStat_Counter n_blocks_fetched;
	PgStat_Counter n_blocks_hit;
	PgStat_Counter n_tuples_returned;
	PgStat_Counter n_tuples_fetched;
	PgStat_Counter n_tuples_inserted;
	PgStat_Counter n_tuples_updated;
	PgStat_Counter n_tuples_deleted;
	TimestampTz last_autovac_time;
	PgStat_Counter n_conflict_tablespace;
	PgStat_Counter n_conflict_lock;
	PgStat_Counter n_conflict_snapshot;
	PgStat_Counter n_conflict_bufferpin;
	PgStat_Counter n_conflict_startup_deadlock;
	PgStat_Counter n_temp_files;
	PgStat_Counter n_temp_bytes;
	PgStat_Counter n_deadlocks;
	PgStat_Counter n_checksum_failures;
	TimestampTz last_checksum_failure;
	PgStat_Counter n_block_read_time;	/* times in microseconds */
	PgStat_Counter n_block_write_time;
	PgStat_Counter n_sessions;
	PgStat_Counter total_session_time;
	PgStat_Counter total_active_time;
	PgStat_Counter total_idle_in_xact_time;
	PgStat_Counter n_sessions_abandoned;
	PgStat_Counter n_sessions_fatal;
	PgStat_Counter n_sessions_killed;
	TimestampTz stat_reset_timestamp;
	TimestampTz stats_timestamp;	/* time of db stats file update */

	/*
	 * tables and functions must be last in the struct, because we don't write
	 * the pointers out to the stats file.
	 */
	HTAB	   *tables;
	HTAB	   *functions;
} PgStat_StatDBEntry;
/*
 * Per-function statistics entry, as returned by
 * pgstat_fetch_stat_funcentry().
 *
 * The three counters have the same names as those in PgStat_FunctionEntry;
 * only the Oid member is named differently (functionid vs. f_id).
 */
typedef struct PgStat_StatFuncEntry
{
	Oid			functionid;
	PgStat_Counter f_numcalls;
	PgStat_Counter f_total_time;	/* times in microseconds */
	PgStat_Counter f_self_time;
} PgStat_StatFuncEntry;
/*
 * Cluster-wide statistics, as returned by pgstat_fetch_global(): a
 * timestamp plus the checkpointer and bgwriter structs embedded by value.
 */
typedef struct PgStat_GlobalStats
{
	TimestampTz stats_timestamp;	/* time of stats file update */
	PgStat_CheckpointerStats checkpointer;
	PgStat_BgWriterStats bgwriter;
} PgStat_GlobalStats;
/*
 * Per-replication-slot statistics entry, as returned by
 * pgstat_fetch_replslot() and passed (as const) to pgstat_report_replslot().
 *
 * The eight counters have the same names (plus an m_ prefix) in
 * PgStat_MsgReplSlot.
 */
typedef struct PgStat_StatReplSlotEntry
{
	NameData	slotname;
	PgStat_Counter spill_txns;
	PgStat_Counter spill_count;
	PgStat_Counter spill_bytes;
	PgStat_Counter stream_txns;
	PgStat_Counter stream_count;
	PgStat_Counter stream_bytes;
	PgStat_Counter total_txns;
	PgStat_Counter total_bytes;
	TimestampTz stat_reset_timestamp;
} PgStat_StatReplSlotEntry;
/*
 * SLRU statistics; pgstat_fetch_slru() returns a pointer to this type.
 *
 * The counters have the same names (plus an m_ prefix) in PgStat_MsgSLRU.
 * Unlike that message, this struct has no index member.
 * NOTE(review): presumably pgstat_fetch_slru() returns an array indexed by
 * SLRU index -- confirm in pgstat.c.
 */
typedef struct PgStat_SLRUStats
{
	PgStat_Counter blocks_zeroed;
	PgStat_Counter blocks_hit;
	PgStat_Counter blocks_read;
	PgStat_Counter blocks_written;
	PgStat_Counter blocks_exists;
	PgStat_Counter flush;
	PgStat_Counter truncate;
	TimestampTz stat_reset_timestamp;
} PgStat_SLRUStats;
/*
 * Per-subscription statistics entry, as returned by
 * pgstat_fetch_stat_subscription().
 *
 * subid is used as a hash key and therefore has to stay the first member.
 */
typedef struct PgStat_StatSubEntry
{
	Oid			subid;			/* hash key (must be first) */
	PgStat_Counter apply_error_count;
	PgStat_Counter sync_error_count;
	TimestampTz stat_reset_timestamp;
} PgStat_StatSubEntry;
/*
 * Per-table statistics entry, as returned by pgstat_fetch_stat_tabentry().
 *
 * The last eight members come in timestamp/count pairs, one pair each for
 * user-initiated and autovacuum-initiated VACUUM and ANALYZE.
 */
typedef struct PgStat_StatTabEntry
{
	Oid			tableid;
	PgStat_Counter numscans;
	PgStat_Counter tuples_returned;
	PgStat_Counter tuples_fetched;
	PgStat_Counter tuples_inserted;
	PgStat_Counter tuples_updated;
	PgStat_Counter tuples_deleted;
	PgStat_Counter tuples_hot_updated;
	PgStat_Counter n_live_tuples;
	PgStat_Counter n_dead_tuples;
	PgStat_Counter changes_since_analyze;
	PgStat_Counter inserts_since_vacuum;
	PgStat_Counter blocks_fetched;
	PgStat_Counter blocks_hit;
	TimestampTz vacuum_timestamp;	/* user initiated vacuum */
	PgStat_Counter vacuum_count;
	TimestampTz autovac_vacuum_timestamp;	/* autovacuum initiated */
	PgStat_Counter autovac_vacuum_count;
	TimestampTz analyze_timestamp;	/* user initiated */
	PgStat_Counter analyze_count;
	TimestampTz autovac_analyze_timestamp;	/* autovacuum initiated */
	PgStat_Counter autovac_analyze_count;
} PgStat_StatTabEntry;
/*
 * WAL statistics, as returned by pgstat_fetch_stat_wal().
 *
 * The counters have the same names (plus an m_ prefix) and the same types
 * in PgStat_MsgWal, whose comments give microseconds as the unit of the two
 * *_time members.
 */
typedef struct PgStat_WalStats
{
	PgStat_Counter wal_records;
	PgStat_Counter wal_fpi;
	uint64		wal_bytes;
	PgStat_Counter wal_buffers_full;
	PgStat_Counter wal_write;
	PgStat_Counter wal_sync;
	PgStat_Counter wal_write_time;
	PgStat_Counter wal_sync_time;
	TimestampTz stat_reset_timestamp;
} PgStat_WalStats;
/*
 * Functions in pgstat.c
 */

/*
 * Functions called from postmaster.
 *
 * Only pgstat_start() returns a value (an int); the others return nothing.
 * NOTE(review): the meaning of pgstat_start()'s result is not visible in
 * this header -- presumably the collector's PID or 0 on failure; confirm.
 */
extern void pgstat_init(void);
extern void pgstat_reset_all(void);
extern int	pgstat_start(void);
extern void allow_immediate_pgstat_restart(void);

/*
 * Collector entry point, declared only in EXEC_BACKEND builds.  It is marked
 * noreturn, so callers must not expect control to come back.
 */
#ifdef EXEC_BACKEND
extern void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn();
#endif
/* Functions for backend initialization */
extern void pgstat_initialize(void);

/*
 * Functions called from backends
 *
 * pgstat_reset() takes the same (kind, database Oid, object Oid) triple that
 * PgStat_MsgResetsinglecounter carries; pgstat_reset_of_kind() takes only a
 * kind.
 * NOTE(review): the exact effect of "force" in pgstat_report_stat() is not
 * visible here -- presumably it bypasses a rate limit; confirm in pgstat.c.
 */
extern void pgstat_report_stat(bool force);
extern void pgstat_vacuum_stat(void);
extern void pgstat_ping(void);

extern void pgstat_reset_counters(void);
extern void pgstat_reset(PgStat_Kind kind, Oid dboid, Oid objectid);
extern void pgstat_reset_of_kind(PgStat_Kind kind);
/*
 * stats accessors
 *
 * Each pgstat_fetch_* function returns a pointer to the statistics struct
 * named in its return type.  Note that pgstat_fetch_replslot() takes its
 * NameData argument by value, not by pointer.
 * NOTE(review): lifetime/ownership of the returned memory, and whether NULL
 * is returned for a missing entry, cannot be told from this header --
 * check the definitions before relying on either.
 */
extern void pgstat_clear_snapshot(void);
extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void);
extern PgStat_BgWriterStats *pgstat_fetch_stat_bgwriter(void);
extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void);
extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);
extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
extern PgStat_GlobalStats *pgstat_fetch_global(void);
extern PgStat_StatReplSlotEntry *pgstat_fetch_replslot(NameData slotname);
extern PgStat_StatSubEntry *pgstat_fetch_stat_subscription(Oid subid);
extern PgStat_SLRUStats *pgstat_fetch_slru(void);
extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);
extern PgStat_WalStats *pgstat_fetch_stat_wal(void);
/*
 * Functions in pgstat_archiver.c
 */

/*
 * Arguments correspond to the m_xlog / m_failed fields of
 * PgStat_MsgArchiver.
 */
extern void pgstat_report_archiver(const char *xlog, bool failed);

/*
 * Functions in pgstat_bgwriter.c
 */

/*
 * Takes no arguments.
 * NOTE(review): presumably reports the PendingBgWriterStats variable
 * declared in this header -- confirm in pgstat_bgwriter.c.
 */
extern void pgstat_report_bgwriter(void);

/*
 * Functions in pgstat_checkpointer.c
 */

/*
 * Takes no arguments.
 * NOTE(review): presumably reports the PendingCheckpointerStats variable
 * declared in this header -- confirm in pgstat_checkpointer.c.
 */
extern void pgstat_report_checkpointer(void);
/*
 * Functions in pgstat_database.c
 */

extern void pgstat_drop_database(Oid databaseid);
extern void pgstat_report_autovac(Oid dboid);
extern void pgstat_report_recovery_conflict(int reason);
extern void pgstat_report_deadlock(void);
extern void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount);
extern void pgstat_report_checksum_failure(void);
extern void pgstat_report_connect(Oid dboid);

/*
 * Each macro below adds "n" to one of the PgStat_Counter accumulators
 * declared in the "Variables in pgstat_database.c" section of this header.
 * The argument is parenthesized and evaluated exactly once; the macro is an
 * expression whose value is the updated total.
 */
#define pgstat_count_buffer_read_time(n)							\
	(pgStatBlockReadTime += (n))
#define pgstat_count_buffer_write_time(n)							\
	(pgStatBlockWriteTime += (n))
#define pgstat_count_conn_active_time(n)							\
	(pgStatActiveTime += (n))
#define pgstat_count_conn_txn_idle_time(n)							\
	(pgStatTransactionIdleTime += (n))
/*
* Functions in pgstat_function.c
*/
pgstat: scaffolding for transactional stats creation / drop. One problematic part of the current statistics collector design is that there is no reliable way of getting rid of statistics entries. Because of that pgstat_vacuum_stat() (called by [auto-]vacuum) matches all stats for the current database with the catalog contents and tries to drop now-superfluous entries. That's quite expensive. What's worse, it doesn't work on physical replicas, despite physical replicas collection statistics entries. This commit introduces infrastructure to create / drop statistics entries transactionally, together with the underlying catalog objects (functions, relations, subscriptions). pgstat_xact.c maintains a list of stats entries created / dropped transactionally in the current transaction. To ensure the removal of statistics entries is durable dropped statistics entries are included in commit / abort (and prepare) records, which also ensures that stats entries are dropped on standbys. Statistics entries created separately from creating the underlying catalog object (e.g. when stats were previously lost due to an immediate restart) are *not* WAL logged. However that can only happen outside of the transaction creating the catalog object, so it does not lead to "leaked" statistics entries. For this to work, functions creating / dropping functions / relations / subscriptions need to call into pgstat. For subscriptions this was already done when dropping subscriptions, via pgstat_report_subscription_drop() (now renamed to pgstat_drop_subscription()). This commit does not actually drop stats yet, it just provides the infrastructure. It is however a largely independent piece of infrastructure, so committing it separately makes sense. Bumps XLOG_PAGE_MAGIC. Author: Andres Freund <andres@anarazel.de> Reviewed-By: Thomas Munro <thomas.munro@gmail.com> Reviewed-By: Kyotaro Horiguchi <horikyota.ntt@gmail.com> Discussion: https://postgr.es/m/20220303021600.hs34ghqcw6zcokdh@alap3.anarazel.de
2022-04-07 03:22:22 +02:00
extern void pgstat_create_function(Oid proid);
extern void pgstat_drop_function(Oid proid);
struct FunctionCallInfoBaseData;
extern void pgstat_init_function_usage(struct FunctionCallInfoBaseData *fcinfo,
PgStat_FunctionCallUsage *fcu);
extern void pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu,
bool finalize);
extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id);
/*
* Functions in pgstat_relation.c
*/
pgstat: scaffolding for transactional stats creation / drop. One problematic part of the current statistics collector design is that there is no reliable way of getting rid of statistics entries. Because of that pgstat_vacuum_stat() (called by [auto-]vacuum) matches all stats for the current database with the catalog contents and tries to drop now-superfluous entries. That's quite expensive. What's worse, it doesn't work on physical replicas, despite physical replicas collection statistics entries. This commit introduces infrastructure to create / drop statistics entries transactionally, together with the underlying catalog objects (functions, relations, subscriptions). pgstat_xact.c maintains a list of stats entries created / dropped transactionally in the current transaction. To ensure the removal of statistics entries is durable dropped statistics entries are included in commit / abort (and prepare) records, which also ensures that stats entries are dropped on standbys. Statistics entries created separately from creating the underlying catalog object (e.g. when stats were previously lost due to an immediate restart) are *not* WAL logged. However that can only happen outside of the transaction creating the catalog object, so it does not lead to "leaked" statistics entries. For this to work, functions creating / dropping functions / relations / subscriptions need to call into pgstat. For subscriptions this was already done when dropping subscriptions, via pgstat_report_subscription_drop() (now renamed to pgstat_drop_subscription()). This commit does not actually drop stats yet, it just provides the infrastructure. It is however a largely independent piece of infrastructure, so committing it separately makes sense. Bumps XLOG_PAGE_MAGIC. Author: Andres Freund <andres@anarazel.de> Reviewed-By: Thomas Munro <thomas.munro@gmail.com> Reviewed-By: Kyotaro Horiguchi <horikyota.ntt@gmail.com> Discussion: https://postgr.es/m/20220303021600.hs34ghqcw6zcokdh@alap3.anarazel.de
2022-04-07 03:22:22 +02:00
extern void pgstat_create_relation(Relation rel);
extern void pgstat_drop_relation(Relation rel);
extern void pgstat_copy_relation_stats(Relation dstrel, Relation srcrel);
extern void pgstat_relation_init(Relation rel);
extern void pgstat_report_vacuum(Oid tableoid, bool shared,
PgStat_Counter livetuples, PgStat_Counter deadtuples);
extern void pgstat_report_analyze(Relation rel,
PgStat_Counter livetuples, PgStat_Counter deadtuples,
bool resetcounter);
/*
 * True when "rel" has a pgstat_info attached, i.e. when its counters can be
 * updated.  The test is wrapped in likely().
 */
#define pgstat_relation_should_count(rel) \
	(likely((rel)->pgstat_info != NULL))

/*
 * Nontransactional event counters, simple enough to be macros.
 *
 * Every macro is a single statement (do { ... } while (0)) that does nothing
 * unless pgstat_relation_should_count(rel) holds.  "rel" is expanded twice,
 * so it must not have side effects.
 */

/* one more scan on a heap relation */
#define pgstat_count_heap_scan(rel) \
	do { \
		if (pgstat_relation_should_count(rel)) \
		{ \
			(rel)->pgstat_info->t_counts.t_numscans += 1; \
		} \
	} while (0)

/* one more tuple returned from a heap relation */
#define pgstat_count_heap_getnext(rel) \
	do { \
		if (pgstat_relation_should_count(rel)) \
		{ \
			(rel)->pgstat_info->t_counts.t_tuples_returned += 1; \
		} \
	} while (0)

/* one more tuple fetched from a heap relation */
#define pgstat_count_heap_fetch(rel) \
	do { \
		if (pgstat_relation_should_count(rel)) \
		{ \
			(rel)->pgstat_info->t_counts.t_tuples_fetched += 1; \
		} \
	} while (0)

/* one more scan on an index; uses the same t_numscans counter */
#define pgstat_count_index_scan(rel) \
	do { \
		if (pgstat_relation_should_count(rel)) \
		{ \
			(rel)->pgstat_info->t_counts.t_numscans += 1; \
		} \
	} while (0)

/* "n" more tuples returned from an index */
#define pgstat_count_index_tuples(rel, n) \
	do { \
		if (pgstat_relation_should_count(rel)) \
		{ \
			(rel)->pgstat_info->t_counts.t_tuples_returned += (n); \
		} \
	} while (0)

/* one more buffer fetched for the relation */
#define pgstat_count_buffer_read(rel) \
	do { \
		if (pgstat_relation_should_count(rel)) \
		{ \
			(rel)->pgstat_info->t_counts.t_blocks_fetched += 1; \
		} \
	} while (0)

/* one more buffer hit for the relation */
#define pgstat_count_buffer_hit(rel) \
	do { \
		if (pgstat_relation_should_count(rel)) \
		{ \
			(rel)->pgstat_info->t_counts.t_blocks_hit += 1; \
		} \
	} while (0)
/*
 * Tuple-change counters.  Unlike the macros above these are real functions.
 * NOTE(review): presumably because they need per-transaction bookkeeping --
 * confirm in pgstat_relation.c.
 */
extern void pgstat_count_heap_insert(Relation rel, PgStat_Counter n);
extern void pgstat_count_heap_update(Relation rel, bool hot);
extern void pgstat_count_heap_delete(Relation rel);
extern void pgstat_count_truncate(Relation rel);
extern void pgstat_update_heap_dead_tuples(Relation rel, int delta);

/*
 * The two twophase functions share one signature: (xid, info, recdata, len).
 * NOTE(review): presumably they are registered as two-phase commit / abort
 * callbacks -- confirm where they are referenced.
 */
extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info,
									   void *recdata, uint32 len);
extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
									  void *recdata, uint32 len);

/* Look up the table status entry for "rel_id". */
extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
/*
 * Functions in pgstat_replslot.c
 */

/* Takes the slot by name, unlike the functions below. */
extern void pgstat_reset_replslot(const char *name);

/*
 * Forward declaration: only pointers to the struct are used below, so this
 * header does not have to include the header that defines it.
 */
struct ReplicationSlot;

/* repSlotStat is const-qualified: the pointed-to stats are not modified. */
extern void pgstat_report_replslot(struct ReplicationSlot *slot, const PgStat_StatReplSlotEntry *repSlotStat);
extern void pgstat_create_replslot(struct ReplicationSlot *slot);
extern void pgstat_drop_replslot(struct ReplicationSlot *slot);
/*
 * Functions in pgstat_slru.c
 */

/*
 * Reset SLRU statistics, addressed by name like pgstat_slru_index().
 * NOTE(review): whether a NULL name is accepted (e.g. meaning "all SLRUs")
 * is not visible in this header -- confirm in pgstat_slru.c.
 */
extern void pgstat_reset_slru(const char *name);

/* Per-event counters; each takes the index of the SLRU concerned. */
extern void pgstat_count_slru_page_zeroed(int slru_idx);
extern void pgstat_count_slru_page_hit(int slru_idx);
extern void pgstat_count_slru_page_read(int slru_idx);
extern void pgstat_count_slru_page_written(int slru_idx);
extern void pgstat_count_slru_page_exists(int slru_idx);
extern void pgstat_count_slru_flush(int slru_idx);
extern void pgstat_count_slru_truncate(int slru_idx);

/* Conversions between an SLRU's index and its name. */
extern const char *pgstat_slru_name(int slru_idx);
extern int	pgstat_slru_index(const char *name);
/*
 * Functions in pgstat_subscription.c
 */

/*
 * Report an error on subscription "subid".  The arguments have the same
 * types as the m_subid / m_is_apply_error fields of
 * PgStat_MsgSubscriptionError.
 */
extern void pgstat_report_subscription_error(Oid subid, bool is_apply_error);

/* Create / drop the statistics entry for subscription "subid". */
extern void pgstat_create_subscription(Oid subid);
extern void pgstat_drop_subscription(Oid subid);
/*
 * Functions in pgstat_xact.c
 */

/*
 * Transaction-lifecycle hooks.
 * NOTE(review): names follow the AtEOXact_ / AtEOSubXact_ / AtPrepare_ /
 * PostPrepare_ pattern; presumably each is called from the matching point in
 * transaction processing -- confirm the call sites.
 */
extern void AtEOXact_PgStat(bool isCommit, bool parallel);
extern void AtEOSubXact_PgStat(bool isCommit, int nestDepth);
extern void AtPrepare_PgStat(void);
extern void PostPrepare_PgStat(void);

/*
 * Forward declaration: only pointers to the struct are used below, so this
 * header does not have to include the header that defines it.
 */
struct xl_xact_stats_item;

/*
 * pgstat_get_transactional_drops() returns an int and hands back an item
 * array through "items"; pgstat_execute_transactional_drops() consumes a
 * count plus such an array.
 * NOTE(review): presumably the returned int is the number of items, and the
 * array is allocated by the callee -- confirm ownership in pgstat_xact.c.
 */
extern int	pgstat_get_transactional_drops(bool isCommit, struct xl_xact_stats_item **items);
extern void pgstat_execute_transactional_drops(int ndrops, struct xl_xact_stats_item *items, bool is_redo);
/*
 * Functions in pgstat_wal.c
 */

/*
 * NOTE(review): presumably reports the WalStats variable declared in this
 * header; the exact effect of "force" is not visible here -- confirm in
 * pgstat_wal.c.
 */
extern void pgstat_report_wal(bool force);
/*
 * Variables in pgstat.c
 */

/*
 * GUC parameters
 *
 * Only the first two are marked PGDLLIMPORT; the three path variables below
 * them are not.
 */
extern PGDLLIMPORT bool pgstat_track_counts;
extern PGDLLIMPORT int pgstat_track_functions;
extern char *pgstat_stat_directory;
extern char *pgstat_stat_tmpname;
extern char *pgstat_stat_filename;

/*
 * Variables in pgstat_bgwriter.c
 */

/* updated directly by bgwriter and bufmgr */
extern PgStat_MsgBgWriter PendingBgWriterStats;

/*
 * Variables in pgstat_checkpointer.c
 */

/*
 * Checkpointer statistics counters are updated directly by checkpointer and
 * bufmgr.
 */
extern PgStat_MsgCheckpointer PendingCheckpointerStats;

/*
 * Variables in pgstat_database.c
 */

/* Updated by pgstat_count_buffer_*_time macros */
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;

/*
 * Updated by pgstat_count_conn_*_time macros, called by
 * pgstat_report_activity().
 */
extern PgStat_Counter pgStatActiveTime;
extern PgStat_Counter pgStatTransactionIdleTime;

/* updated by the traffic cop and in errfinish() */
extern SessionEndType pgStatSessionEndCause;

/*
 * Variables in pgstat_wal.c
 */

/* updated directly by backends and background processes */
extern PgStat_MsgWal WalStats;
#endif /* PGSTAT_H */