postgresql/src/include/pgstat.h

1387 lines
39 KiB
C
Raw Normal View History

/* ----------
* pgstat.h
*
* Definitions for the PostgreSQL statistics collector daemon.
*
* Copyright (c) 2001-2019, PostgreSQL Global Development Group
*
2010-09-20 22:08:53 +02:00
* src/include/pgstat.h
* ----------
*/
#ifndef PGSTAT_H
#define PGSTAT_H
#include "datatype/timestamp.h"
#include "fmgr.h"
#include "libpq/pqcomm.h"
#include "port/atomics.h"
#include "portability/instr_time.h"
#include "postmaster/pgarch.h"
#include "storage/proc.h"
#include "utils/hsearch.h"
#include "utils/relcache.h"
/* ----------
* Paths for the statistics files (relative to installation's $PGDATA).
* ----------
*/
#define PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat"
#define PGSTAT_STAT_PERMANENT_FILENAME "pg_stat/global.stat"
#define PGSTAT_STAT_PERMANENT_TMPFILE "pg_stat/global.tmp"
/* Default directory to store temporary statistics data in */
#define PG_STAT_TMP_DIR "pg_stat_tmp"
/* Values for track_functions GUC variable --- order is significant! */
typedef enum TrackFunctionsLevel
{
TRACK_FUNC_OFF,
TRACK_FUNC_PL,
TRACK_FUNC_ALL
2017-06-21 20:39:04 +02:00
} TrackFunctionsLevel;
/* ----------
* The types of backend -> collector messages
* ----------
*/
typedef enum StatMsgType
{
PGSTAT_MTYPE_DUMMY,
PGSTAT_MTYPE_INQUIRY,
PGSTAT_MTYPE_TABSTAT,
PGSTAT_MTYPE_TABPURGE,
PGSTAT_MTYPE_DROPDB,
PGSTAT_MTYPE_RESETCOUNTER,
PGSTAT_MTYPE_RESETSHAREDCOUNTER,
PGSTAT_MTYPE_RESETSINGLECOUNTER,
PGSTAT_MTYPE_AUTOVAC_START,
PGSTAT_MTYPE_VACUUM,
PGSTAT_MTYPE_ANALYZE,
PGSTAT_MTYPE_ARCHIVER,
PGSTAT_MTYPE_BGWRITER,
PGSTAT_MTYPE_FUNCSTAT,
PGSTAT_MTYPE_FUNCPURGE,
PGSTAT_MTYPE_RECOVERYCONFLICT,
PGSTAT_MTYPE_TEMPFILE,
PGSTAT_MTYPE_DEADLOCK,
PGSTAT_MTYPE_CHECKSUMFAILURE
} StatMsgType;
/* ----------
* The data type used for counters.
* ----------
*/
typedef int64 PgStat_Counter;
/* ----------
* PgStat_TableCounts The actual per-table counts kept by a backend
*
* This struct should contain only actual event counters, because we memcmp
* it against zeroes to detect whether there are any counts to transmit.
* It is a component of PgStat_TableStatus (within-backend state) and
* PgStat_TableEntry (the transmitted message format).
*
* Note: for a table, tuples_returned is the number of tuples successfully
* fetched by heap_getnext, while tuples_fetched is the number of tuples
* successfully fetched by heap_fetch under the control of bitmap indexscans.
* For an index, tuples_returned is the number of index entries returned by
* the index AM, while tuples_fetched is the number of tuples successfully
* fetched by heap_fetch under the control of simple indexscans for this index.
*
* tuples_inserted/updated/deleted/hot_updated count attempted actions,
Revise pgstat's tracking of tuple changes to improve the reliability of decisions about when to auto-analyze. The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples, where all three of these numbers could be bad estimates from ANALYZE itself. Even worse, in the presence of a steady flow of HOT updates and matching HOT-tuple reclamations, auto-analyze might never trigger at all, even if all three numbers are exactly right, because n_dead_tuples could hold steady. To fix, replace last_anl_tuples with an accurately tracked count of the total number of committed tuple inserts + updates + deletes since the last ANALYZE on the table. This can still be compared to the same threshold as before, but it's much more trustworthy than the old computation. Tracking this requires one more intra-transaction counter per modified table within backends, but no additional memory space in the stats collector. There probably isn't any measurable speed difference; if anything it might be a bit faster than before, since I was able to eliminate some per-tuple arithmetic operations in favor of adding sums once per (sub)transaction. Also, simplify the logic around pgstat vacuum and analyze reporting messages by not trying to fold VACUUM ANALYZE into a single pgstat message. The original thought behind this patch was to allow scheduling of analyzes on parent tables by artificially inflating their changes_since_analyze count. I've left that for a separate patch since this change seems to stand on its own merit.
2009-12-30 21:32:14 +01:00
* regardless of whether the transaction committed. delta_live_tuples,
* delta_dead_tuples, and changed_tuples are set depending on commit or abort.
* Note that delta_live_tuples and delta_dead_tuples can be negative!
* ----------
*/
typedef struct PgStat_TableCounts
{
PgStat_Counter t_numscans;
PgStat_Counter t_tuples_returned;
PgStat_Counter t_tuples_fetched;
PgStat_Counter t_tuples_inserted;
PgStat_Counter t_tuples_updated;
PgStat_Counter t_tuples_deleted;
PgStat_Counter t_tuples_hot_updated;
bool t_truncated;
Revise pgstat's tracking of tuple changes to improve the reliability of decisions about when to auto-analyze. The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples, where all three of these numbers could be bad estimates from ANALYZE itself. Even worse, in the presence of a steady flow of HOT updates and matching HOT-tuple reclamations, auto-analyze might never trigger at all, even if all three numbers are exactly right, because n_dead_tuples could hold steady. To fix, replace last_anl_tuples with an accurately tracked count of the total number of committed tuple inserts + updates + deletes since the last ANALYZE on the table. This can still be compared to the same threshold as before, but it's much more trustworthy than the old computation. Tracking this requires one more intra-transaction counter per modified table within backends, but no additional memory space in the stats collector. There probably isn't any measurable speed difference; if anything it might be a bit faster than before, since I was able to eliminate some per-tuple arithmetic operations in favor of adding sums once per (sub)transaction. Also, simplify the logic around pgstat vacuum and analyze reporting messages by not trying to fold VACUUM ANALYZE into a single pgstat message. The original thought behind this patch was to allow scheduling of analyzes on parent tables by artificially inflating their changes_since_analyze count. I've left that for a separate patch since this change seems to stand on its own merit.
2009-12-30 21:32:14 +01:00
PgStat_Counter t_delta_live_tuples;
PgStat_Counter t_delta_dead_tuples;
PgStat_Counter t_changed_tuples;
PgStat_Counter t_blocks_fetched;
PgStat_Counter t_blocks_hit;
} PgStat_TableCounts;
/* Possible targets for resetting cluster-wide shared values */
typedef enum PgStat_Shared_Reset_Target
{
RESET_ARCHIVER,
2010-02-26 03:01:40 +01:00
RESET_BGWRITER
} PgStat_Shared_Reset_Target;
/* Possible object types for resetting single counters */
typedef enum PgStat_Single_Reset_Type
{
RESET_TABLE,
RESET_FUNCTION
} PgStat_Single_Reset_Type;
/* ------------------------------------------------------------
* Structures kept in backend local memory while accumulating counts
* ------------------------------------------------------------
*/
/* ----------
* PgStat_TableStatus Per-table status within a backend
*
Revise pgstat's tracking of tuple changes to improve the reliability of decisions about when to auto-analyze. The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples, where all three of these numbers could be bad estimates from ANALYZE itself. Even worse, in the presence of a steady flow of HOT updates and matching HOT-tuple reclamations, auto-analyze might never trigger at all, even if all three numbers are exactly right, because n_dead_tuples could hold steady. To fix, replace last_anl_tuples with an accurately tracked count of the total number of committed tuple inserts + updates + deletes since the last ANALYZE on the table. This can still be compared to the same threshold as before, but it's much more trustworthy than the old computation. Tracking this requires one more intra-transaction counter per modified table within backends, but no additional memory space in the stats collector. There probably isn't any measurable speed difference; if anything it might be a bit faster than before, since I was able to eliminate some per-tuple arithmetic operations in favor of adding sums once per (sub)transaction. Also, simplify the logic around pgstat vacuum and analyze reporting messages by not trying to fold VACUUM ANALYZE into a single pgstat message. The original thought behind this patch was to allow scheduling of analyzes on parent tables by artificially inflating their changes_since_analyze count. I've left that for a separate patch since this change seems to stand on its own merit.
2009-12-30 21:32:14 +01:00
* Many of the event counters are nontransactional, ie, we count events
* in committed and aborted transactions alike. For these, we just count
* directly in the PgStat_TableStatus. However, delta_live_tuples,
Revise pgstat's tracking of tuple changes to improve the reliability of decisions about when to auto-analyze. The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples, where all three of these numbers could be bad estimates from ANALYZE itself. Even worse, in the presence of a steady flow of HOT updates and matching HOT-tuple reclamations, auto-analyze might never trigger at all, even if all three numbers are exactly right, because n_dead_tuples could hold steady. To fix, replace last_anl_tuples with an accurately tracked count of the total number of committed tuple inserts + updates + deletes since the last ANALYZE on the table. This can still be compared to the same threshold as before, but it's much more trustworthy than the old computation. Tracking this requires one more intra-transaction counter per modified table within backends, but no additional memory space in the stats collector. There probably isn't any measurable speed difference; if anything it might be a bit faster than before, since I was able to eliminate some per-tuple arithmetic operations in favor of adding sums once per (sub)transaction. Also, simplify the logic around pgstat vacuum and analyze reporting messages by not trying to fold VACUUM ANALYZE into a single pgstat message. The original thought behind this patch was to allow scheduling of analyzes on parent tables by artificially inflating their changes_since_analyze count. I've left that for a separate patch since this change seems to stand on its own merit.
2009-12-30 21:32:14 +01:00
* delta_dead_tuples, and changed_tuples must be derived from event counts
* with awareness of whether the transaction or subtransaction committed or
* aborted. Hence, we also keep a stack of per-(sub)transaction status
* records for every table modified in the current transaction. At commit
Revise pgstat's tracking of tuple changes to improve the reliability of decisions about when to auto-analyze. The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples, where all three of these numbers could be bad estimates from ANALYZE itself. Even worse, in the presence of a steady flow of HOT updates and matching HOT-tuple reclamations, auto-analyze might never trigger at all, even if all three numbers are exactly right, because n_dead_tuples could hold steady. To fix, replace last_anl_tuples with an accurately tracked count of the total number of committed tuple inserts + updates + deletes since the last ANALYZE on the table. This can still be compared to the same threshold as before, but it's much more trustworthy than the old computation. Tracking this requires one more intra-transaction counter per modified table within backends, but no additional memory space in the stats collector. There probably isn't any measurable speed difference; if anything it might be a bit faster than before, since I was able to eliminate some per-tuple arithmetic operations in favor of adding sums once per (sub)transaction. Also, simplify the logic around pgstat vacuum and analyze reporting messages by not trying to fold VACUUM ANALYZE into a single pgstat message. The original thought behind this patch was to allow scheduling of analyzes on parent tables by artificially inflating their changes_since_analyze count. I've left that for a separate patch since this change seems to stand on its own merit.
2009-12-30 21:32:14 +01:00
* or abort, we propagate tuples_inserted/updated/deleted up to the
* parent subtransaction level, or out to the parent PgStat_TableStatus,
* as appropriate.
* ----------
*/
typedef struct PgStat_TableStatus
{
2007-11-15 22:14:46 +01:00
Oid t_id; /* table's OID */
bool t_shared; /* is it a shared catalog? */
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
struct PgStat_TableXactStatus *trans; /* lowest subxact's counts */
PgStat_TableCounts t_counts; /* event counts to be sent */
} PgStat_TableStatus;
/* ----------
* PgStat_TableXactStatus Per-table, per-subtransaction status
* ----------
*/
typedef struct PgStat_TableXactStatus
{
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
PgStat_Counter tuples_inserted; /* tuples inserted in (sub)xact */
PgStat_Counter tuples_updated; /* tuples updated in (sub)xact */
PgStat_Counter tuples_deleted; /* tuples deleted in (sub)xact */
bool truncated; /* relation truncated in this (sub)xact */
PgStat_Counter inserted_pre_trunc; /* tuples inserted prior to truncate */
PgStat_Counter updated_pre_trunc; /* tuples updated prior to truncate */
PgStat_Counter deleted_pre_trunc; /* tuples deleted prior to truncate */
2007-11-15 22:14:46 +01:00
int nest_level; /* subtransaction nest level */
/* links to other structs for same relation: */
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
struct PgStat_TableXactStatus *upper; /* next higher subxact if any */
2007-11-15 22:14:46 +01:00
PgStat_TableStatus *parent; /* per-table status */
/* structs of same subxact level are linked here: */
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
struct PgStat_TableXactStatus *next; /* next of same subxact */
} PgStat_TableXactStatus;
/* ------------------------------------------------------------
* Message formats follow
* ------------------------------------------------------------
*/
/* ----------
* PgStat_MsgHdr The common message header
* ----------
*/
typedef struct PgStat_MsgHdr
{
2005-10-15 04:49:52 +02:00
StatMsgType m_type;
int m_size;
} PgStat_MsgHdr;
/* ----------
* Space available in a message. This will keep the UDP packets below 1K,
* which should fit unfragmented into the MTU of the loopback interface.
* (Larger values of PGSTAT_MAX_MSG_SIZE would work for that on most
* platforms, but we're being conservative here.)
* ----------
*/
#define PGSTAT_MAX_MSG_SIZE 1000
#define PGSTAT_MSG_PAYLOAD (PGSTAT_MAX_MSG_SIZE - sizeof(PgStat_MsgHdr))
/* ----------
* PgStat_MsgDummy A dummy message, ignored by the collector
* ----------
*/
typedef struct PgStat_MsgDummy
{
PgStat_MsgHdr m_hdr;
} PgStat_MsgDummy;
/* ----------
* PgStat_MsgInquiry Sent by a backend to ask the collector
Avoid useless closely-spaced writes of statistics files. The original intent in the stats collector was that we should not write out stats data oftener than every PGSTAT_STAT_INTERVAL msec. Backends will not make requests at all if they see the existing data is newer than that, and the stats collector is supposed to disregard requests having a cutoff_time older than its most recently written data, so that close-together requests don't result in multiple writes. But the latter part of that got broken in commit 187492b6c2e8cafc, so that if two backends concurrently decide the existing stats are too old, the collector would write the data twice. (In principle the collector's logic would still merge requests as long as the second one arrives before we've actually written data ... but since the message collection loop would write data immediately after processing a single inquiry message, that never happened in practice, and in any case the window in which it might work would be much shorter than PGSTAT_STAT_INTERVAL.) To fix, improve pgstat_recv_inquiry so that it checks whether the cutoff time is too old, and doesn't add a request to the queue if so. This means that we do not need DBWriteRequest.request_time, because the decision is taken before making a queue entry. And that means that we don't really need the DBWriteRequest data structure at all; an OID list of database OIDs will serve and allow removal of some rather verbose and crufty code. In passing, improve the comments in this area, which have been rather neglected. Also change backend_read_statsfile so that it's not silently relying on MyDatabaseId to have some particular value in the autovacuum launcher process. It accidentally worked as desired because MyDatabaseId is zero in that process; but that does not seem like a dependency we want, especially with no documentation about it. Although this patch is mine, it turns out I'd rediscovered a known bug, for which Tomas Vondra had already submitted a patch that's functionally equivalent to the non-cosmetic aspects of this patch. Thanks to Tomas for reviewing this version. Back-patch to 9.3 where the bug was introduced. Prior-Discussion: <1718942738eb65c8407fcd864883f4c8@fuzzy.cz> Patch: <4625.1464202586@sss.pgh.pa.us>
2016-05-31 21:54:46 +02:00
* to write the stats file(s).
*
* Ordinarily, an inquiry message prompts writing of the global stats file,
* the stats file for shared catalogs, and the stats file for the specified
* database. If databaseid is InvalidOid, only the first two are written.
*
* New file(s) will be written only if the existing file has a timestamp
* older than the specified cutoff_time; this prevents duplicated effort
* when multiple requests arrive at nearly the same time, assuming that
* backends send requests with cutoff_times a little bit in the past.
*
* clock_time should be the requestor's current local time; the collector
* uses this to check for the system clock going backward, but it has no
* effect unless that occurs. We assume clock_time >= cutoff_time, though.
* ----------
*/
typedef struct PgStat_MsgInquiry
{
PgStat_MsgHdr m_hdr;
Fix stats collector to recover nicely when system clock goes backwards. Formerly, if the system clock went backwards, the stats collector would fail to update the stats file any more until the clock reading again exceeds whatever timestamp was last written into the stats file. Such glitches in the clock's behavior are not terribly unlikely on machines not using NTP. Such a scenario has been observed to cause regression test failures in the buildfarm, and it could have bad effects on the behavior of autovacuum, so it seems prudent to install some defenses. We could directly detect the clock going backwards by adding GetCurrentTimestamp calls in the stats collector's main loop, but that would hurt performance on platforms where GetCurrentTimestamp is expensive. To minimize the performance hit in normal cases, adopt a more complicated scheme wherein backends check for clock skew when reading the stats file, and if they see it, signal the stats collector by sending an extra stats inquiry message. The stats collector does an extra GetCurrentTimestamp only when it receives an inquiry with an apparently out-of-order timestamp. To avoid unnecessary GetCurrentTimestamp calls, expand the inquiry messages to carry the backend's current clock reading as well as its stats cutoff time. The latter, being intentionally slightly in-the-past, would trigger more clock rechecks than we need if it were used for this purpose. We might want to backpatch this change at some point, but let's let it shake out in the buildfarm for awhile first.
2012-06-17 23:11:07 +02:00
TimestampTz clock_time; /* observed local clock time */
TimestampTz cutoff_time; /* minimum acceptable file timestamp */
Avoid useless closely-spaced writes of statistics files. The original intent in the stats collector was that we should not write out stats data oftener than every PGSTAT_STAT_INTERVAL msec. Backends will not make requests at all if they see the existing data is newer than that, and the stats collector is supposed to disregard requests having a cutoff_time older than its most recently written data, so that close-together requests don't result in multiple writes. But the latter part of that got broken in commit 187492b6c2e8cafc, so that if two backends concurrently decide the existing stats are too old, the collector would write the data twice. (In principle the collector's logic would still merge requests as long as the second one arrives before we've actually written data ... but since the message collection loop would write data immediately after processing a single inquiry message, that never happened in practice, and in any case the window in which it might work would be much shorter than PGSTAT_STAT_INTERVAL.) To fix, improve pgstat_recv_inquiry so that it checks whether the cutoff time is too old, and doesn't add a request to the queue if so. This means that we do not need DBWriteRequest.request_time, because the decision is taken before making a queue entry. And that means that we don't really need the DBWriteRequest data structure at all; an OID list of database OIDs will serve and allow removal of some rather verbose and crufty code. In passing, improve the comments in this area, which have been rather neglected. Also change backend_read_statsfile so that it's not silently relying on MyDatabaseId to have some particular value in the autovacuum launcher process. It accidentally worked as desired because MyDatabaseId is zero in that process; but that does not seem like a dependency we want, especially with no documentation about it. Although this patch is mine, it turns out I'd rediscovered a known bug, for which Tomas Vondra had already submitted a patch that's functionally equivalent to the non-cosmetic aspects of this patch. Thanks to Tomas for reviewing this version. Back-patch to 9.3 where the bug was introduced. Prior-Discussion: <1718942738eb65c8407fcd864883f4c8@fuzzy.cz> Patch: <4625.1464202586@sss.pgh.pa.us>
2016-05-31 21:54:46 +02:00
Oid databaseid; /* requested DB (InvalidOid => shared only) */
} PgStat_MsgInquiry;
/* ----------
* PgStat_TableEntry Per-table info in a MsgTabstat
* ----------
*/
typedef struct PgStat_TableEntry
{
Oid t_id;
PgStat_TableCounts t_counts;
} PgStat_TableEntry;
/* ----------
* PgStat_MsgTabstat Sent by the backend to report table
* and buffer access statistics.
* ----------
*/
#define PGSTAT_NUM_TABENTRIES \
((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - 3 * sizeof(int) - 2 * sizeof(PgStat_Counter)) \
/ sizeof(PgStat_TableEntry))
typedef struct PgStat_MsgTabstat
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
int m_nentries;
int m_xact_commit;
int m_xact_rollback;
PgStat_Counter m_block_read_time; /* times in microseconds */
PgStat_Counter m_block_write_time;
PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES];
} PgStat_MsgTabstat;
/* ----------
* PgStat_MsgTabpurge Sent by the backend to tell the collector
* about dead tables.
* ----------
*/
#define PGSTAT_NUM_TABPURGE \
((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \
/ sizeof(Oid))
typedef struct PgStat_MsgTabpurge
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
int m_nentries;
Oid m_tableid[PGSTAT_NUM_TABPURGE];
} PgStat_MsgTabpurge;
/* ----------
* PgStat_MsgDropdb Sent by the backend to tell the collector
* about a dropped database
* ----------
*/
typedef struct PgStat_MsgDropdb
{
2005-10-15 04:49:52 +02:00
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
} PgStat_MsgDropdb;
/* ----------
* PgStat_MsgResetcounter Sent by the backend to tell the collector
* to reset counters
* ----------
*/
typedef struct PgStat_MsgResetcounter
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
} PgStat_MsgResetcounter;
/* ----------
2010-02-26 03:01:40 +01:00
* PgStat_MsgResetsharedcounter Sent by the backend to tell the collector
* to reset a shared counter
* ----------
*/
typedef struct PgStat_MsgResetsharedcounter
{
PgStat_MsgHdr m_hdr;
PgStat_Shared_Reset_Target m_resettarget;
} PgStat_MsgResetsharedcounter;
/* ----------
2010-02-26 03:01:40 +01:00
* PgStat_MsgResetsinglecounter Sent by the backend to tell the collector
* to reset a single counter
* ----------
*/
typedef struct PgStat_MsgResetsinglecounter
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
PgStat_Single_Reset_Type m_resettype;
Oid m_objectid;
} PgStat_MsgResetsinglecounter;
/* ----------
* PgStat_MsgAutovacStart Sent by the autovacuum daemon to signal
2005-10-15 04:49:52 +02:00
* that a database is going to be processed
* ----------
*/
typedef struct PgStat_MsgAutovacStart
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
2005-10-15 04:49:52 +02:00
TimestampTz m_start_time;
} PgStat_MsgAutovacStart;
/* ----------
* PgStat_MsgVacuum Sent by the backend or autovacuum daemon
Revise pgstat's tracking of tuple changes to improve the reliability of decisions about when to auto-analyze. The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples, where all three of these numbers could be bad estimates from ANALYZE itself. Even worse, in the presence of a steady flow of HOT updates and matching HOT-tuple reclamations, auto-analyze might never trigger at all, even if all three numbers are exactly right, because n_dead_tuples could hold steady. To fix, replace last_anl_tuples with an accurately tracked count of the total number of committed tuple inserts + updates + deletes since the last ANALYZE on the table. This can still be compared to the same threshold as before, but it's much more trustworthy than the old computation. Tracking this requires one more intra-transaction counter per modified table within backends, but no additional memory space in the stats collector. There probably isn't any measurable speed difference; if anything it might be a bit faster than before, since I was able to eliminate some per-tuple arithmetic operations in favor of adding sums once per (sub)transaction. Also, simplify the logic around pgstat vacuum and analyze reporting messages by not trying to fold VACUUM ANALYZE into a single pgstat message. The original thought behind this patch was to allow scheduling of analyzes on parent tables by artificially inflating their changes_since_analyze count. I've left that for a separate patch since this change seems to stand on its own merit.
2009-12-30 21:32:14 +01:00
* after VACUUM
* ----------
*/
typedef struct PgStat_MsgVacuum
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
Oid m_tableoid;
bool m_autovacuum;
TimestampTz m_vacuumtime;
PgStat_Counter m_live_tuples;
PgStat_Counter m_dead_tuples;
} PgStat_MsgVacuum;
/* ----------
* PgStat_MsgAnalyze Sent by the backend or autovacuum daemon
2005-10-15 04:49:52 +02:00
* after ANALYZE
* ----------
*/
typedef struct PgStat_MsgAnalyze
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
Oid m_tableoid;
2006-10-04 02:30:14 +02:00
bool m_autovacuum;
bool m_resetcounter;
2006-10-04 02:30:14 +02:00
TimestampTz m_analyzetime;
2005-10-15 04:49:52 +02:00
PgStat_Counter m_live_tuples;
PgStat_Counter m_dead_tuples;
} PgStat_MsgAnalyze;
/* ----------
* PgStat_MsgArchiver Sent by the archiver to update statistics.
* ----------
*/
typedef struct PgStat_MsgArchiver
{
PgStat_MsgHdr m_hdr;
bool m_failed; /* Failed attempt */
char m_xlog[MAX_XFN_CHARS + 1];
TimestampTz m_timestamp;
} PgStat_MsgArchiver;
/* ----------
2007-11-15 22:14:46 +01:00
* PgStat_MsgBgWriter Sent by the bgwriter to update statistics.
* ----------
*/
typedef struct PgStat_MsgBgWriter
{
PgStat_MsgHdr m_hdr;
2007-11-15 22:14:46 +01:00
PgStat_Counter m_timed_checkpoints;
PgStat_Counter m_requested_checkpoints;
PgStat_Counter m_buf_written_checkpoints;
PgStat_Counter m_buf_written_clean;
PgStat_Counter m_maxwritten_clean;
PgStat_Counter m_buf_written_backend;
PgStat_Counter m_buf_fsync_backend;
2007-11-15 22:14:46 +01:00
PgStat_Counter m_buf_alloc;
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
PgStat_Counter m_checkpoint_write_time; /* times in milliseconds */
PgStat_Counter m_checkpoint_sync_time;
} PgStat_MsgBgWriter;
/* ----------
* PgStat_MsgRecoveryConflict Sent by the backend upon recovery conflict
* ----------
*/
typedef struct PgStat_MsgRecoveryConflict
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
int m_reason;
} PgStat_MsgRecoveryConflict;
/* ----------
* PgStat_MsgTempFile Sent by the backend upon creating a temp file
* ----------
*/
typedef struct PgStat_MsgTempFile
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
size_t m_filesize;
} PgStat_MsgTempFile;
/* ----------
* PgStat_FunctionCounts The actual per-function counts kept by a backend
*
* This struct should contain only actual event counters, because we memcmp
* it against zeroes to detect whether there are any counts to transmit.
*
* Note that the time counters are in instr_time format here. We convert to
* microseconds in PgStat_Counter format when transmitting to the collector.
* ----------
*/
typedef struct PgStat_FunctionCounts
{
PgStat_Counter f_numcalls;
instr_time f_total_time;
instr_time f_self_time;
} PgStat_FunctionCounts;
/* ----------
* PgStat_BackendFunctionEntry Entry in backend's per-function hash table
* ----------
*/
typedef struct PgStat_BackendFunctionEntry
{
Oid f_id;
PgStat_FunctionCounts f_counts;
} PgStat_BackendFunctionEntry;
/* ----------
* PgStat_FunctionEntry Per-function info in a MsgFuncstat
* ----------
*/
typedef struct PgStat_FunctionEntry
{
Oid f_id;
PgStat_Counter f_numcalls;
PgStat_Counter f_total_time; /* times in microseconds */
PgStat_Counter f_self_time;
} PgStat_FunctionEntry;
/* ----------
* PgStat_MsgFuncstat Sent by the backend to report function
* usage statistics.
* ----------
*/
#define PGSTAT_NUM_FUNCENTRIES \
((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \
/ sizeof(PgStat_FunctionEntry))
typedef struct PgStat_MsgFuncstat
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
int m_nentries;
PgStat_FunctionEntry m_entry[PGSTAT_NUM_FUNCENTRIES];
} PgStat_MsgFuncstat;
/* ----------
* PgStat_MsgFuncpurge Sent by the backend to tell the collector
* about dead functions.
* ----------
*/
#define PGSTAT_NUM_FUNCPURGE \
((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \
/ sizeof(Oid))
typedef struct PgStat_MsgFuncpurge
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
int m_nentries;
Oid m_functionid[PGSTAT_NUM_FUNCPURGE];
} PgStat_MsgFuncpurge;
/* ----------
* PgStat_MsgDeadlock Sent by the backend to tell the collector
* about a deadlock that occurred.
* ----------
*/
typedef struct PgStat_MsgDeadlock
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
} PgStat_MsgDeadlock;
/* ----------
* PgStat_MsgChecksumFailure Sent by the backend to tell the collector
* about checksum failures noticed.
* ----------
*/
typedef struct PgStat_MsgChecksumFailure
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
int m_failurecount;
} PgStat_MsgChecksumFailure;
/* ----------
* PgStat_Msg Union over all possible messages.
* ----------
*/
typedef union PgStat_Msg
{
PgStat_MsgHdr msg_hdr;
PgStat_MsgDummy msg_dummy;
PgStat_MsgInquiry msg_inquiry;
PgStat_MsgTabstat msg_tabstat;
PgStat_MsgTabpurge msg_tabpurge;
PgStat_MsgDropdb msg_dropdb;
PgStat_MsgResetcounter msg_resetcounter;
PgStat_MsgResetsharedcounter msg_resetsharedcounter;
PgStat_MsgResetsinglecounter msg_resetsinglecounter;
PgStat_MsgAutovacStart msg_autovacuum;
PgStat_MsgVacuum msg_vacuum;
PgStat_MsgAnalyze msg_analyze;
PgStat_MsgArchiver msg_archiver;
PgStat_MsgBgWriter msg_bgwriter;
PgStat_MsgFuncstat msg_funcstat;
PgStat_MsgFuncpurge msg_funcpurge;
PgStat_MsgRecoveryConflict msg_recoveryconflict;
PgStat_MsgDeadlock msg_deadlock;
} PgStat_Msg;
/* ------------------------------------------------------------
* Statistic collector data structures follow
*
* PGSTAT_FILE_FORMAT_ID should be changed whenever any of these
* data structures change.
* ------------------------------------------------------------
*/
#define PGSTAT_FILE_FORMAT_ID 0x01A5BC9D
/* ----------
* PgStat_StatDBEntry The collector's data per database
* ----------
*/
typedef struct PgStat_StatDBEntry
{
Oid databaseid;
PgStat_Counter n_xact_commit;
PgStat_Counter n_xact_rollback;
PgStat_Counter n_blocks_fetched;
PgStat_Counter n_blocks_hit;
PgStat_Counter n_tuples_returned;
PgStat_Counter n_tuples_fetched;
PgStat_Counter n_tuples_inserted;
PgStat_Counter n_tuples_updated;
PgStat_Counter n_tuples_deleted;
2005-10-15 04:49:52 +02:00
TimestampTz last_autovac_time;
PgStat_Counter n_conflict_tablespace;
PgStat_Counter n_conflict_lock;
PgStat_Counter n_conflict_snapshot;
PgStat_Counter n_conflict_bufferpin;
PgStat_Counter n_conflict_startup_deadlock;
PgStat_Counter n_temp_files;
PgStat_Counter n_temp_bytes;
PgStat_Counter n_deadlocks;
PgStat_Counter n_checksum_failures;
PgStat_Counter n_block_read_time; /* times in microseconds */
PgStat_Counter n_block_write_time;
TimestampTz stat_reset_timestamp;
TimestampTz stats_timestamp; /* time of db stats file update */
/*
* tables and functions must be last in the struct, because we don't write
* the pointers out to the stats file.
*/
HTAB *tables;
HTAB *functions;
} PgStat_StatDBEntry;
/* ----------
* PgStat_StatTabEntry The collector's data per table (or index)
* ----------
*/
typedef struct PgStat_StatTabEntry
{
Oid tableid;
PgStat_Counter numscans;
PgStat_Counter tuples_returned;
PgStat_Counter tuples_fetched;
PgStat_Counter tuples_inserted;
PgStat_Counter tuples_updated;
PgStat_Counter tuples_deleted;
PgStat_Counter tuples_hot_updated;
PgStat_Counter n_live_tuples;
PgStat_Counter n_dead_tuples;
Revise pgstat's tracking of tuple changes to improve the reliability of decisions about when to auto-analyze. The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples, where all three of these numbers could be bad estimates from ANALYZE itself. Even worse, in the presence of a steady flow of HOT updates and matching HOT-tuple reclamations, auto-analyze might never trigger at all, even if all three numbers are exactly right, because n_dead_tuples could hold steady. To fix, replace last_anl_tuples with an accurately tracked count of the total number of committed tuple inserts + updates + deletes since the last ANALYZE on the table. This can still be compared to the same threshold as before, but it's much more trustworthy than the old computation. Tracking this requires one more intra-transaction counter per modified table within backends, but no additional memory space in the stats collector. There probably isn't any measurable speed difference; if anything it might be a bit faster than before, since I was able to eliminate some per-tuple arithmetic operations in favor of adding sums once per (sub)transaction. Also, simplify the logic around pgstat vacuum and analyze reporting messages by not trying to fold VACUUM ANALYZE into a single pgstat message. The original thought behind this patch was to allow scheduling of analyzes on parent tables by artificially inflating their changes_since_analyze count. I've left that for a separate patch since this change seems to stand on its own merit.
2009-12-30 21:32:14 +01:00
PgStat_Counter changes_since_analyze;
PgStat_Counter blocks_fetched;
PgStat_Counter blocks_hit;
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
TimestampTz vacuum_timestamp; /* user initiated vacuum */
PgStat_Counter vacuum_count;
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
TimestampTz autovac_vacuum_timestamp; /* autovacuum initiated */
PgStat_Counter autovac_vacuum_count;
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
TimestampTz analyze_timestamp; /* user initiated */
PgStat_Counter analyze_count;
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
TimestampTz autovac_analyze_timestamp; /* autovacuum initiated */
PgStat_Counter autovac_analyze_count;
} PgStat_StatTabEntry;
/* ----------
* PgStat_StatFuncEntry The collector's data per function
* ----------
*/
typedef struct PgStat_StatFuncEntry
{
Oid functionid;
PgStat_Counter f_numcalls;
PgStat_Counter f_total_time; /* times in microseconds */
PgStat_Counter f_self_time;
} PgStat_StatFuncEntry;
/*
* Archiver statistics kept in the stats collector
*/
typedef struct PgStat_ArchiverStats
{
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
PgStat_Counter archived_count; /* archival successes */
char last_archived_wal[MAX_XFN_CHARS + 1]; /* last WAL file
* archived */
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
TimestampTz last_archived_timestamp; /* last archival success time */
PgStat_Counter failed_count; /* failed archival attempts */
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
char last_failed_wal[MAX_XFN_CHARS + 1]; /* WAL file involved in
* last failure */
TimestampTz last_failed_timestamp; /* last archival failure time */
TimestampTz stat_reset_timestamp;
} PgStat_ArchiverStats;
/*
* Global statistics kept in the stats collector
*/
typedef struct PgStat_GlobalStats
{
TimestampTz stats_timestamp; /* time of stats file update */
2007-11-15 22:14:46 +01:00
PgStat_Counter timed_checkpoints;
PgStat_Counter requested_checkpoints;
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
PgStat_Counter checkpoint_write_time; /* times in milliseconds */
PgStat_Counter checkpoint_sync_time;
2007-11-15 22:14:46 +01:00
PgStat_Counter buf_written_checkpoints;
PgStat_Counter buf_written_clean;
PgStat_Counter maxwritten_clean;
PgStat_Counter buf_written_backend;
PgStat_Counter buf_fsync_backend;
2007-11-15 22:14:46 +01:00
PgStat_Counter buf_alloc;
TimestampTz stat_reset_timestamp;
} PgStat_GlobalStats;
/* ----------
* Backend types
* ----------
*/
typedef enum BackendType
{
B_AUTOVAC_LAUNCHER,
B_AUTOVAC_WORKER,
B_BACKEND,
B_BG_WORKER,
B_BG_WRITER,
B_CHECKPOINTER,
B_STARTUP,
B_WAL_RECEIVER,
B_WAL_SENDER,
B_WAL_WRITER
} BackendType;
/* ----------
* Backend states
* ----------
*/
typedef enum BackendState
{
STATE_UNDEFINED,
STATE_IDLE,
STATE_RUNNING,
STATE_IDLEINTRANSACTION,
STATE_FASTPATH,
STATE_IDLEINTRANSACTION_ABORTED,
STATE_DISABLED
} BackendState;
/* ----------
* Wait Classes
* ----------
*/
Simplify LWLock tranche machinery by removing array_base/array_stride. array_base and array_stride were added so that we could identify the offset of an LWLock within a tranche, but this facility is only very marginally used apart from the main tranche. So, give every lock in the main tranche its own tranche ID and get rid of array_base, array_stride, and all that's attached. For debugging facilities (Trace_lwlocks and LWLOCK_STATS) print the pointer address of the LWLock using %p instead of the offset. This is arguably more useful, and certainly a lot cheaper. Drop the offset-within-tranche from the information reported to dtrace and from one can't-happen message inside lwlock.c. The main user-visible impact of this change is that pg_stat_activity will now report all waits for LWLocks as "LWLock" rather than reporting some as "LWLockTranche" and others as "LWLockNamed". The main motivation for this change is that the need to specify an array_base and an array_stride is awkward for parallel query. There is only a very limited supply of tranche IDs so we can't just keep allocating new ones, and if we try to use the same tranche IDs every time then we run into trouble when multiple parallel contexts are use simultaneously. So if we didn't get rid of this mechanism we'd have to make it even more complicated. By simplifying it in this way, we instead reduce the size of the generated code for lwlock.c by about 5%. Discussion: http://postgr.es/m/CA+TgmoYsFn6NUW1x0AZtupJGUAs1UDY4dJtCN47_Q6D0sP80PA@mail.gmail.com
2016-12-16 17:29:23 +01:00
#define PG_WAIT_LWLOCK 0x01000000U
#define PG_WAIT_LOCK 0x03000000U
#define PG_WAIT_BUFFER_PIN 0x04000000U
#define PG_WAIT_ACTIVITY 0x05000000U
#define PG_WAIT_CLIENT 0x06000000U
#define PG_WAIT_EXTENSION 0x07000000U
#define PG_WAIT_IPC 0x08000000U
#define PG_WAIT_TIMEOUT 0x09000000U
#define PG_WAIT_IO 0x0A000000U
/* ----------
* Wait Events - Activity
*
* Use this category when a process is waiting because it has no work to do,
* unless the "Client" or "Timeout" category describes the situation better.
* Typically, this should only be used for background processes.
* ----------
*/
typedef enum
{
WAIT_EVENT_ARCHIVER_MAIN = PG_WAIT_ACTIVITY,
WAIT_EVENT_AUTOVACUUM_MAIN,
WAIT_EVENT_BGWRITER_HIBERNATE,
WAIT_EVENT_BGWRITER_MAIN,
WAIT_EVENT_CHECKPOINTER_MAIN,
WAIT_EVENT_LOGICAL_APPLY_MAIN,
WAIT_EVENT_LOGICAL_LAUNCHER_MAIN,
WAIT_EVENT_PGSTAT_MAIN,
WAIT_EVENT_RECOVERY_WAL_ALL,
WAIT_EVENT_RECOVERY_WAL_STREAM,
WAIT_EVENT_SYSLOGGER_MAIN,
WAIT_EVENT_WAL_RECEIVER_MAIN,
WAIT_EVENT_WAL_SENDER_MAIN,
WAIT_EVENT_WAL_WRITER_MAIN
} WaitEventActivity;
/* ----------
* Wait Events - Client
*
* Use this category when a process is waiting to send data to or receive data
* from the frontend process to which it is connected. This is never used for
* a background process, which has no client connection.
* ----------
*/
typedef enum
{
WAIT_EVENT_CLIENT_READ = PG_WAIT_CLIENT,
WAIT_EVENT_CLIENT_WRITE,
WAIT_EVENT_LIBPQWALRECEIVER_CONNECT,
WAIT_EVENT_LIBPQWALRECEIVER_RECEIVE,
WAIT_EVENT_SSL_OPEN_SERVER,
WAIT_EVENT_WAL_RECEIVER_WAIT_START,
WAIT_EVENT_WAL_SENDER_WAIT_WAL,
WAIT_EVENT_WAL_SENDER_WRITE_DATA
} WaitEventClient;
/* ----------
* Wait Events - IPC
*
* Use this category when a process cannot complete the work it is doing because
* it is waiting for a notification from another process.
* ----------
*/
typedef enum
{
WAIT_EVENT_BGWORKER_SHUTDOWN = PG_WAIT_IPC,
WAIT_EVENT_BGWORKER_STARTUP,
WAIT_EVENT_BTREE_PAGE,
WAIT_EVENT_CLOG_GROUP_UPDATE,
WAIT_EVENT_CHECKPOINT_DONE,
WAIT_EVENT_CHECKPOINT_START,
WAIT_EVENT_EXECUTE_GATHER,
Add parallel-aware hash joins. Introduce parallel-aware hash joins that appear in EXPLAIN plans as Parallel Hash Join with Parallel Hash. While hash joins could already appear in parallel queries, they were previously always parallel-oblivious and had a partial subplan only on the outer side, meaning that the work of the inner subplan was duplicated in every worker. After this commit, the planner will consider using a partial subplan on the inner side too, using the Parallel Hash node to divide the work over the available CPU cores and combine its results in shared memory. If the join needs to be split into multiple batches in order to respect work_mem, then workers process different batches as much as possible and then work together on the remaining batches. The advantages of a parallel-aware hash join over a parallel-oblivious hash join used in a parallel query are that it: * avoids wasting memory on duplicated hash tables * avoids wasting disk space on duplicated batch files * divides the work of building the hash table over the CPUs One disadvantage is that there is some communication between the participating CPUs which might outweigh the benefits of parallelism in the case of small hash tables. This is avoided by the planner's existing reluctance to supply partial plans for small scans, but it may be necessary to estimate synchronization costs in future if that situation changes. Another is that outer batch 0 must be written to disk if multiple batches are required. A potential future advantage of parallel-aware hash joins is that right and full outer joins could be supported, since there is a single set of matched bits for each hashtable, but that is not yet implemented. A new GUC enable_parallel_hash is defined to control the feature, defaulting to on. Author: Thomas Munro Reviewed-By: Andres Freund, Robert Haas Tested-By: Rafia Sabih, Prabhat Sahu Discussion: https://postgr.es/m/CAEepm=2W=cOkiZxcg6qiFQP-dHUe09aqTrEMM7yJDrHMhDv_RA@mail.gmail.com https://postgr.es/m/CAEepm=37HKyJ4U6XOLi=JgfSHM3o6B-GaeO-6hkOmneTDkH+Uw@mail.gmail.com
2017-12-21 08:39:21 +01:00
WAIT_EVENT_HASH_BATCH_ALLOCATING,
WAIT_EVENT_HASH_BATCH_ELECTING,
WAIT_EVENT_HASH_BATCH_LOADING,
WAIT_EVENT_HASH_BUILD_ALLOCATING,
WAIT_EVENT_HASH_BUILD_ELECTING,
WAIT_EVENT_HASH_BUILD_HASHING_INNER,
WAIT_EVENT_HASH_BUILD_HASHING_OUTER,
WAIT_EVENT_HASH_GROW_BATCHES_ALLOCATING,
WAIT_EVENT_HASH_GROW_BATCHES_DECIDING,
Add parallel-aware hash joins. Introduce parallel-aware hash joins that appear in EXPLAIN plans as Parallel Hash Join with Parallel Hash. While hash joins could already appear in parallel queries, they were previously always parallel-oblivious and had a partial subplan only on the outer side, meaning that the work of the inner subplan was duplicated in every worker. After this commit, the planner will consider using a partial subplan on the inner side too, using the Parallel Hash node to divide the work over the available CPU cores and combine its results in shared memory. If the join needs to be split into multiple batches in order to respect work_mem, then workers process different batches as much as possible and then work together on the remaining batches. The advantages of a parallel-aware hash join over a parallel-oblivious hash join used in a parallel query are that it: * avoids wasting memory on duplicated hash tables * avoids wasting disk space on duplicated batch files * divides the work of building the hash table over the CPUs One disadvantage is that there is some communication between the participating CPUs which might outweigh the benefits of parallelism in the case of small hash tables. This is avoided by the planner's existing reluctance to supply partial plans for small scans, but it may be necessary to estimate synchronization costs in future if that situation changes. Another is that outer batch 0 must be written to disk if multiple batches are required. A potential future advantage of parallel-aware hash joins is that right and full outer joins could be supported, since there is a single set of matched bits for each hashtable, but that is not yet implemented. A new GUC enable_parallel_hash is defined to control the feature, defaulting to on. Author: Thomas Munro Reviewed-By: Andres Freund, Robert Haas Tested-By: Rafia Sabih, Prabhat Sahu Discussion: https://postgr.es/m/CAEepm=2W=cOkiZxcg6qiFQP-dHUe09aqTrEMM7yJDrHMhDv_RA@mail.gmail.com https://postgr.es/m/CAEepm=37HKyJ4U6XOLi=JgfSHM3o6B-GaeO-6hkOmneTDkH+Uw@mail.gmail.com
2017-12-21 08:39:21 +01:00
WAIT_EVENT_HASH_GROW_BATCHES_ELECTING,
WAIT_EVENT_HASH_GROW_BATCHES_FINISHING,
WAIT_EVENT_HASH_GROW_BATCHES_REPARTITIONING,
WAIT_EVENT_HASH_GROW_BUCKETS_ALLOCATING,
Add parallel-aware hash joins. Introduce parallel-aware hash joins that appear in EXPLAIN plans as Parallel Hash Join with Parallel Hash. While hash joins could already appear in parallel queries, they were previously always parallel-oblivious and had a partial subplan only on the outer side, meaning that the work of the inner subplan was duplicated in every worker. After this commit, the planner will consider using a partial subplan on the inner side too, using the Parallel Hash node to divide the work over the available CPU cores and combine its results in shared memory. If the join needs to be split into multiple batches in order to respect work_mem, then workers process different batches as much as possible and then work together on the remaining batches. The advantages of a parallel-aware hash join over a parallel-oblivious hash join used in a parallel query are that it: * avoids wasting memory on duplicated hash tables * avoids wasting disk space on duplicated batch files * divides the work of building the hash table over the CPUs One disadvantage is that there is some communication between the participating CPUs which might outweigh the benefits of parallelism in the case of small hash tables. This is avoided by the planner's existing reluctance to supply partial plans for small scans, but it may be necessary to estimate synchronization costs in future if that situation changes. Another is that outer batch 0 must be written to disk if multiple batches are required. A potential future advantage of parallel-aware hash joins is that right and full outer joins could be supported, since there is a single set of matched bits for each hashtable, but that is not yet implemented. A new GUC enable_parallel_hash is defined to control the feature, defaulting to on. Author: Thomas Munro Reviewed-By: Andres Freund, Robert Haas Tested-By: Rafia Sabih, Prabhat Sahu Discussion: https://postgr.es/m/CAEepm=2W=cOkiZxcg6qiFQP-dHUe09aqTrEMM7yJDrHMhDv_RA@mail.gmail.com https://postgr.es/m/CAEepm=37HKyJ4U6XOLi=JgfSHM3o6B-GaeO-6hkOmneTDkH+Uw@mail.gmail.com
2017-12-21 08:39:21 +01:00
WAIT_EVENT_HASH_GROW_BUCKETS_ELECTING,
WAIT_EVENT_HASH_GROW_BUCKETS_REINSERTING,
WAIT_EVENT_LOGICAL_SYNC_DATA,
WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE,
WAIT_EVENT_MQ_INTERNAL,
WAIT_EVENT_MQ_PUT_MESSAGE,
WAIT_EVENT_MQ_RECEIVE,
WAIT_EVENT_MQ_SEND,
WAIT_EVENT_PARALLEL_BITMAP_SCAN,
WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN,
WAIT_EVENT_PARALLEL_FINISH,
WAIT_EVENT_PROCARRAY_GROUP_UPDATE,
WAIT_EVENT_PROMOTE,
WAIT_EVENT_REPLICATION_ORIGIN_DROP,
WAIT_EVENT_REPLICATION_SLOT_DROP,
WAIT_EVENT_SAFE_SNAPSHOT,
WAIT_EVENT_SYNC_REP
} WaitEventIPC;
/* ----------
* Wait Events - Timeout
*
* Use this category when a process is waiting for a timeout to expire.
* ----------
*/
typedef enum
{
WAIT_EVENT_BASE_BACKUP_THROTTLE = PG_WAIT_TIMEOUT,
WAIT_EVENT_PG_SLEEP,
WAIT_EVENT_RECOVERY_APPLY_DELAY
} WaitEventTimeout;
/* ----------
* Wait Events - IO
*
* Use this category when a process is waiting for a IO.
* ----------
*/
typedef enum
{
WAIT_EVENT_BUFFILE_READ = PG_WAIT_IO,
WAIT_EVENT_BUFFILE_WRITE,
WAIT_EVENT_CONTROL_FILE_READ,
WAIT_EVENT_CONTROL_FILE_SYNC,
WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE,
WAIT_EVENT_CONTROL_FILE_WRITE,
WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE,
WAIT_EVENT_COPY_FILE_READ,
WAIT_EVENT_COPY_FILE_WRITE,
WAIT_EVENT_DATA_FILE_EXTEND,
WAIT_EVENT_DATA_FILE_FLUSH,
WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC,
WAIT_EVENT_DATA_FILE_PREFETCH,
WAIT_EVENT_DATA_FILE_READ,
WAIT_EVENT_DATA_FILE_SYNC,
WAIT_EVENT_DATA_FILE_TRUNCATE,
WAIT_EVENT_DATA_FILE_WRITE,
WAIT_EVENT_DSM_FILL_ZERO_WRITE,
WAIT_EVENT_LOCK_FILE_ADDTODATADIR_READ,
WAIT_EVENT_LOCK_FILE_ADDTODATADIR_SYNC,
WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE,
WAIT_EVENT_LOCK_FILE_CREATE_READ,
WAIT_EVENT_LOCK_FILE_CREATE_SYNC,
WAIT_EVENT_LOCK_FILE_CREATE_WRITE,
WAIT_EVENT_LOCK_FILE_RECHECKDATADIR_READ,
WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC,
WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC,
WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE,
WAIT_EVENT_LOGICAL_REWRITE_SYNC,
WAIT_EVENT_LOGICAL_REWRITE_TRUNCATE,
WAIT_EVENT_LOGICAL_REWRITE_WRITE,
WAIT_EVENT_RELATION_MAP_READ,
WAIT_EVENT_RELATION_MAP_SYNC,
WAIT_EVENT_RELATION_MAP_WRITE,
WAIT_EVENT_REORDER_BUFFER_READ,
WAIT_EVENT_REORDER_BUFFER_WRITE,
WAIT_EVENT_REORDER_LOGICAL_MAPPING_READ,
WAIT_EVENT_REPLICATION_SLOT_READ,
WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC,
WAIT_EVENT_REPLICATION_SLOT_SYNC,
WAIT_EVENT_REPLICATION_SLOT_WRITE,
WAIT_EVENT_SLRU_FLUSH_SYNC,
WAIT_EVENT_SLRU_READ,
WAIT_EVENT_SLRU_SYNC,
WAIT_EVENT_SLRU_WRITE,
WAIT_EVENT_SNAPBUILD_READ,
WAIT_EVENT_SNAPBUILD_SYNC,
WAIT_EVENT_SNAPBUILD_WRITE,
WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC,
WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE,
WAIT_EVENT_TIMELINE_HISTORY_READ,
WAIT_EVENT_TIMELINE_HISTORY_SYNC,
WAIT_EVENT_TIMELINE_HISTORY_WRITE,
WAIT_EVENT_TWOPHASE_FILE_READ,
WAIT_EVENT_TWOPHASE_FILE_SYNC,
WAIT_EVENT_TWOPHASE_FILE_WRITE,
WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ,
WAIT_EVENT_WAL_BOOTSTRAP_SYNC,
WAIT_EVENT_WAL_BOOTSTRAP_WRITE,
WAIT_EVENT_WAL_COPY_READ,
WAIT_EVENT_WAL_COPY_SYNC,
WAIT_EVENT_WAL_COPY_WRITE,
WAIT_EVENT_WAL_INIT_SYNC,
WAIT_EVENT_WAL_INIT_WRITE,
WAIT_EVENT_WAL_READ,
WAIT_EVENT_WAL_SYNC,
WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN,
WAIT_EVENT_WAL_WRITE
} WaitEventIO;
/* ----------
* Command type for progress reporting purposes
* ----------
*/
typedef enum ProgressCommandType
{
PROGRESS_COMMAND_INVALID,
PROGRESS_COMMAND_VACUUM,
PROGRESS_COMMAND_CLUSTER
2016-06-10 00:02:36 +02:00
} ProgressCommandType;
#define PGSTAT_NUM_PROGRESS_PARAM 10
/* ----------
* Shared-memory data structures
* ----------
*/
/*
* PgBackendSSLStatus
*
* For each backend, we keep the SSL status in a separate struct, that
* is only filled in if SSL is enabled.
*
* All char arrays must be null-terminated.
*/
typedef struct PgBackendSSLStatus
{
/* Information about SSL connection */
2015-05-24 03:35:49 +02:00
int ssl_bits;
bool ssl_compression;
char ssl_version[NAMEDATALEN];
char ssl_cipher[NAMEDATALEN];
char ssl_client_dn[NAMEDATALEN];
/*
* serial number is max "20 octets" per RFC 5280, so this size should be
* fine
*/
char ssl_client_serial[NAMEDATALEN];
char ssl_issuer_dn[NAMEDATALEN];
} PgBackendSSLStatus;
/* ----------
* PgBackendStatus
*
* Each live backend maintains a PgBackendStatus struct in shared memory
* showing its current activity. (The structs are allocated according to
* BackendId, but that is not critical.) Note that the collector process
* has no involvement in, or even access to, these structs.
*
* Each auxiliary process also maintains a PgBackendStatus struct in shared
* memory.
* ----------
*/
typedef struct PgBackendStatus
{
/*
* To avoid locking overhead, we use the following protocol: a backend
* increments st_changecount before modifying its entry, and again after
2006-10-04 02:30:14 +02:00
* finishing a modification. A would-be reader should note the value of
* st_changecount, copy the entry into private memory, then check
* st_changecount again. If the value hasn't changed, and if it's even,
* the copy is valid; otherwise start over. This makes updates cheap
* while reads are potentially expensive, but that's the tradeoff we want.
*
2015-05-24 03:35:49 +02:00
* The above protocol needs the memory barriers to ensure that the
* apparent order of execution is as it desires. Otherwise, for example,
* the CPU might rearrange the code so that st_changecount is incremented
* twice before the modification on a machine with weak memory ordering.
* This surprising result can lead to bugs.
*/
int st_changecount;
/* The entry is valid iff st_procpid > 0, unused if st_procpid == 0 */
int st_procpid;
/* Type of backends */
BackendType st_backendType;
/* Times when current backend, transaction, and activity started */
TimestampTz st_proc_start_timestamp;
TimestampTz st_xact_start_timestamp;
TimestampTz st_activity_start_timestamp;
TimestampTz st_state_start_timestamp;
/* Database OID, owning user's OID, connection client address */
Oid st_databaseid;
Oid st_userid;
SockAddr st_clientaddr;
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
char *st_clienthostname; /* MUST be null-terminated */
/* Information about SSL connection */
bool st_ssl;
PgBackendSSLStatus *st_sslstatus;
/* current state */
BackendState st_state;
/* application name; MUST be null-terminated */
2010-02-26 03:01:40 +01:00
char *st_appname;
/*
* Current command string; MUST be null-terminated. Note that this string
* possibly is truncated in the middle of a multi-byte character. As
* activity strings are stored more frequently than read, that allows to
* move the cost of correct truncation to the display side. Use
* pgstat_clip_activity() to truncate correctly.
*/
char *st_activity_raw;
/*
* Command progress reporting. Any command which wishes can advertise
* that it is running by setting st_progress_command,
2016-10-26 10:12:31 +02:00
* st_progress_command_target, and st_progress_param[].
* st_progress_command_target should be the OID of the relation which the
* command targets (we assume there's just one, as this is meant for
* utility commands), but the meaning of each element in the
* st_progress_param array is command-specific.
*/
ProgressCommandType st_progress_command;
Oid st_progress_command_target;
int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM];
} PgBackendStatus;
/*
* Macros to load and store st_changecount with the memory barriers.
*
* pgstat_increment_changecount_before() and
* pgstat_increment_changecount_after() need to be called before and after
* PgBackendStatus entries are modified, respectively. This makes sure that
* st_changecount is incremented around the modification.
*
* Also pgstat_save_changecount_before() and pgstat_save_changecount_after()
* need to be called before and after PgBackendStatus entries are copied into
* private memory, respectively.
*/
#define pgstat_increment_changecount_before(beentry) \
do { \
beentry->st_changecount++; \
2015-05-24 03:35:49 +02:00
pg_write_barrier(); \
} while (0)
2015-05-24 03:35:49 +02:00
#define pgstat_increment_changecount_after(beentry) \
do { \
2015-05-24 03:35:49 +02:00
pg_write_barrier(); \
beentry->st_changecount++; \
2015-05-24 03:35:49 +02:00
Assert((beentry->st_changecount & 1) == 0); \
} while (0)
#define pgstat_save_changecount_before(beentry, save_changecount) \
do { \
2015-05-24 03:35:49 +02:00
save_changecount = beentry->st_changecount; \
pg_read_barrier(); \
} while (0)
#define pgstat_save_changecount_after(beentry, save_changecount) \
do { \
pg_read_barrier(); \
2015-05-24 03:35:49 +02:00
save_changecount = beentry->st_changecount; \
} while (0)
/* ----------
* LocalPgBackendStatus
*
* When we build the backend status array, we use LocalPgBackendStatus to be
* able to add new values to the struct when needed without adding new fields
* to the shared memory. It contains the backend status as a first member.
* ----------
*/
typedef struct LocalPgBackendStatus
{
/*
* Local version of the backend status entry.
*/
PgBackendStatus backendStatus;
/*
* The xid of the current transaction if available, InvalidTransactionId
* if not.
*/
TransactionId backend_xid;
/*
* The xmin of the current session if available, InvalidTransactionId if
* not.
*/
TransactionId backend_xmin;
} LocalPgBackendStatus;
/*
* Working state needed to accumulate per-function-call timing statistics.
*/
typedef struct PgStat_FunctionCallUsage
{
/* Link to function's hashtable entry (must still be there at exit!) */
/* NULL means we are not tracking the current function call */
PgStat_FunctionCounts *fs;
/* Total time previously charged to function, as of function start */
instr_time save_f_total_time;
/* Backend-wide total time as of function start */
instr_time save_total;
/* system clock as of function start */
instr_time f_start;
} PgStat_FunctionCallUsage;
/* ----------
* GUC parameters
* ----------
*/
extern bool pgstat_track_activities;
extern bool pgstat_track_counts;
extern int pgstat_track_functions;
extern PGDLLIMPORT int pgstat_track_activity_query_size;
extern char *pgstat_stat_directory;
extern char *pgstat_stat_tmpname;
extern char *pgstat_stat_filename;
/*
* BgWriter statistics counters are updated directly by bgwriter and bufmgr
*/
extern PgStat_MsgBgWriter BgWriterStats;
/*
* Updated by pgstat_count_buffer_*_time macros
*/
extern PgStat_Counter pgStatBlockReadTime;
extern PgStat_Counter pgStatBlockWriteTime;
/* ----------
* Functions called from postmaster
* ----------
*/
extern Size BackendStatusShmemSize(void);
extern void CreateSharedBackendStatus(void);
extern void pgstat_init(void);
extern int pgstat_start(void);
extern void pgstat_reset_all(void);
extern void allow_immediate_pgstat_restart(void);
2007-11-15 22:14:46 +01:00
#ifdef EXEC_BACKEND
extern void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn();
#endif
/* ----------
* Functions called from backends
* ----------
*/
extern void pgstat_ping(void);
extern void pgstat_report_stat(bool force);
extern void pgstat_vacuum_stat(void);
extern void pgstat_drop_database(Oid databaseid);
extern void pgstat_clear_snapshot(void);
extern void pgstat_reset_counters(void);
extern void pgstat_reset_shared_counters(const char *);
extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type type);
extern void pgstat_report_autovac(Oid dboid);
Fix VACUUM so that it always updates pg_class.reltuples/relpages. When we added the ability for vacuum to skip heap pages by consulting the visibility map, we made it just not update the reltuples/relpages statistics if it skipped any pages. But this could leave us with extremely out-of-date stats for a table that contains any unchanging areas, especially for TOAST tables which never get processed by ANALYZE. In particular this could result in autovacuum making poor decisions about when to process the table, as in recent report from Florian Helmberger. And in general it's a bad idea to not update the stats at all. Instead, use the previous values of reltuples/relpages as an estimate of the tuple density in unvisited pages. This approach results in a "moving average" estimate of reltuples, which should converge to the correct value over multiple VACUUM and ANALYZE cycles even when individual measurements aren't very good. This new method for updating reltuples is used by both VACUUM and ANALYZE, with the result that we no longer need the grotty interconnections that caused ANALYZE to not update the stats depending on what had happened in the parent VACUUM command. Also, fix the logic for skipping all-visible pages during VACUUM so that it looks ahead rather than behind to decide what to do, as per a suggestion from Greg Stark. This eliminates useless scanning of all-visible pages at the start of the relation or just after a not-all-visible page. In particular, the first few pages of the relation will not be invariably included in the scanned pages, which seems to help in not overweighting them in the reltuples estimate. Back-patch to 8.4, where the visibility map was introduced.
2011-05-30 23:05:26 +02:00
extern void pgstat_report_vacuum(Oid tableoid, bool shared,
PgStat_Counter livetuples, PgStat_Counter deadtuples);
Fix VACUUM so that it always updates pg_class.reltuples/relpages. When we added the ability for vacuum to skip heap pages by consulting the visibility map, we made it just not update the reltuples/relpages statistics if it skipped any pages. But this could leave us with extremely out-of-date stats for a table that contains any unchanging areas, especially for TOAST tables which never get processed by ANALYZE. In particular this could result in autovacuum making poor decisions about when to process the table, as in recent report from Florian Helmberger. And in general it's a bad idea to not update the stats at all. Instead, use the previous values of reltuples/relpages as an estimate of the tuple density in unvisited pages. This approach results in a "moving average" estimate of reltuples, which should converge to the correct value over multiple VACUUM and ANALYZE cycles even when individual measurements aren't very good. This new method for updating reltuples is used by both VACUUM and ANALYZE, with the result that we no longer need the grotty interconnections that caused ANALYZE to not update the stats depending on what had happened in the parent VACUUM command. Also, fix the logic for skipping all-visible pages during VACUUM so that it looks ahead rather than behind to decide what to do, as per a suggestion from Greg Stark. This eliminates useless scanning of all-visible pages at the start of the relation or just after a not-all-visible page. In particular, the first few pages of the relation will not be invariably included in the scanned pages, which seems to help in not overweighting them in the reltuples estimate. Back-patch to 8.4, where the visibility map was introduced.
2011-05-30 23:05:26 +02:00
extern void pgstat_report_analyze(Relation rel,
PgStat_Counter livetuples, PgStat_Counter deadtuples,
bool resetcounter);
extern void pgstat_report_recovery_conflict(int reason);
extern void pgstat_report_deadlock(void);
extern void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount);
extern void pgstat_report_checksum_failure(void);
extern void pgstat_initialize(void);
extern void pgstat_bestart(void);
extern void pgstat_report_activity(BackendState state, const char *cmd_str);
extern void pgstat_report_tempfile(size_t filesize);
extern void pgstat_report_appname(const char *appname);
extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
extern const char *pgstat_get_wait_event(uint32 wait_event_info);
extern const char *pgstat_get_wait_event_type(uint32 wait_event_info);
extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer,
int buflen);
extern const char *pgstat_get_backend_desc(BackendType backendType);
extern void pgstat_progress_start_command(ProgressCommandType cmdtype,
Oid relid);
extern void pgstat_progress_update_param(int index, int64 val);
extern void pgstat_progress_update_multi_param(int nparam, const int *index,
const int64 *val);
extern void pgstat_progress_end_command(void);
extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id);
extern void pgstat_initstats(Relation rel);
extern char *pgstat_clip_activity(const char *raw_activity);
/* ----------
* pgstat_report_wait_start() -
*
* Called from places where server process needs to wait. This is called
* to report wait event information. The wait information is stored
* as 4-bytes where first byte represents the wait event class (type of
* wait, for different types of wait, refer WaitClass) and the next
* 3-bytes represent the actual wait event. Currently 2-bytes are used
* for wait event which is sufficient for current usage, 1-byte is
* reserved for future usage.
*
* NB: this *must* be able to survive being called before MyProc has been
* initialized.
* ----------
*/
static inline void
pgstat_report_wait_start(uint32 wait_event_info)
{
volatile PGPROC *proc = MyProc;
if (!pgstat_track_activities || !proc)
return;
/*
* Since this is a four-byte field which is always read and written as
* four-bytes, updates are atomic.
*/
proc->wait_event_info = wait_event_info;
}
/* ----------
* pgstat_report_wait_end() -
*
* Called to report end of a wait.
*
* NB: this *must* be able to survive being called before MyProc has been
* initialized.
* ----------
*/
static inline void
pgstat_report_wait_end(void)
{
volatile PGPROC *proc = MyProc;
if (!pgstat_track_activities || !proc)
return;
/*
* Since this is a four-byte field which is always read and written as
* four-bytes, updates are atomic.
*/
proc->wait_event_info = 0;
}
/* nontransactional event counts are simple enough to inline */
#define pgstat_count_heap_scan(rel) \
do { \
if ((rel)->pgstat_info != NULL) \
(rel)->pgstat_info->t_counts.t_numscans++; \
} while (0)
#define pgstat_count_heap_getnext(rel) \
do { \
if ((rel)->pgstat_info != NULL) \
(rel)->pgstat_info->t_counts.t_tuples_returned++; \
} while (0)
#define pgstat_count_heap_fetch(rel) \
do { \
if ((rel)->pgstat_info != NULL) \
(rel)->pgstat_info->t_counts.t_tuples_fetched++; \
} while (0)
#define pgstat_count_index_scan(rel) \
do { \
if ((rel)->pgstat_info != NULL) \
(rel)->pgstat_info->t_counts.t_numscans++; \
} while (0)
#define pgstat_count_index_tuples(rel, n) \
do { \
if ((rel)->pgstat_info != NULL) \
(rel)->pgstat_info->t_counts.t_tuples_returned += (n); \
} while (0)
#define pgstat_count_buffer_read(rel) \
do { \
if ((rel)->pgstat_info != NULL) \
(rel)->pgstat_info->t_counts.t_blocks_fetched++; \
} while (0)
#define pgstat_count_buffer_hit(rel) \
do { \
if ((rel)->pgstat_info != NULL) \
(rel)->pgstat_info->t_counts.t_blocks_hit++; \
} while (0)
#define pgstat_count_buffer_read_time(n) \
(pgStatBlockReadTime += (n))
#define pgstat_count_buffer_write_time(n) \
(pgStatBlockWriteTime += (n))
extern void pgstat_count_heap_insert(Relation rel, PgStat_Counter n);
extern void pgstat_count_heap_update(Relation rel, bool hot);
extern void pgstat_count_heap_delete(Relation rel);
extern void pgstat_count_truncate(Relation rel);
extern void pgstat_update_heap_dead_tuples(Relation rel, int delta);
Change function call information to be variable length. Before this change FunctionCallInfoData, the struct arguments etc for V1 function calls are stored in, always had space for FUNC_MAX_ARGS/100 arguments, storing datums and their nullness in two arrays. For nearly every function call 100 arguments is far more than needed, therefore wasting memory. Arg and argnull being two separate arrays also guarantees that to access a single argument, two cachelines have to be touched. Change the layout so there's a single variable-length array with pairs of value / isnull. That drastically reduces memory consumption for most function calls (on x86-64 a two argument function now uses 64bytes, previously 936 bytes), and makes it very likely that argument value and its nullness are on the same cacheline. Arguments are stored in a new NullableDatum struct, which, due to padding, needs more memory per argument than before. But as usually far fewer arguments are stored, and individual arguments are cheaper to access, that's still a clear win. It's likely that there's other places where conversion to NullableDatum arrays would make sense, e.g. TupleTableSlots, but that's for another commit. Because the function call information is now variable-length allocations have to take the number of arguments into account. For heap allocations that can be done with SizeForFunctionCallInfoData(), for on-stack allocations there's a new LOCAL_FCINFO(name, nargs) macro that helps to allocate an appropriately sized and aligned variable. Some places with stack allocation function call information don't know the number of arguments at compile time, and currently variably sized stack allocations aren't allowed in postgres. Therefore allow for FUNC_MAX_ARGS space in these cases. They're not that common, so for now that seems acceptable. Because of the need to allocate FunctionCallInfo of the appropriate size, older extensions may need to update their code. To avoid subtle breakages, the FunctionCallInfoData struct has been renamed to FunctionCallInfoBaseData. Most code only references FunctionCallInfo, so that shouldn't cause much collateral damage. This change is also a prerequisite for more efficient expression JIT compilation (by allocating the function call information on the stack, allowing LLVM to optimize it away); previously the size of the call information caused problems inside LLVM's optimizer. Author: Andres Freund Reviewed-By: Tom Lane Discussion: https://postgr.es/m/20180605172952.x34m5uz6ju6enaem@alap3.anarazel.de
2019-01-26 23:17:52 +01:00
extern void pgstat_init_function_usage(FunctionCallInfo fcinfo,
PgStat_FunctionCallUsage *fcu);
extern void pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu,
bool finalize);
extern void AtEOXact_PgStat(bool isCommit);
extern void AtEOSubXact_PgStat(bool isCommit, int nestDepth);
extern void AtPrepare_PgStat(void);
extern void PostPrepare_PgStat(void);
extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info,
2007-11-15 22:14:46 +01:00
void *recdata, uint32 len);
extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
2007-11-15 22:14:46 +01:00
void *recdata, uint32 len);
extern void pgstat_send_archiver(const char *xlog, bool failed);
extern void pgstat_send_bgwriter(void);
/* ----------
* Support functions for the SQL-callable functions to
* generate the pgstat* views.
* ----------
*/
extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);
extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);
extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid);
extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid);
extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
extern int pgstat_fetch_stat_numbackends(void);
extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void);
extern PgStat_GlobalStats *pgstat_fetch_global(void);
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
#endif /* PGSTAT_H */