2001-06-22 21:18:36 +02:00
|
|
|
/* ----------
|
|
|
|
* pgstat.c
|
|
|
|
*
|
|
|
|
* All the statistics collector stuff hacked up in one big, ugly file.
|
|
|
|
*
|
|
|
|
* TODO: - Separate collector, postmaster and backend stuff
|
|
|
|
* into different files.
|
|
|
|
*
|
|
|
|
* - Add some automatic call for pgstat vacuuming.
|
|
|
|
*
|
|
|
|
* - Add a pgstat config column to pg_database, so this
|
2004-05-28 07:13:32 +02:00
|
|
|
* entire thing can be enabled/disabled on a per db basis.
|
2001-06-22 21:18:36 +02:00
|
|
|
*
|
2011-01-01 19:18:15 +01:00
|
|
|
* Copyright (c) 2001-2011, PostgreSQL Global Development Group
|
2001-06-22 21:18:36 +02:00
|
|
|
*
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/postmaster/pgstat.c
|
2001-06-22 21:18:36 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
2001-06-30 21:01:27 +02:00
|
|
|
#include "postgres.h"
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
#include <unistd.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include <sys/socket.h>
|
2003-06-12 09:36:51 +02:00
|
|
|
#include <netdb.h>
|
2001-06-22 21:18:36 +02:00
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
#include <signal.h>
|
2004-06-03 04:08:07 +02:00
|
|
|
#include <time.h>
|
2006-06-29 22:00:08 +02:00
|
|
|
#ifdef HAVE_POLL_H
|
|
|
|
#include <poll.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_SYS_POLL_H
|
|
|
|
#include <sys/poll.h>
|
|
|
|
#endif
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2003-04-26 04:57:14 +02:00
|
|
|
#include "pgstat.h"
|
|
|
|
|
2001-08-04 02:14:43 +02:00
|
|
|
#include "access/heapam.h"
|
2006-07-13 18:49:20 +02:00
|
|
|
#include "access/transam.h"
|
2007-05-27 05:50:39 +02:00
|
|
|
#include "access/twophase_rmgr.h"
|
2004-05-30 00:48:23 +02:00
|
|
|
#include "access/xact.h"
|
2001-08-04 02:14:43 +02:00
|
|
|
#include "catalog/pg_database.h"
|
2008-05-15 02:17:41 +02:00
|
|
|
#include "catalog/pg_proc.h"
|
2006-07-13 18:49:20 +02:00
|
|
|
#include "libpq/ip.h"
|
2003-06-12 09:36:51 +02:00
|
|
|
#include "libpq/libpq.h"
|
2004-05-30 00:48:23 +02:00
|
|
|
#include "libpq/pqsignal.h"
|
2002-02-07 23:20:26 +01:00
|
|
|
#include "mb/pg_wchar.h"
|
2001-06-22 21:18:36 +02:00
|
|
|
#include "miscadmin.h"
|
2008-08-01 15:16:09 +02:00
|
|
|
#include "pg_trace.h"
|
2005-07-14 07:13:45 +02:00
|
|
|
#include "postmaster/autovacuum.h"
|
2005-04-08 02:55:07 +02:00
|
|
|
#include "postmaster/fork_process.h"
|
2004-05-30 00:48:23 +02:00
|
|
|
#include "postmaster/postmaster.h"
|
2001-06-22 21:18:36 +02:00
|
|
|
#include "storage/backendid.h"
|
2004-10-28 03:38:41 +02:00
|
|
|
#include "storage/fd.h"
|
2002-05-05 02:03:29 +02:00
|
|
|
#include "storage/ipc.h"
|
2003-11-07 22:55:50 +01:00
|
|
|
#include "storage/pg_shmem.h"
|
2004-05-30 00:48:23 +02:00
|
|
|
#include "storage/pmsignal.h"
|
2011-01-03 12:46:03 +01:00
|
|
|
#include "storage/procsignal.h"
|
2007-09-24 05:12:23 +02:00
|
|
|
#include "utils/guc.h"
|
2004-05-30 00:48:23 +02:00
|
|
|
#include "utils/memutils.h"
|
2001-08-04 02:14:43 +02:00
|
|
|
#include "utils/ps_status.h"
|
2008-06-19 02:46:06 +02:00
|
|
|
#include "utils/rel.h"
|
2008-03-26 22:10:39 +01:00
|
|
|
#include "utils/tqual.h"
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
|
2004-06-26 18:32:04 +02:00
|
|
|
/* ----------
|
2005-07-04 06:51:52 +02:00
|
|
|
* Paths for the statistics files (relative to installation's $PGDATA).
|
2004-06-26 18:32:04 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
2008-08-05 14:09:30 +02:00
|
|
|
#define PGSTAT_STAT_PERMANENT_FILENAME "global/pgstat.stat"
|
|
|
|
#define PGSTAT_STAT_PERMANENT_TMPFILE "global/pgstat.tmp"
|
2004-06-26 18:32:04 +02:00
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* Timer definitions.
|
|
|
|
* ----------
|
|
|
|
*/
|
2008-11-03 02:17:08 +01:00
|
|
|
#define PGSTAT_STAT_INTERVAL 500 /* Minimum time between stats file
|
|
|
|
* updates; in milliseconds. */
|
|
|
|
|
|
|
|
#define PGSTAT_RETRY_DELAY 10 /* How long to wait between statistics
|
|
|
|
* update requests; in milliseconds. */
|
|
|
|
|
|
|
|
#define PGSTAT_MAX_WAIT_TIME 5000 /* Maximum time to wait for a stats
|
|
|
|
* file update; in milliseconds. */
|
2004-06-26 18:32:04 +02:00
|
|
|
|
2006-05-30 04:35:39 +02:00
|
|
|
#define PGSTAT_RESTART_INTERVAL 60 /* How often to attempt to restart a
|
|
|
|
* failed statistics collector; in
|
|
|
|
* seconds. */
|
2004-06-26 18:32:04 +02:00
|
|
|
|
2006-06-29 22:00:08 +02:00
|
|
|
#define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster
|
|
|
|
* death; in seconds. */
|
|
|
|
|
2008-11-03 02:17:08 +01:00
|
|
|
#define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
|
|
|
|
|
2004-06-26 18:32:04 +02:00
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* The initial size hints for the hash tables used in the collector.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
#define PGSTAT_DB_HASH_SIZE 16
|
|
|
|
#define PGSTAT_TAB_HASH_SIZE 512
|
2008-05-15 02:17:41 +02:00
|
|
|
#define PGSTAT_FUNCTION_HASH_SIZE 512
|
2004-06-26 18:32:04 +02:00
|
|
|
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
/* ----------
|
2001-08-04 02:14:43 +02:00
|
|
|
* GUC parameters
|
2001-06-22 21:18:36 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
2007-09-24 05:12:23 +02:00
|
|
|
bool pgstat_track_activities = false;
|
|
|
|
bool pgstat_track_counts = false;
|
2008-05-15 02:17:41 +02:00
|
|
|
int pgstat_track_functions = TRACK_FUNC_OFF;
|
2008-06-30 12:58:47 +02:00
|
|
|
int pgstat_track_activity_query_size = 1024;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2008-08-15 10:37:41 +02:00
|
|
|
/* ----------
|
|
|
|
* Built from GUC parameter
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
char *pgstat_stat_filename = NULL;
|
|
|
|
char *pgstat_stat_tmpname = NULL;
|
|
|
|
|
2007-05-27 05:50:39 +02:00
|
|
|
/*
|
|
|
|
* BgWriter global statistics counters (unused in other processes).
|
|
|
|
* Stored directly in a stats message structure so it can be sent
|
|
|
|
* without needing to copy things around. We assume this inits to zeroes.
|
|
|
|
*/
|
|
|
|
PgStat_MsgBgWriter BgWriterStats;
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
/* ----------
|
|
|
|
* Local data
|
|
|
|
* ----------
|
|
|
|
*/
|
2010-01-10 15:16:08 +01:00
|
|
|
NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET;
|
2006-06-19 03:51:22 +02:00
|
|
|
|
2003-06-12 09:36:51 +02:00
|
|
|
static struct sockaddr_storage pgStatAddr;
|
2006-06-19 03:51:22 +02:00
|
|
|
|
2003-04-26 04:57:14 +02:00
|
|
|
static time_t last_pgstat_start_time;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2006-01-03 17:42:17 +01:00
|
|
|
static bool pgStatRunningInCollector = false;
|
2003-07-22 21:00:12 +02:00
|
|
|
|
2005-07-29 21:30:09 +02:00
|
|
|
/*
|
2007-05-27 05:50:39 +02:00
|
|
|
* Structures in which backends store per-table info that's waiting to be
|
|
|
|
* sent to the collector.
|
2007-04-21 06:10:53 +02:00
|
|
|
*
|
2007-05-27 05:50:39 +02:00
|
|
|
* NOTE: once allocated, TabStatusArray structures are never moved or deleted
|
|
|
|
* for the life of the backend. Also, we zero out the t_id fields of the
|
|
|
|
* contained PgStat_TableStatus structs whenever they are not actively in use.
|
|
|
|
* This allows relcache pgstat_info pointers to be treated as long-lived data,
|
|
|
|
* avoiding repeated searches in pgstat_initstats() when a relation is
|
|
|
|
* repeatedly opened during a transaction.
|
|
|
|
*/
|
2007-11-15 22:14:46 +01:00
|
|
|
#define TABSTAT_QUANTUM 100 /* we alloc this many at a time */
|
2007-05-27 05:50:39 +02:00
|
|
|
|
|
|
|
/*
 * One allocation quantum of per-table pending-stats entries.  Arrays are
 * chained via tsa_next and, per the NOTE above, are never moved or freed
 * for the life of the backend, so pointers into tsa_entries stay valid.
 */
typedef struct TabStatusArray
{
	struct TabStatusArray *tsa_next;	/* link to next array, if any */
	int			tsa_used;		/* # entries currently used */
	PgStat_TableStatus tsa_entries[TABSTAT_QUANTUM];	/* per-table data */
} TabStatusArray;
|
2007-05-27 05:50:39 +02:00
|
|
|
|
|
|
|
static TabStatusArray *pgStatTabList = NULL;
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2008-05-15 02:17:41 +02:00
|
|
|
/*
|
|
|
|
* Backends store per-function info that's waiting to be sent to the collector
|
|
|
|
* in this hash table (indexed by function OID).
|
|
|
|
*/
|
|
|
|
static HTAB *pgStatFunctions = NULL;
|
|
|
|
|
2008-11-03 02:17:08 +01:00
|
|
|
/*
|
|
|
|
* Indicates if backend has some function stats that it hasn't yet
|
|
|
|
* sent to the collector.
|
|
|
|
*/
|
|
|
|
static bool have_function_stats = false;
|
|
|
|
|
2007-05-27 05:50:39 +02:00
|
|
|
/*
|
|
|
|
* Tuple insertion/deletion counts for an open transaction can't be propagated
|
|
|
|
* into PgStat_TableStatus counters until we know if it is going to commit
|
|
|
|
* or abort. Hence, we keep these counts in per-subxact structs that live
|
|
|
|
* in TopTransactionContext. This data structure is designed on the assumption
|
|
|
|
* that subxacts won't usually modify very many tables.
|
|
|
|
*/
|
|
|
|
/*
 * Per-subtransaction state for pending table counts; nodes form a stack
 * (via prev) rooted at pgStatXactStack, one node per open nest level.
 */
typedef struct PgStat_SubXactStatus
{
	int			nest_level;		/* subtransaction nest level */
	struct PgStat_SubXactStatus *prev;	/* higher-level subxact if any */
	PgStat_TableXactStatus *first;	/* head of list for this subxact */
} PgStat_SubXactStatus;
|
2003-07-22 21:00:12 +02:00
|
|
|
|
2007-05-27 05:50:39 +02:00
|
|
|
static PgStat_SubXactStatus *pgStatXactStack = NULL;
|
2005-07-29 21:30:09 +02:00
|
|
|
|
2001-10-25 07:50:21 +02:00
|
|
|
static int pgStatXactCommit = 0;
|
|
|
|
static int pgStatXactRollback = 0;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2007-05-27 05:50:39 +02:00
|
|
|
/*
 * Record that's written to 2PC state file when pgstat state is persisted,
 * so a prepared transaction's pending tuple counts survive a restart and
 * can be applied (or discarded) when the transaction is later committed
 * or rolled back.  One record per table modified by the transaction.
 */
typedef struct TwoPhasePgStatRecord
{
	PgStat_Counter tuples_inserted;		/* tuples inserted in xact */
	PgStat_Counter tuples_updated;		/* tuples updated in xact */
	PgStat_Counter tuples_deleted;		/* tuples deleted in xact */
	Oid			t_id;			/* table's OID */
	bool		t_shared;		/* is it a shared catalog? */
} TwoPhasePgStatRecord;
|
2007-05-27 05:50:39 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Info about current "snapshot" of stats file
|
|
|
|
*/
|
2007-02-08 00:11:30 +01:00
|
|
|
static MemoryContext pgStatLocalContext = NULL;
|
2001-10-25 07:50:21 +02:00
|
|
|
static HTAB *pgStatDBHash = NULL;
|
2006-06-19 03:51:22 +02:00
|
|
|
static PgBackendStatus *localBackendStatusTable = NULL;
|
|
|
|
static int localNumBackends = 0;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2007-03-30 20:34:56 +02:00
|
|
|
/*
|
|
|
|
* Cluster wide statistics, kept in the stats collector.
|
|
|
|
* Contains statistics that are not collected per database
|
|
|
|
* or per table.
|
|
|
|
*/
|
|
|
|
static PgStat_GlobalStats globalStats;
|
|
|
|
|
2008-11-03 02:17:08 +01:00
|
|
|
/* Last time the collector successfully wrote the stats file */
|
|
|
|
static TimestampTz last_statwrite;
|
2009-06-11 16:49:15 +02:00
|
|
|
|
2008-11-03 02:17:08 +01:00
|
|
|
/* Latest statistics request time from backends */
|
|
|
|
static TimestampTz last_statrequest;
|
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
static volatile bool need_exit = false;
|
2008-08-25 17:11:01 +02:00
|
|
|
static volatile bool got_SIGHUP = false;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2008-05-15 02:17:41 +02:00
|
|
|
/*
|
|
|
|
* Total time charged to functions so far in the current backend.
|
|
|
|
* We use this to help separate "self" and "other" time charges.
|
|
|
|
* (We assume this initializes to zero.)
|
|
|
|
*/
|
|
|
|
static instr_time total_func_time;
|
|
|
|
|
2006-01-04 22:06:32 +01:00
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
/* ----------
|
|
|
|
* Local function forward declarations
|
|
|
|
* ----------
|
|
|
|
*/
|
2003-12-25 04:52:51 +01:00
|
|
|
#ifdef EXEC_BACKEND
|
2006-06-29 22:00:08 +02:00
|
|
|
static pid_t pgstat_forkexec(void);
|
2003-12-25 04:52:51 +01:00
|
|
|
#endif
|
2004-05-28 07:13:32 +02:00
|
|
|
|
|
|
|
NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]);
|
2004-06-14 20:08:19 +02:00
|
|
|
static void pgstat_exit(SIGNAL_ARGS);
|
2005-04-01 01:20:49 +02:00
|
|
|
static void pgstat_beshutdown_hook(int code, Datum arg);
|
2008-08-25 17:11:01 +02:00
|
|
|
static void pgstat_sighup_handler(SIGNAL_ARGS);
|
2001-10-25 07:50:21 +02:00
|
|
|
|
2005-07-29 21:30:09 +02:00
|
|
|
static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
|
2009-09-05 00:32:33 +02:00
|
|
|
static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
|
2010-02-26 03:01:40 +01:00
|
|
|
Oid tableoid, bool create);
|
2008-08-05 14:09:30 +02:00
|
|
|
static void pgstat_write_statsfile(bool permanent);
|
|
|
|
static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent);
|
2004-07-01 02:52:04 +02:00
|
|
|
static void backend_read_statsfile(void);
|
2006-06-19 03:51:22 +02:00
|
|
|
static void pgstat_read_current_status(void);
|
2007-05-27 05:50:39 +02:00
|
|
|
|
|
|
|
static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
|
2008-05-15 02:17:41 +02:00
|
|
|
static void pgstat_send_funcstats(void);
|
2007-01-12 00:06:03 +01:00
|
|
|
static HTAB *pgstat_collect_oids(Oid catalogid);
|
2001-10-25 07:50:21 +02:00
|
|
|
|
2007-05-27 05:50:39 +02:00
|
|
|
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
|
|
|
|
|
2007-02-08 00:11:30 +01:00
|
|
|
static void pgstat_setup_memcxt(void);
|
|
|
|
|
2005-07-14 07:13:45 +02:00
|
|
|
static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
|
2001-10-25 07:50:21 +02:00
|
|
|
static void pgstat_send(void *msg, int len);
|
|
|
|
|
2008-11-03 02:17:08 +01:00
|
|
|
static void pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len);
|
2001-10-25 07:50:21 +02:00
|
|
|
static void pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len);
|
|
|
|
static void pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len);
|
|
|
|
static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len);
|
|
|
|
static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len);
|
2010-01-19 15:11:32 +01:00
|
|
|
static void pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len);
|
2010-01-28 15:25:41 +01:00
|
|
|
static void pgstat_recv_resetsinglecounter(PgStat_MsgResetsinglecounter *msg, int len);
|
2005-07-14 07:13:45 +02:00
|
|
|
static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
|
|
|
|
static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
|
|
|
|
static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
|
2007-11-15 23:25:18 +01:00
|
|
|
static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
|
2008-05-15 02:17:41 +02:00
|
|
|
static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
|
|
|
|
static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
|
2011-01-03 12:46:03 +01:00
|
|
|
static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* ------------------------------------------------------------
|
|
|
|
* Public functions called from postmaster follow
|
|
|
|
* ------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_init() -
|
|
|
|
*
|
|
|
|
* Called from postmaster at startup. Create the resources required
|
2003-04-26 04:57:14 +02:00
|
|
|
* by the statistics collector process. If unable to do so, do not
|
|
|
|
* fail --- better to let the postmaster start with stats collection
|
|
|
|
* disabled.
|
2001-06-22 21:18:36 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
2003-04-26 04:57:14 +02:00
|
|
|
void
pgstat_init(void)
{
	ACCEPT_TYPE_ARG3 alen;
	struct addrinfo *addrs = NULL,
			   *addr,
				hints;
	int			ret;
	fd_set		rset;
	struct timeval tv;
	char		test_byte;
	int			sel_res;
	int			tries = 0;

#define TESTBYTEVAL ((char) 199)

	/*
	 * Create the UDP socket for sending and receiving statistic messages
	 */
	hints.ai_flags = AI_PASSIVE;
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_DGRAM;
	hints.ai_protocol = 0;
	hints.ai_addrlen = 0;
	hints.ai_addr = NULL;
	hints.ai_canonname = NULL;
	hints.ai_next = NULL;
	ret = pg_getaddrinfo_all("localhost", NULL, &hints, &addrs);
	if (ret || !addrs)
	{
		ereport(LOG,
				(errmsg("could not resolve \"localhost\": %s",
						gai_strerror(ret))));
		goto startup_failed;
	}

	/*
	 * On some platforms, pg_getaddrinfo_all() may return multiple addresses
	 * only one of which will actually work (eg, both IPv6 and IPv4 addresses
	 * when kernel will reject IPv6).  Worse, the failure may occur at the
	 * bind() or perhaps even connect() stage.	So we must loop through the
	 * results till we find a working combination. We will generate LOG
	 * messages, but no error, for bogus combinations.
	 */
	for (addr = addrs; addr; addr = addr->ai_next)
	{
#ifdef HAVE_UNIX_SOCKETS
		/* Ignore AF_UNIX sockets, if any are returned. */
		if (addr->ai_family == AF_UNIX)
			continue;
#endif

		if (++tries > 1)
			ereport(LOG,
			(errmsg("trying another address for the statistics collector")));

		/*
		 * Create the socket.
		 */
		if ((pgStatSock = socket(addr->ai_family, SOCK_DGRAM, 0)) == PGINVALID_SOCKET)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
			errmsg("could not create socket for statistics collector: %m")));
			continue;
		}

		/*
		 * Bind it to a kernel assigned port on localhost and get the assigned
		 * port via getsockname().
		 */
		if (bind(pgStatSock, addr->ai_addr, addr->ai_addrlen) < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
			  errmsg("could not bind socket for statistics collector: %m")));
			closesocket(pgStatSock);
			pgStatSock = PGINVALID_SOCKET;
			continue;
		}

		alen = sizeof(pgStatAddr);
		if (getsockname(pgStatSock, (struct sockaddr *) & pgStatAddr, &alen) < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
					 errmsg("could not get address of socket for statistics collector: %m")));
			closesocket(pgStatSock);
			pgStatSock = PGINVALID_SOCKET;
			continue;
		}

		/*
		 * Connect the socket to its own address.  This saves a few cycles by
		 * not having to respecify the target address on every send. This also
		 * provides a kernel-level check that only packets from this same
		 * address will be received.
		 */
		if (connect(pgStatSock, (struct sockaddr *) & pgStatAddr, alen) < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
			errmsg("could not connect socket for statistics collector: %m")));
			closesocket(pgStatSock);
			pgStatSock = PGINVALID_SOCKET;
			continue;
		}

		/*
		 * Try to send and receive a one-byte test message on the socket. This
		 * is to catch situations where the socket can be created but will not
		 * actually pass data (for instance, because kernel packet filtering
		 * rules prevent it).
		 */
		test_byte = TESTBYTEVAL;

retry1:
		if (send(pgStatSock, &test_byte, 1, 0) != 1)
		{
			if (errno == EINTR)
				goto retry1;	/* if interrupted, just retry */
			ereport(LOG,
					(errcode_for_socket_access(),
					 errmsg("could not send test message on socket for statistics collector: %m")));
			closesocket(pgStatSock);
			pgStatSock = PGINVALID_SOCKET;
			continue;
		}

		/*
		 * There could possibly be a little delay before the message can be
		 * received.  We arbitrarily allow up to half a second before deciding
		 * it's broken.
		 */
		for (;;)				/* need a loop to handle EINTR */
		{
			FD_ZERO(&rset);
			FD_SET(pgStatSock, &rset);

			tv.tv_sec = 0;
			tv.tv_usec = 500000;
			sel_res = select(pgStatSock + 1, &rset, NULL, NULL, &tv);
			if (sel_res >= 0 || errno != EINTR)
				break;
		}
		if (sel_res < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
					 errmsg("select() failed in statistics collector: %m")));
			closesocket(pgStatSock);
			pgStatSock = PGINVALID_SOCKET;
			continue;
		}
		if (sel_res == 0 || !FD_ISSET(pgStatSock, &rset))
		{
			/*
			 * This is the case we actually think is likely, so take pains to
			 * give a specific message for it.
			 *
			 * errno will not be set meaningfully here, so don't use it.
			 */
			ereport(LOG,
					(errcode(ERRCODE_CONNECTION_FAILURE),
					 errmsg("test message did not get through on socket for statistics collector")));
			closesocket(pgStatSock);
			pgStatSock = PGINVALID_SOCKET;
			continue;
		}

		test_byte++;			/* just make sure variable is changed */

retry2:
		if (recv(pgStatSock, &test_byte, 1, 0) != 1)
		{
			if (errno == EINTR)
				goto retry2;	/* if interrupted, just retry */
			ereport(LOG,
					(errcode_for_socket_access(),
					 errmsg("could not receive test message on socket for statistics collector: %m")));
			closesocket(pgStatSock);
			pgStatSock = PGINVALID_SOCKET;
			continue;
		}

		if (test_byte != TESTBYTEVAL)	/* strictly paranoia ... */
		{
			ereport(LOG,
					(errcode(ERRCODE_INTERNAL_ERROR),
					 errmsg("incorrect test message transmission on socket for statistics collector")));
			closesocket(pgStatSock);
			pgStatSock = PGINVALID_SOCKET;
			continue;
		}

		/* If we get here, we have a working socket */
		break;
	}

	/* Did we find a working address? */
	if (!addr || pgStatSock == PGINVALID_SOCKET)
		goto startup_failed;

	/*
	 * Set the socket to non-blocking IO.  This ensures that if the collector
	 * falls behind, statistics messages will be discarded; backends won't
	 * block waiting to send messages to the collector.
	 */
	if (!pg_set_noblock(pgStatSock))
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not set statistics collector socket to nonblocking mode: %m")));
		goto startup_failed;
	}

	pg_freeaddrinfo_all(hints.ai_family, addrs);

	return;

startup_failed:
	ereport(LOG,
	  (errmsg("disabling statistics collector for lack of working socket")));

	if (addrs)
		pg_freeaddrinfo_all(hints.ai_family, addrs);

	if (pgStatSock != PGINVALID_SOCKET)
		closesocket(pgStatSock);
	pgStatSock = PGINVALID_SOCKET;

	/*
	 * Adjust GUC variables to suppress useless activity, and for debugging
	 * purposes (seeing track_counts off is a clue that we failed here). We
	 * use PGC_S_OVERRIDE because there is no point in trying to turn it back
	 * on from postgresql.conf without a restart.
	 */
	SetConfigOption("track_counts", "off", PGC_INTERNAL, PGC_S_OVERRIDE);
}
|
|
|
|
|
2005-08-11 23:11:50 +02:00
|
|
|
/*
|
|
|
|
* pgstat_reset_all() -
|
|
|
|
*
|
2007-09-24 05:12:23 +02:00
|
|
|
* Remove the stats file. This is currently used only if WAL
|
2005-08-11 23:11:50 +02:00
|
|
|
* recovery is needed after a crash.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_reset_all(void)
|
|
|
|
{
|
2008-08-15 10:37:41 +02:00
|
|
|
unlink(pgstat_stat_filename);
|
2008-08-05 14:09:30 +02:00
|
|
|
unlink(PGSTAT_STAT_PERMANENT_FILENAME);
|
2005-08-11 23:11:50 +02:00
|
|
|
}
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2003-12-25 04:52:51 +01:00
|
|
|
#ifdef EXEC_BACKEND
|
|
|
|
|
2004-05-28 07:13:32 +02:00
|
|
|
/*
|
2004-01-07 00:15:22 +01:00
|
|
|
* pgstat_forkexec() -
|
2003-12-25 04:52:51 +01:00
|
|
|
*
|
2006-06-29 22:00:08 +02:00
|
|
|
* Format up the arglist for, then fork and exec, statistics collector process
|
2003-12-25 04:52:51 +01:00
|
|
|
*/
|
2004-01-07 00:15:22 +01:00
|
|
|
static pid_t
|
2006-06-29 22:00:08 +02:00
|
|
|
pgstat_forkexec(void)
|
2003-12-25 04:52:51 +01:00
|
|
|
{
|
2004-08-29 07:07:03 +02:00
|
|
|
char *av[10];
|
2006-06-29 22:00:08 +02:00
|
|
|
int ac = 0;
|
2003-12-25 04:52:51 +01:00
|
|
|
|
|
|
|
av[ac++] = "postgres";
|
2006-06-29 22:00:08 +02:00
|
|
|
av[ac++] = "--forkcol";
|
2004-05-28 07:13:32 +02:00
|
|
|
av[ac++] = NULL; /* filled in by postmaster_forkexec */
|
|
|
|
|
|
|
|
av[ac] = NULL;
|
|
|
|
Assert(ac < lengthof(av));
|
2003-12-25 04:52:51 +01:00
|
|
|
|
2004-05-28 07:13:32 +02:00
|
|
|
return postmaster_forkexec(ac, av);
|
2003-12-25 04:52:51 +01:00
|
|
|
}
|
2004-08-29 07:07:03 +02:00
|
|
|
#endif /* EXEC_BACKEND */
|
2004-05-28 07:13:32 +02:00
|
|
|
|
2003-12-25 04:52:51 +01:00
|
|
|
|
2007-09-24 05:12:23 +02:00
|
|
|
/*
|
2001-06-22 21:18:36 +02:00
|
|
|
* pgstat_start() -
|
|
|
|
*
|
|
|
|
* Called from postmaster at startup or after an existing collector
|
2003-04-26 04:57:14 +02:00
|
|
|
* died. Attempt to fire up a fresh statistics collector.
|
2002-04-03 02:27:25 +02:00
|
|
|
*
|
2004-06-14 20:08:19 +02:00
|
|
|
* Returns PID of child process, or 0 if fail.
|
|
|
|
*
|
2003-04-26 04:57:14 +02:00
|
|
|
* Note: if fail, we will be called again from the postmaster main loop.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2004-06-14 20:08:19 +02:00
|
|
|
int
pgstat_start(void)
{
	time_t		curtime;
	pid_t		pgStatPid;

	/*
	 * Check that the socket is there, else pgstat_init failed and we can do
	 * nothing useful.
	 */
	if (pgStatSock == PGINVALID_SOCKET)
		return 0;

	/*
	 * Do nothing if too soon since last collector start.  This is a safety
	 * valve to protect against continuous respawn attempts if the collector
	 * is dying immediately at launch.	Note that since we will be re-called
	 * from the postmaster main loop, we will get another chance later.
	 */
	curtime = time(NULL);
	if ((unsigned int) (curtime - last_pgstat_start_time) <
		(unsigned int) PGSTAT_RESTART_INTERVAL)
		return 0;
	last_pgstat_start_time = curtime;

	/*
	 * Okay, fork off the collector.  Under EXEC_BACKEND the child is
	 * launched via fork+exec (pgstat_forkexec); otherwise a plain fork,
	 * and the child branch below runs in this same executable image.
	 */
#ifdef EXEC_BACKEND
	switch ((pgStatPid = pgstat_forkexec()))
#else
	switch ((pgStatPid = fork_process()))
#endif
	{
		case -1:
			ereport(LOG,
					(errmsg("could not fork statistics collector: %m")));
			return 0;

#ifndef EXEC_BACKEND
		case 0:
			/* in postmaster child ... */
			/* Close the postmaster's sockets */
			ClosePostmasterPorts(false);

			/* Lose the postmaster's on-exit routines */
			on_exit_reset();

			/* Drop our connection to postmaster's shared memory, as well */
			PGSharedMemoryDetach();

			/* Enter the collector's main loop; does not return normally. */
			PgstatCollectorMain(0, NULL);
			break;
#endif

		default:
			/* in postmaster: return child's PID to the caller */
			return (int) pgStatPid;
	}

	/* shouldn't get here */
	return 0;
}
|
|
|
|
|
2007-11-15 22:14:46 +01:00
|
|
|
/*
 * allow_immediate_pgstat_restart() -
 *
 *	Reset the respawn throttle so that the next call to pgstat_start()
 *	is not suppressed by the PGSTAT_RESTART_INTERVAL check.
 */
void
allow_immediate_pgstat_restart(void)
{
	/* a zero timestamp always fails the "too soon" comparison */
	last_pgstat_start_time = 0;
}
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/* ------------------------------------------------------------
|
|
|
|
* Public functions used by backends follow
|
2001-10-25 07:50:21 +02:00
|
|
|
*------------------------------------------------------------
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------
 * pgstat_report_stat() -
 *
 *	Called from tcop/postgres.c to send the so far collected per-table
 *	and function usage statistics to the collector.  Note that this is
 *	called only when not within a transaction, so it is fair to use
 *	transaction stop time as an approximation of current time.
 *
 *	If "force" is true, send the stats even if the rate-limit interval
 *	has not elapsed, and also report any pending xact commit/abort counts.
 * ----------
 */
void
pgstat_report_stat(bool force)
{
	/* we assume this inits to all zeroes: */
	static const PgStat_TableCounts all_zeroes;
	static TimestampTz last_report = 0;

	TimestampTz now;
	PgStat_MsgTabstat regular_msg;
	PgStat_MsgTabstat shared_msg;
	TabStatusArray *tsa;
	int			i;

	/* Don't expend a clock check if nothing to do */
	if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0)
		&& !have_function_stats)
		return;

	/*
	 * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
	 * msec since we last sent one, or the caller wants to force stats out.
	 */
	now = GetCurrentTransactionStopTimestamp();
	if (!force &&
		!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
		return;
	last_report = now;

	/*
	 * Scan through the TabStatusArray struct(s) to find tables that actually
	 * have counts, and build messages to send.  We have to separate shared
	 * relations from regular ones because the databaseid field in the message
	 * header has to depend on that.
	 */
	regular_msg.m_databaseid = MyDatabaseId;
	shared_msg.m_databaseid = InvalidOid;
	regular_msg.m_nentries = 0;
	shared_msg.m_nentries = 0;

	for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
	{
		for (i = 0; i < tsa->tsa_used; i++)
		{
			PgStat_TableStatus *entry = &tsa->tsa_entries[i];
			PgStat_MsgTabstat *this_msg;
			PgStat_TableEntry *this_ent;

			/* Shouldn't have any pending transaction-dependent counts */
			Assert(entry->trans == NULL);

			/*
			 * Ignore entries that didn't accumulate any actual counts, such
			 * as indexes that were opened by the planner but not used.
			 */
			if (memcmp(&entry->t_counts, &all_zeroes,
					   sizeof(PgStat_TableCounts)) == 0)
				continue;

			/*
			 * OK, insert data into the appropriate message, and send if full.
			 */
			this_msg = entry->t_shared ? &shared_msg : &regular_msg;
			this_ent = &this_msg->m_entry[this_msg->m_nentries];
			this_ent->t_id = entry->t_id;
			memcpy(&this_ent->t_counts, &entry->t_counts,
				   sizeof(PgStat_TableCounts));
			if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES)
			{
				pgstat_send_tabstat(this_msg);
				this_msg->m_nentries = 0;
			}
		}
		/* zero out TableStatus structs after use */
		MemSet(tsa->tsa_entries, 0,
			   tsa->tsa_used * sizeof(PgStat_TableStatus));
		tsa->tsa_used = 0;
	}

	/*
	 * Send partial messages.  If force is true, make sure that any pending
	 * xact commit/abort gets counted, even if no table stats to send.
	 */
	if (regular_msg.m_nentries > 0 ||
		(force && (pgStatXactCommit > 0 || pgStatXactRollback > 0)))
		pgstat_send_tabstat(&regular_msg);
	if (shared_msg.m_nentries > 0)
		pgstat_send_tabstat(&shared_msg);

	/* Now, send function statistics */
	pgstat_send_funcstats();
}
|
|
|
|
|
2007-05-27 05:50:39 +02:00
|
|
|
/*
 * Subroutine for pgstat_report_stat: finish and send a tabstat message
 *
 * Piggybacks the accumulated xact commit/rollback counters onto any
 * message destined for our own database (databaseid valid); messages for
 * shared catalogs (databaseid == InvalidOid) carry zeroes instead.
 */
static void
pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
{
	int			n;
	int			len;

	/* It's unlikely we'd get here with no socket, but maybe not impossible */
	if (pgStatSock == PGINVALID_SOCKET)
		return;

	/*
	 * Report accumulated xact commit/rollback whenever we send a normal
	 * tabstat message
	 */
	if (OidIsValid(tsmsg->m_databaseid))
	{
		tsmsg->m_xact_commit = pgStatXactCommit;
		tsmsg->m_xact_rollback = pgStatXactRollback;
		pgStatXactCommit = 0;
		pgStatXactRollback = 0;
	}
	else
	{
		tsmsg->m_xact_commit = 0;
		tsmsg->m_xact_rollback = 0;
	}

	/* send only as many bytes as there are filled-in entries */
	n = tsmsg->m_nentries;
	len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
		n * sizeof(PgStat_TableEntry);

	pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT);
	pgstat_send(tsmsg, len);
}
|
|
|
|
|
2008-05-15 02:17:41 +02:00
|
|
|
/*
 * Subroutine for pgstat_report_stat: populate and send a function stat message
 *
 * Walks the backend-local pgStatFunctions hash, batching nonzero entries
 * into PgStat_MsgFuncstat messages and zeroing each entry after it has
 * been copied out.
 */
static void
pgstat_send_funcstats(void)
{
	/* we assume this inits to all zeroes: */
	static const PgStat_FunctionCounts all_zeroes;

	PgStat_MsgFuncstat msg;
	PgStat_BackendFunctionEntry *entry;
	HASH_SEQ_STATUS fstat;

	if (pgStatFunctions == NULL)
		return;

	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_FUNCSTAT);
	msg.m_databaseid = MyDatabaseId;
	msg.m_nentries = 0;

	hash_seq_init(&fstat, pgStatFunctions);
	while ((entry = (PgStat_BackendFunctionEntry *) hash_seq_search(&fstat)) != NULL)
	{
		PgStat_FunctionEntry *m_ent;

		/* Skip it if no counts accumulated since last time */
		if (memcmp(&entry->f_counts, &all_zeroes,
				   sizeof(PgStat_FunctionCounts)) == 0)
			continue;

		/* need to convert format of time accumulators */
		m_ent = &msg.m_entry[msg.m_nentries];
		m_ent->f_id = entry->f_id;
		m_ent->f_numcalls = entry->f_counts.f_numcalls;
		m_ent->f_time = INSTR_TIME_GET_MICROSEC(entry->f_counts.f_time);
		m_ent->f_time_self = INSTR_TIME_GET_MICROSEC(entry->f_counts.f_time_self);

		/* flush the message whenever it fills up */
		if (++msg.m_nentries >= PGSTAT_NUM_FUNCENTRIES)
		{
			pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
						msg.m_nentries * sizeof(PgStat_FunctionEntry));
			msg.m_nentries = 0;
		}

		/* reset the entry's counts */
		MemSet(&entry->f_counts, 0, sizeof(PgStat_FunctionCounts));
	}

	/* send any leftover partial message */
	if (msg.m_nentries > 0)
		pgstat_send(&msg, offsetof(PgStat_MsgFuncstat, m_entry[0]) +
					msg.m_nentries * sizeof(PgStat_FunctionEntry));

	have_function_stats = false;
}
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/* ----------
 * pgstat_vacuum_stat() -
 *
 *	Will tell the collector about objects he can get rid of.
 *
 *	Works in three phases: purge dead databases cluster-wide, then dead
 *	tables in our own database, then (if any are tracked) dead functions.
 * ----------
 */
void
pgstat_vacuum_stat(void)
{
	HTAB	   *htab;
	PgStat_MsgTabpurge msg;
	PgStat_MsgFuncpurge f_msg;
	HASH_SEQ_STATUS hstat;
	PgStat_StatDBEntry *dbentry;
	PgStat_StatTabEntry *tabentry;
	PgStat_StatFuncEntry *funcentry;
	int			len;

	if (pgStatSock == PGINVALID_SOCKET)
		return;

	/*
	 * If not done for this transaction, read the statistics collector stats
	 * file into some hash tables.
	 */
	backend_read_statsfile();

	/*
	 * Read pg_database and make a list of OIDs of all existing databases
	 */
	htab = pgstat_collect_oids(DatabaseRelationId);

	/*
	 * Search the database hash table for dead databases and tell the
	 * collector to drop them.
	 */
	hash_seq_init(&hstat, pgStatDBHash);
	while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
	{
		Oid			dbid = dbentry->databaseid;

		CHECK_FOR_INTERRUPTS();

		/* the DB entry for shared tables (with InvalidOid) is never dropped */
		if (OidIsValid(dbid) &&
			hash_search(htab, (void *) &dbid, HASH_FIND, NULL) == NULL)
			pgstat_drop_database(dbid);
	}

	/* Clean up */
	hash_destroy(htab);

	/*
	 * Lookup our own database entry; if not found, nothing more to do.
	 */
	dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
												 (void *) &MyDatabaseId,
												 HASH_FIND, NULL);
	if (dbentry == NULL || dbentry->tables == NULL)
		return;

	/*
	 * Similarly to above, make a list of all known relations in this DB.
	 */
	htab = pgstat_collect_oids(RelationRelationId);

	/*
	 * Initialize our messages table counter to zero
	 */
	msg.m_nentries = 0;

	/*
	 * Check for all tables listed in stats hashtable if they still exist.
	 */
	hash_seq_init(&hstat, dbentry->tables);
	while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != NULL)
	{
		Oid			tabid = tabentry->tableid;

		CHECK_FOR_INTERRUPTS();

		if (hash_search(htab, (void *) &tabid, HASH_FIND, NULL) != NULL)
			continue;

		/*
		 * Not there, so add this table's Oid to the message
		 */
		msg.m_tableid[msg.m_nentries++] = tabid;

		/*
		 * If the message is full, send it out and reinitialize to empty
		 */
		if (msg.m_nentries >= PGSTAT_NUM_TABPURGE)
		{
			len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
				+msg.m_nentries * sizeof(Oid);

			pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
			msg.m_databaseid = MyDatabaseId;
			pgstat_send(&msg, len);

			msg.m_nentries = 0;
		}
	}

	/*
	 * Send the rest
	 */
	if (msg.m_nentries > 0)
	{
		len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
			+msg.m_nentries * sizeof(Oid);

		pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
		msg.m_databaseid = MyDatabaseId;
		pgstat_send(&msg, len);
	}

	/* Clean up */
	hash_destroy(htab);

	/*
	 * Now repeat the above steps for functions.  However, we needn't bother
	 * in the common case where no function stats are being collected.
	 */
	if (dbentry->functions != NULL &&
		hash_get_num_entries(dbentry->functions) > 0)
	{
		htab = pgstat_collect_oids(ProcedureRelationId);

		pgstat_setheader(&f_msg.m_hdr, PGSTAT_MTYPE_FUNCPURGE);
		f_msg.m_databaseid = MyDatabaseId;
		f_msg.m_nentries = 0;

		hash_seq_init(&hstat, dbentry->functions);
		while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&hstat)) != NULL)
		{
			Oid			funcid = funcentry->functionid;

			CHECK_FOR_INTERRUPTS();

			if (hash_search(htab, (void *) &funcid, HASH_FIND, NULL) != NULL)
				continue;

			/*
			 * Not there, so add this function's Oid to the message
			 */
			f_msg.m_functionid[f_msg.m_nentries++] = funcid;

			/*
			 * If the message is full, send it out and reinitialize to empty
			 */
			if (f_msg.m_nentries >= PGSTAT_NUM_FUNCPURGE)
			{
				len = offsetof(PgStat_MsgFuncpurge, m_functionid[0])
					+f_msg.m_nentries * sizeof(Oid);

				pgstat_send(&f_msg, len);

				f_msg.m_nentries = 0;
			}
		}

		/*
		 * Send the rest
		 */
		if (f_msg.m_nentries > 0)
		{
			len = offsetof(PgStat_MsgFuncpurge, m_functionid[0])
				+f_msg.m_nentries * sizeof(Oid);

			pgstat_send(&f_msg, len);
		}

		hash_destroy(htab);
	}
}
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------
 * pgstat_collect_oids() -
 *
 *	Collect the OIDs of all objects listed in the specified system catalog
 *	into a temporary hash table.  Caller should hash_destroy the result
 *	when done with it.  (However, we make the table in CurrentMemoryContext
 *	so that it will be freed properly in event of an error.)
 * ----------
 */
static HTAB *
pgstat_collect_oids(Oid catalogid)
{
	HTAB	   *htab;
	HASHCTL		hash_ctl;
	Relation	rel;
	HeapScanDesc scan;
	HeapTuple	tup;

	/* key and entry are both just the OID itself */
	memset(&hash_ctl, 0, sizeof(hash_ctl));
	hash_ctl.keysize = sizeof(Oid);
	hash_ctl.entrysize = sizeof(Oid);
	hash_ctl.hash = oid_hash;
	hash_ctl.hcxt = CurrentMemoryContext;
	htab = hash_create("Temporary table of OIDs",
					   PGSTAT_TAB_HASH_SIZE,
					   &hash_ctl,
					   HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

	/* sequential scan of the catalog, entering each row's OID */
	rel = heap_open(catalogid, AccessShareLock);
	scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
	while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Oid			thisoid = HeapTupleGetOid(tup);

		CHECK_FOR_INTERRUPTS();

		(void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL);
	}
	heap_endscan(scan);
	heap_close(rel, AccessShareLock);

	return htab;
}
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_drop_database() -
|
|
|
|
*
|
|
|
|
* Tell the collector that we just dropped a database.
|
2006-01-18 21:35:06 +01:00
|
|
|
* (If the message gets lost, we will still clean the dead DB eventually
|
2008-05-15 02:17:41 +02:00
|
|
|
* via future invocations of pgstat_vacuum_stat().)
|
2001-06-22 21:18:36 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
2007-02-09 17:12:19 +01:00
|
|
|
void
|
2001-06-22 21:18:36 +02:00
|
|
|
pgstat_drop_database(Oid databaseid)
|
|
|
|
{
|
2001-10-25 07:50:21 +02:00
|
|
|
PgStat_MsgDropdb msg;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2010-01-31 18:39:34 +01:00
|
|
|
if (pgStatSock == PGINVALID_SOCKET)
|
2001-06-22 21:18:36 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DROPDB);
|
2005-05-11 03:41:41 +02:00
|
|
|
msg.m_databaseid = databaseid;
|
2001-06-22 21:18:36 +02:00
|
|
|
pgstat_send(&msg, sizeof(msg));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-01-18 21:35:06 +01:00
|
|
|
/* ----------
 * pgstat_drop_relation() -
 *
 *	Tell the collector that we just dropped a relation.
 *	(If the message gets lost, we will still clean the dead entry eventually
 *	via future invocations of pgstat_vacuum_stat().)
 *
 *	Currently not used for lack of any good place to call it; we rely
 *	entirely on pgstat_vacuum_stat() to clean out stats for dead rels.
 * ----------
 */
#ifdef NOT_USED
void
pgstat_drop_relation(Oid relid)
{
	PgStat_MsgTabpurge msg;
	int			len;

	if (pgStatSock == PGINVALID_SOCKET)
		return;

	/* a single-entry purge message */
	msg.m_tableid[0] = relid;
	msg.m_nentries = 1;

	len = offsetof(PgStat_MsgTabpurge, m_tableid[0]) +sizeof(Oid);

	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
	msg.m_databaseid = MyDatabaseId;
	pgstat_send(&msg, len);
}
#endif   /* NOT_USED */
|
2006-01-18 21:35:06 +01:00
|
|
|
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
/* ----------
|
|
|
|
* pgstat_reset_counters() -
|
|
|
|
*
|
|
|
|
* Tell the statistics collector to reset counters for our database.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_reset_counters(void)
|
|
|
|
{
|
2001-10-25 07:50:21 +02:00
|
|
|
PgStat_MsgResetcounter msg;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2010-01-31 18:39:34 +01:00
|
|
|
if (pgStatSock == PGINVALID_SOCKET)
|
2001-06-22 21:18:36 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
if (!superuser())
|
2003-07-22 21:00:12 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2005-10-15 04:49:52 +02:00
|
|
|
errmsg("must be superuser to reset statistics counters")));
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETCOUNTER);
|
2005-05-11 03:41:41 +02:00
|
|
|
msg.m_databaseid = MyDatabaseId;
|
2001-06-22 21:18:36 +02:00
|
|
|
pgstat_send(&msg, sizeof(msg));
|
|
|
|
}
|
|
|
|
|
2010-01-19 15:11:32 +01:00
|
|
|
/* ----------
|
|
|
|
* pgstat_reset_shared_counters() -
|
|
|
|
*
|
|
|
|
* Tell the statistics collector to reset cluster-wide shared counters.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_reset_shared_counters(const char *target)
|
|
|
|
{
|
|
|
|
PgStat_MsgResetsharedcounter msg;
|
|
|
|
|
2010-01-31 18:39:34 +01:00
|
|
|
if (pgStatSock == PGINVALID_SOCKET)
|
2010-01-19 15:11:32 +01:00
|
|
|
return;
|
|
|
|
|
|
|
|
if (!superuser())
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
|
|
|
errmsg("must be superuser to reset statistics counters")));
|
|
|
|
|
|
|
|
if (strcmp(target, "bgwriter") == 0)
|
|
|
|
msg.m_resettarget = RESET_BGWRITER;
|
|
|
|
else
|
|
|
|
ereport(ERROR,
|
2010-03-12 23:19:19 +01:00
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("unrecognized reset target: \"%s\"", target),
|
|
|
|
errhint("Target must be \"bgwriter\".")));
|
2010-01-19 15:11:32 +01:00
|
|
|
|
|
|
|
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
|
|
|
|
pgstat_send(&msg, sizeof(msg));
|
|
|
|
}
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2010-01-28 15:25:41 +01:00
|
|
|
/* ----------
|
|
|
|
* pgstat_reset_single_counter() -
|
|
|
|
*
|
|
|
|
* Tell the statistics collector to reset a single counter.
|
|
|
|
* ----------
|
|
|
|
*/
|
2010-02-26 03:01:40 +01:00
|
|
|
void
|
|
|
|
pgstat_reset_single_counter(Oid objoid, PgStat_Single_Reset_Type type)
|
2010-01-28 15:25:41 +01:00
|
|
|
{
|
|
|
|
PgStat_MsgResetsinglecounter msg;
|
|
|
|
|
2010-01-31 18:39:34 +01:00
|
|
|
if (pgStatSock == PGINVALID_SOCKET)
|
2010-01-28 15:25:41 +01:00
|
|
|
return;
|
|
|
|
|
|
|
|
if (!superuser())
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
|
|
|
errmsg("must be superuser to reset statistics counters")));
|
|
|
|
|
|
|
|
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSINGLECOUNTER);
|
|
|
|
msg.m_databaseid = MyDatabaseId;
|
|
|
|
msg.m_resettype = type;
|
|
|
|
msg.m_objectid = objoid;
|
|
|
|
|
|
|
|
pgstat_send(&msg, sizeof(msg));
|
|
|
|
}
|
|
|
|
|
2006-06-19 03:51:22 +02:00
|
|
|
/* ----------
|
|
|
|
* pgstat_report_autovac() -
|
|
|
|
*
|
|
|
|
* Called from autovacuum.c to report startup of an autovacuum process.
|
|
|
|
* We are called before InitPostgres is done, so can't rely on MyDatabaseId;
|
|
|
|
* the db OID must be passed in, instead.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_report_autovac(Oid dboid)
|
|
|
|
{
|
|
|
|
PgStat_MsgAutovacStart msg;
|
|
|
|
|
2010-01-31 18:39:34 +01:00
|
|
|
if (pgStatSock == PGINVALID_SOCKET)
|
2006-06-19 03:51:22 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_AUTOVAC_START);
|
|
|
|
msg.m_databaseid = dboid;
|
|
|
|
msg.m_start_time = GetCurrentTimestamp();
|
|
|
|
|
|
|
|
pgstat_send(&msg, sizeof(msg));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* ---------
|
|
|
|
* pgstat_report_vacuum() -
|
|
|
|
*
|
|
|
|
* Tell the collector about the table we just vacuumed.
|
|
|
|
* ---------
|
|
|
|
*/
|
|
|
|
void
|
Fix VACUUM so that it always updates pg_class.reltuples/relpages.
When we added the ability for vacuum to skip heap pages by consulting the
visibility map, we made it just not update the reltuples/relpages
statistics if it skipped any pages. But this could leave us with extremely
out-of-date stats for a table that contains any unchanging areas,
especially for TOAST tables which never get processed by ANALYZE. In
particular this could result in autovacuum making poor decisions about when
to process the table, as in recent report from Florian Helmberger. And in
general it's a bad idea to not update the stats at all. Instead, use the
previous values of reltuples/relpages as an estimate of the tuple density
in unvisited pages. This approach results in a "moving average" estimate
of reltuples, which should converge to the correct value over multiple
VACUUM and ANALYZE cycles even when individual measurements aren't very
good.
This new method for updating reltuples is used by both VACUUM and ANALYZE,
with the result that we no longer need the grotty interconnections that
caused ANALYZE to not update the stats depending on what had happened
in the parent VACUUM command.
Also, fix the logic for skipping all-visible pages during VACUUM so that it
looks ahead rather than behind to decide what to do, as per a suggestion
from Greg Stark. This eliminates useless scanning of all-visible pages at
the start of the relation or just after a not-all-visible page. In
particular, the first few pages of the relation will not be invariably
included in the scanned pages, which seems to help in not overweighting
them in the reltuples estimate.
Back-patch to 8.4, where the visibility map was introduced.
2011-05-30 23:05:26 +02:00
|
|
|
pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter tuples)
|
2006-06-19 03:51:22 +02:00
|
|
|
{
|
|
|
|
PgStat_MsgVacuum msg;
|
|
|
|
|
2010-01-31 18:39:34 +01:00
|
|
|
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
|
2006-06-19 03:51:22 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUM);
|
|
|
|
msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
|
|
|
|
msg.m_tableoid = tableoid;
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
msg.m_autovacuum = IsAutoVacuumWorkerProcess();
|
2006-06-19 03:51:22 +02:00
|
|
|
msg.m_vacuumtime = GetCurrentTimestamp();
|
|
|
|
msg.m_tuples = tuples;
|
|
|
|
pgstat_send(&msg, sizeof(msg));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* --------
 * pgstat_report_analyze() -
 *
 *	Tell the collector about the table we just analyzed.
 *
 *	livetuples/deadtuples are ANALYZE's estimates for the table; they are
 *	adjusted below for changes made by the current transaction, since those
 *	will be reported separately at transaction end.
 * --------
 */
void
pgstat_report_analyze(Relation rel,
					  PgStat_Counter livetuples, PgStat_Counter deadtuples)
{
	PgStat_MsgAnalyze msg;

	/* Skip if the collector is unreachable or stats tracking is off */
	if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
		return;

	/*
	 * Unlike VACUUM, ANALYZE might be running inside a transaction that has
	 * already inserted and/or deleted rows in the target table. ANALYZE will
	 * have counted such rows as live or dead respectively. Because we will
	 * report our counts of such rows at transaction end, we should subtract
	 * off these counts from what we send to the collector now, else they'll
	 * be double-counted after commit. (This approach also ensures that the
	 * collector ends up with the right numbers if we abort instead of
	 * committing.)
	 */
	if (rel->pgstat_info != NULL)
	{
		PgStat_TableXactStatus *trans;

		/* Walk all pending (sub)transaction levels for this table */
		for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
		{
			livetuples -= trans->tuples_inserted - trans->tuples_deleted;
			deadtuples -= trans->tuples_updated + trans->tuples_deleted;
		}
		/* count stuff inserted by already-aborted subxacts, too */
		deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples;
		/* Since ANALYZE's counts are estimates, we could have underflowed */
		livetuples = Max(livetuples, 0);
		deadtuples = Max(deadtuples, 0);
	}

	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE);
	/* shared catalogs are reported under database OID 0 */
	msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId;
	msg.m_tableoid = RelationGetRelid(rel);
	msg.m_autovacuum = IsAutoVacuumWorkerProcess();
	msg.m_analyzetime = GetCurrentTimestamp();
	msg.m_live_tuples = livetuples;
	msg.m_dead_tuples = deadtuples;
	pgstat_send(&msg, sizeof(msg));
}
|
|
|
|
|
2011-01-03 12:46:03 +01:00
|
|
|
/* --------
|
|
|
|
* pgstat_report_recovery_conflict() -
|
|
|
|
*
|
2011-04-10 17:42:00 +02:00
|
|
|
* Tell the collector about a Hot Standby recovery conflict.
|
2011-01-03 12:46:03 +01:00
|
|
|
* --------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_report_recovery_conflict(int reason)
|
|
|
|
{
|
|
|
|
PgStat_MsgRecoveryConflict msg;
|
|
|
|
|
|
|
|
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
|
|
|
|
return;
|
|
|
|
|
|
|
|
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RECOVERYCONFLICT);
|
|
|
|
msg.m_databaseid = MyDatabaseId;
|
|
|
|
msg.m_reason = reason;
|
|
|
|
pgstat_send(&msg, sizeof(msg));
|
|
|
|
}
|
2006-06-19 03:51:22 +02:00
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
/* ----------
|
|
|
|
* pgstat_ping() -
|
|
|
|
*
|
|
|
|
* Send some junk data to the collector to increase traffic.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_ping(void)
|
|
|
|
{
|
2001-10-25 07:50:21 +02:00
|
|
|
PgStat_MsgDummy msg;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2010-01-31 18:39:34 +01:00
|
|
|
if (pgStatSock == PGINVALID_SOCKET)
|
2001-06-22 21:18:36 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DUMMY);
|
|
|
|
pgstat_send(&msg, sizeof(msg));
|
|
|
|
}
|
|
|
|
|
2008-11-03 02:17:08 +01:00
|
|
|
/* ----------
|
|
|
|
* pgstat_send_inquiry() -
|
|
|
|
*
|
|
|
|
* Notify collector that we need fresh data.
|
|
|
|
* ts specifies the minimum acceptable timestamp for the stats file.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_send_inquiry(TimestampTz ts)
|
|
|
|
{
|
|
|
|
PgStat_MsgInquiry msg;
|
|
|
|
|
|
|
|
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY);
|
|
|
|
msg.inquiry_time = ts;
|
|
|
|
pgstat_send(&msg, sizeof(msg));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-05-15 02:17:41 +02:00
|
|
|
/*
 * Initialize function call usage data.
 * Called by the executor before invoking a function.
 *
 * On exit, fcu->fs is NULL if stats are not being collected for this
 * function; otherwise it points at the function's counter record, and the
 * snapshot fields of *fcu are primed for pgstat_end_function_usage().
 */
void
pgstat_init_function_usage(FunctionCallInfoData *fcinfo,
						   PgStat_FunctionCallUsage *fcu)
{
	PgStat_BackendFunctionEntry *htabent;
	bool		found;

	/*
	 * fn_stats holds the minimum track_functions setting needed to track
	 * this function; skip if the current setting doesn't reach it.
	 */
	if (pgstat_track_functions <= fcinfo->flinfo->fn_stats)
	{
		/* stats not wanted */
		fcu->fs = NULL;
		return;
	}

	if (!pgStatFunctions)
	{
		/* First time through - initialize function stat table */
		HASHCTL		hash_ctl;

		memset(&hash_ctl, 0, sizeof(hash_ctl));
		hash_ctl.keysize = sizeof(Oid);
		hash_ctl.entrysize = sizeof(PgStat_BackendFunctionEntry);
		hash_ctl.hash = oid_hash;
		pgStatFunctions = hash_create("Function stat entries",
									  PGSTAT_FUNCTION_HASH_SIZE,
									  &hash_ctl,
									  HASH_ELEM | HASH_FUNCTION);
	}

	/* Get the stats entry for this function, create if necessary */
	htabent = hash_search(pgStatFunctions, &fcinfo->flinfo->fn_oid,
						  HASH_ENTER, &found);
	if (!found)
		MemSet(&htabent->f_counts, 0, sizeof(PgStat_FunctionCounts));

	fcu->fs = &htabent->f_counts;

	/* save stats for this function, later used to compensate for recursion */
	fcu->save_f_time = htabent->f_counts.f_time;

	/* save current backend-wide total time */
	fcu->save_total = total_func_time;

	/* get clock time as of function start */
	INSTR_TIME_SET_CURRENT(fcu->f_start);
}
|
|
|
|
|
2010-08-08 18:27:06 +02:00
|
|
|
/*
|
|
|
|
* find_funcstat_entry - find any existing PgStat_BackendFunctionEntry entry
|
|
|
|
* for specified function
|
|
|
|
*
|
|
|
|
* If no entry, return NULL, don't create a new one
|
|
|
|
*/
|
|
|
|
PgStat_BackendFunctionEntry *
|
|
|
|
find_funcstat_entry(Oid func_id)
|
|
|
|
{
|
|
|
|
if (pgStatFunctions == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return (PgStat_BackendFunctionEntry *) hash_search(pgStatFunctions,
|
|
|
|
(void *) &func_id,
|
|
|
|
HASH_FIND, NULL);
|
|
|
|
}
|
|
|
|
|
2008-05-15 02:17:41 +02:00
|
|
|
/*
 * Calculate function call usage and update stat counters.
 * Called by the executor after invoking a function.
 *
 * In the case of a set-returning function that runs in value-per-call mode,
 * we will see multiple pgstat_init_function_usage/pgstat_end_function_usage
 * calls for what the user considers a single call of the function.  The
 * finalize flag should be TRUE on the last call.
 */
void
pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu, bool finalize)
{
	PgStat_FunctionCounts *fs = fcu->fs;
	instr_time	f_total;		/* wall-clock elapsed in this call */
	instr_time	f_others;		/* time charged to callees during this call */
	instr_time	f_self;			/* elapsed minus callee time */

	/* stats not wanted? (fcu->fs was left NULL by init) */
	if (fs == NULL)
		return;

	/* total elapsed time in this function call */
	INSTR_TIME_SET_CURRENT(f_total);
	INSTR_TIME_SUBTRACT(f_total, fcu->f_start);

	/* self usage: elapsed minus anything already charged to other calls */
	f_others = total_func_time;
	INSTR_TIME_SUBTRACT(f_others, fcu->save_total);
	f_self = f_total;
	INSTR_TIME_SUBTRACT(f_self, f_others);

	/* update backend-wide total time */
	INSTR_TIME_ADD(total_func_time, f_self);

	/*
	 * Compute the new total f_time as the total elapsed time added to the
	 * pre-call value of f_time.  This is necessary to avoid double-counting
	 * any time taken by recursive calls of myself.  (We do not need any
	 * similar kluge for self time, since that already excludes any recursive
	 * calls.)
	 */
	INSTR_TIME_ADD(f_total, fcu->save_f_time);

	/* update counters in function stats table */
	if (finalize)
		fs->f_numcalls++;		/* count the call only once, on final exit */
	fs->f_time = f_total;
	INSTR_TIME_ADD(fs->f_time_self, f_self);

	/* indicate that we have something to send */
	have_function_stats = true;
}
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_initstats() -
|
|
|
|
*
|
2007-05-27 05:50:39 +02:00
|
|
|
* Initialize a relcache entry to count access statistics.
|
|
|
|
* Called whenever a relation is opened.
|
2007-04-21 06:10:53 +02:00
|
|
|
*
|
|
|
|
* We assume that a relcache entry's pgstat_info field is zeroed by
|
|
|
|
* relcache.c when the relcache entry is made; thereafter it is long-lived
|
2007-05-27 05:50:39 +02:00
|
|
|
* data. We can avoid repeated searches of the TabStatus arrays when the
|
2007-04-21 06:10:53 +02:00
|
|
|
* same relation is touched repeatedly within a transaction.
|
2001-06-22 21:18:36 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
void
|
2007-05-27 05:50:39 +02:00
|
|
|
pgstat_initstats(Relation rel)
|
2001-06-22 21:18:36 +02:00
|
|
|
{
|
2001-10-25 07:50:21 +02:00
|
|
|
Oid rel_id = rel->rd_id;
|
2007-05-27 05:50:39 +02:00
|
|
|
char relkind = rel->rd_rel->relkind;
|
|
|
|
|
|
|
|
/* We only count stats for things that have storage */
|
|
|
|
if (!(relkind == RELKIND_RELATION ||
|
|
|
|
relkind == RELKIND_INDEX ||
|
2009-10-03 00:49:50 +02:00
|
|
|
relkind == RELKIND_TOASTVALUE ||
|
|
|
|
relkind == RELKIND_SEQUENCE))
|
2007-05-27 05:50:39 +02:00
|
|
|
{
|
|
|
|
rel->pgstat_info = NULL;
|
|
|
|
return;
|
|
|
|
}
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2010-01-31 18:39:34 +01:00
|
|
|
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
|
2007-04-21 06:10:53 +02:00
|
|
|
{
|
2007-05-27 05:50:39 +02:00
|
|
|
/* We're not counting at all */
|
|
|
|
rel->pgstat_info = NULL;
|
2001-06-22 21:18:36 +02:00
|
|
|
return;
|
2007-04-21 06:10:53 +02:00
|
|
|
}
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2007-04-21 06:10:53 +02:00
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* If we already set up this relation in the current transaction, nothing
|
|
|
|
* to do.
|
2007-04-21 06:10:53 +02:00
|
|
|
*/
|
2007-05-27 05:50:39 +02:00
|
|
|
if (rel->pgstat_info != NULL &&
|
|
|
|
rel->pgstat_info->t_id == rel_id)
|
2007-04-21 06:10:53 +02:00
|
|
|
return;
|
2007-05-27 05:50:39 +02:00
|
|
|
|
|
|
|
/* Else find or make the PgStat_TableStatus entry, and update link */
|
|
|
|
rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* get_tabstat_entry - find or create a PgStat_TableStatus entry for rel
|
|
|
|
*/
|
|
|
|
static PgStat_TableStatus *
|
|
|
|
get_tabstat_entry(Oid rel_id, bool isshared)
|
|
|
|
{
|
|
|
|
PgStat_TableStatus *entry;
|
|
|
|
TabStatusArray *tsa;
|
|
|
|
TabStatusArray *prev_tsa;
|
|
|
|
int i;
|
2005-07-29 21:30:09 +02:00
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
/*
|
2007-05-27 05:50:39 +02:00
|
|
|
* Search the already-used tabstat slots for this relation.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2007-05-27 05:50:39 +02:00
|
|
|
prev_tsa = NULL;
|
|
|
|
for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa = tsa->tsa_next)
|
2001-06-22 21:18:36 +02:00
|
|
|
{
|
2007-05-27 05:50:39 +02:00
|
|
|
for (i = 0; i < tsa->tsa_used; i++)
|
2001-06-22 21:18:36 +02:00
|
|
|
{
|
2007-05-27 05:50:39 +02:00
|
|
|
entry = &tsa->tsa_entries[i];
|
|
|
|
if (entry->t_id == rel_id)
|
|
|
|
return entry;
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
|
|
|
|
2007-05-27 05:50:39 +02:00
|
|
|
if (tsa->tsa_used < TABSTAT_QUANTUM)
|
|
|
|
{
|
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* It must not be present, but we found a free slot instead. Fine,
|
|
|
|
* let's use this one. We assume the entry was already zeroed,
|
|
|
|
* either at creation or after last use.
|
2007-05-27 05:50:39 +02:00
|
|
|
*/
|
|
|
|
entry = &tsa->tsa_entries[tsa->tsa_used++];
|
|
|
|
entry->t_id = rel_id;
|
|
|
|
entry->t_shared = isshared;
|
|
|
|
return entry;
|
|
|
|
}
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2007-05-27 05:50:39 +02:00
|
|
|
* We ran out of tabstat slots, so allocate more. Be sure they're zeroed.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2007-05-27 05:50:39 +02:00
|
|
|
tsa = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
|
|
|
|
sizeof(TabStatusArray));
|
|
|
|
if (prev_tsa)
|
|
|
|
prev_tsa->tsa_next = tsa;
|
|
|
|
else
|
|
|
|
pgStatTabList = tsa;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the first entry of the new TabStatusArray.
|
|
|
|
*/
|
|
|
|
entry = &tsa->tsa_entries[tsa->tsa_used++];
|
|
|
|
entry->t_id = rel_id;
|
|
|
|
entry->t_shared = isshared;
|
|
|
|
return entry;
|
|
|
|
}
|
|
|
|
|
2010-08-08 18:27:06 +02:00
|
|
|
/*
|
|
|
|
* find_tabstat_entry - find any existing PgStat_TableStatus entry for rel
|
|
|
|
*
|
|
|
|
* If no entry, return NULL, don't create a new one
|
|
|
|
*/
|
|
|
|
PgStat_TableStatus *
|
|
|
|
find_tabstat_entry(Oid rel_id)
|
|
|
|
{
|
|
|
|
PgStat_TableStatus *entry;
|
|
|
|
TabStatusArray *tsa;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
|
|
|
|
{
|
|
|
|
for (i = 0; i < tsa->tsa_used; i++)
|
|
|
|
{
|
|
|
|
entry = &tsa->tsa_entries[i];
|
|
|
|
if (entry->t_id == rel_id)
|
|
|
|
return entry;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Not present */
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2007-05-27 05:50:39 +02:00
|
|
|
/*
|
|
|
|
* get_tabstat_stack_level - add a new (sub)transaction stack entry if needed
|
|
|
|
*/
|
|
|
|
static PgStat_SubXactStatus *
|
|
|
|
get_tabstat_stack_level(int nest_level)
|
|
|
|
{
|
|
|
|
PgStat_SubXactStatus *xact_state;
|
|
|
|
|
|
|
|
xact_state = pgStatXactStack;
|
|
|
|
if (xact_state == NULL || xact_state->nest_level != nest_level)
|
|
|
|
{
|
|
|
|
xact_state = (PgStat_SubXactStatus *)
|
|
|
|
MemoryContextAlloc(TopTransactionContext,
|
|
|
|
sizeof(PgStat_SubXactStatus));
|
|
|
|
xact_state->nest_level = nest_level;
|
|
|
|
xact_state->prev = pgStatXactStack;
|
|
|
|
xact_state->first = NULL;
|
|
|
|
pgStatXactStack = xact_state;
|
|
|
|
}
|
|
|
|
return xact_state;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * add_tabstat_xact_level - add a new (sub)transaction state record
 *
 * Pushes a PgStat_TableXactStatus for pgstat_info onto both the table's
 * own chain (via ->upper) and the per-nest-level list (via ->next), and
 * makes it the table's current record.
 */
static void
add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level)
{
	PgStat_SubXactStatus *xact_state;
	PgStat_TableXactStatus *trans;

	/*
	 * If this is the first rel to be modified at the current nest level, we
	 * first have to push a transaction stack entry.
	 */
	xact_state = get_tabstat_stack_level(nest_level);

	/* Now make a per-table stack entry */
	trans = (PgStat_TableXactStatus *)
		MemoryContextAllocZero(TopTransactionContext,
							   sizeof(PgStat_TableXactStatus));
	trans->nest_level = nest_level;
	trans->upper = pgstat_info->trans;
	trans->parent = pgstat_info;
	/* link into this nest level's list of modified tables */
	trans->next = xact_state->first;
	xact_state->first = trans;
	pgstat_info->trans = trans;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pgstat_count_heap_insert - count a tuple insertion
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_count_heap_insert(Relation rel)
|
|
|
|
{
|
|
|
|
PgStat_TableStatus *pgstat_info = rel->pgstat_info;
|
|
|
|
|
2010-10-12 20:44:25 +02:00
|
|
|
if (pgstat_info != NULL)
|
2007-05-27 05:50:39 +02:00
|
|
|
{
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
/* We have to log the effect at the proper transactional level */
|
2007-11-15 22:14:46 +01:00
|
|
|
int nest_level = GetCurrentTransactionNestLevel();
|
2007-05-27 05:50:39 +02:00
|
|
|
|
|
|
|
if (pgstat_info->trans == NULL ||
|
|
|
|
pgstat_info->trans->nest_level != nest_level)
|
|
|
|
add_tabstat_xact_level(pgstat_info, nest_level);
|
|
|
|
|
|
|
|
pgstat_info->trans->tuples_inserted++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pgstat_count_heap_update - count a tuple update
|
|
|
|
*/
|
|
|
|
void
|
2007-09-20 19:56:33 +02:00
|
|
|
pgstat_count_heap_update(Relation rel, bool hot)
|
2007-05-27 05:50:39 +02:00
|
|
|
{
|
|
|
|
PgStat_TableStatus *pgstat_info = rel->pgstat_info;
|
|
|
|
|
2010-10-12 20:44:25 +02:00
|
|
|
if (pgstat_info != NULL)
|
2007-05-27 05:50:39 +02:00
|
|
|
{
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
/* We have to log the effect at the proper transactional level */
|
2007-11-15 22:14:46 +01:00
|
|
|
int nest_level = GetCurrentTransactionNestLevel();
|
2007-05-27 05:50:39 +02:00
|
|
|
|
|
|
|
if (pgstat_info->trans == NULL ||
|
|
|
|
pgstat_info->trans->nest_level != nest_level)
|
|
|
|
add_tabstat_xact_level(pgstat_info, nest_level);
|
|
|
|
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
pgstat_info->trans->tuples_updated++;
|
|
|
|
|
|
|
|
/* t_tuples_hot_updated is nontransactional, so just advance it */
|
|
|
|
if (hot)
|
|
|
|
pgstat_info->t_counts.t_tuples_hot_updated++;
|
2007-05-27 05:50:39 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pgstat_count_heap_delete - count a tuple deletion
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_count_heap_delete(Relation rel)
|
|
|
|
{
|
|
|
|
PgStat_TableStatus *pgstat_info = rel->pgstat_info;
|
|
|
|
|
2010-10-12 20:44:25 +02:00
|
|
|
if (pgstat_info != NULL)
|
2007-05-27 05:50:39 +02:00
|
|
|
{
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
/* We have to log the effect at the proper transactional level */
|
2007-11-15 22:14:46 +01:00
|
|
|
int nest_level = GetCurrentTransactionNestLevel();
|
2007-05-27 05:50:39 +02:00
|
|
|
|
|
|
|
if (pgstat_info->trans == NULL ||
|
|
|
|
pgstat_info->trans->nest_level != nest_level)
|
|
|
|
add_tabstat_xact_level(pgstat_info, nest_level);
|
|
|
|
|
|
|
|
pgstat_info->trans->tuples_deleted++;
|
|
|
|
}
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
|
|
|
|
2007-09-20 19:56:33 +02:00
|
|
|
/*
|
|
|
|
* pgstat_update_heap_dead_tuples - update dead-tuples count
|
|
|
|
*
|
|
|
|
* The semantics of this are that we are reporting the nontransactional
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
* recovery of "delta" dead tuples; so t_delta_dead_tuples decreases
|
2007-09-20 19:56:33 +02:00
|
|
|
* rather than increasing, and the change goes straight into the per-table
|
|
|
|
* counter, not into transactional state.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_update_heap_dead_tuples(Relation rel, int delta)
|
|
|
|
{
|
|
|
|
PgStat_TableStatus *pgstat_info = rel->pgstat_info;
|
|
|
|
|
2010-10-12 20:44:25 +02:00
|
|
|
if (pgstat_info != NULL)
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
pgstat_info->t_counts.t_delta_dead_tuples -= delta;
|
2007-09-20 19:56:33 +02:00
|
|
|
}
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/* ----------
 * AtEOXact_PgStat
 *
 *	Called from access/transam/xact.c at top-level transaction commit/abort.
 *
 *	Folds all pending per-transaction table counters into the base tabstat
 *	entries, applying commit or abort semantics, and resets the transactional
 *	stats stack.
 * ----------
 */
void
AtEOXact_PgStat(bool isCommit)
{
	PgStat_SubXactStatus *xact_state;

	/*
	 * Count transaction commit or abort.  (We use counters, not just bools,
	 * in case the reporting message isn't sent right away.)
	 */
	if (isCommit)
		pgStatXactCommit++;
	else
		pgStatXactRollback++;

	/*
	 * Transfer transactional insert/update counts into the base tabstat
	 * entries.  We don't bother to free any of the transactional state, since
	 * it's all in TopTransactionContext and will go away anyway.
	 */
	xact_state = pgStatXactStack;
	if (xact_state != NULL)
	{
		PgStat_TableXactStatus *trans;

		/* At top-level commit/abort only one stack level should remain */
		Assert(xact_state->nest_level == 1);
		Assert(xact_state->prev == NULL);
		for (trans = xact_state->first; trans != NULL; trans = trans->next)
		{
			PgStat_TableStatus *tabstat;

			Assert(trans->nest_level == 1);
			Assert(trans->upper == NULL);
			tabstat = trans->parent;
			Assert(tabstat->trans == trans);
			/* count attempted actions regardless of commit/abort */
			tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted;
			tabstat->t_counts.t_tuples_updated += trans->tuples_updated;
			tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted;
			if (isCommit)
			{
				/* insert adds a live tuple, delete removes one */
				tabstat->t_counts.t_delta_live_tuples +=
					trans->tuples_inserted - trans->tuples_deleted;
				/* update and delete each create a dead tuple */
				tabstat->t_counts.t_delta_dead_tuples +=
					trans->tuples_updated + trans->tuples_deleted;
				/* insert, update, delete each count as one change event */
				tabstat->t_counts.t_changed_tuples +=
					trans->tuples_inserted + trans->tuples_updated +
					trans->tuples_deleted;
			}
			else
			{
				/* inserted tuples are dead, deleted tuples are unaffected */
				tabstat->t_counts.t_delta_dead_tuples +=
					trans->tuples_inserted + trans->tuples_updated;
				/* an aborted xact generates no changed_tuple events */
			}
			/* detach the (now merged) transactional record from the tabstat */
			tabstat->trans = NULL;
		}
	}
	pgStatXactStack = NULL;

	/* Make sure any stats snapshot is thrown away */
	pgstat_clear_snapshot();
}
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/* ----------
 * AtEOSubXact_PgStat
 *
 *	Called from access/transam/xact.c at subtransaction commit/abort.
 *
 *	On commit, the subtransaction's pending table counters are merged into
 *	the parent subtransaction's state (creating or reusing records as
 *	needed).  On abort, they are folded directly into the base tabstat
 *	entries with abort semantics.
 * ----------
 */
void
AtEOSubXact_PgStat(bool isCommit, int nestDepth)
{
	PgStat_SubXactStatus *xact_state;

	/*
	 * Transfer transactional insert/update counts into the next higher
	 * subtransaction state.
	 */
	xact_state = pgStatXactStack;
	if (xact_state != NULL &&
		xact_state->nest_level >= nestDepth)
	{
		PgStat_TableXactStatus *trans;
		PgStat_TableXactStatus *next_trans;

		/* delink xact_state from stack immediately to simplify reuse case */
		pgStatXactStack = xact_state->prev;

		for (trans = xact_state->first; trans != NULL; trans = next_trans)
		{
			PgStat_TableStatus *tabstat;

			/* save link now; trans may be freed or re-linked below */
			next_trans = trans->next;
			Assert(trans->nest_level == nestDepth);
			tabstat = trans->parent;
			Assert(tabstat->trans == trans);
			if (isCommit)
			{
				if (trans->upper && trans->upper->nest_level == nestDepth - 1)
				{
					/* immediate parent record exists: merge counts into it */
					trans->upper->tuples_inserted += trans->tuples_inserted;
					trans->upper->tuples_updated += trans->tuples_updated;
					trans->upper->tuples_deleted += trans->tuples_deleted;
					tabstat->trans = trans->upper;
					pfree(trans);
				}
				else
				{
					/*
					 * When there isn't an immediate parent state, we can just
					 * reuse the record instead of going through a
					 * palloc/pfree pushup (this works since it's all in
					 * TopTransactionContext anyway).  We have to re-link it
					 * into the parent level, though, and that might mean
					 * pushing a new entry into the pgStatXactStack.
					 */
					PgStat_SubXactStatus *upper_xact_state;

					upper_xact_state = get_tabstat_stack_level(nestDepth - 1);
					trans->next = upper_xact_state->first;
					upper_xact_state->first = trans;
					trans->nest_level = nestDepth - 1;
				}
			}
			else
			{
				/*
				 * On abort, update top-level tabstat counts, then forget the
				 * subtransaction
				 */

				/* count attempted actions regardless of commit/abort */
				tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted;
				tabstat->t_counts.t_tuples_updated += trans->tuples_updated;
				tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted;
				/* inserted tuples are dead, deleted tuples are unaffected */
				tabstat->t_counts.t_delta_dead_tuples +=
					trans->tuples_inserted + trans->tuples_updated;
				tabstat->trans = trans->upper;
				pfree(trans);
			}
		}
		pfree(xact_state);
	}
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* AtPrepare_PgStat
|
|
|
|
* Save the transactional stats state at 2PC transaction prepare.
|
|
|
|
*
|
|
|
|
* In this phase we just generate 2PC records for all the pending
|
|
|
|
* transaction-dependent stats work.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
AtPrepare_PgStat(void)
|
|
|
|
{
|
|
|
|
PgStat_SubXactStatus *xact_state;
|
2004-10-28 03:38:41 +02:00
|
|
|
|
2007-05-27 05:50:39 +02:00
|
|
|
xact_state = pgStatXactStack;
|
|
|
|
if (xact_state != NULL)
|
2003-08-12 18:21:18 +02:00
|
|
|
{
|
2007-05-27 05:50:39 +02:00
|
|
|
PgStat_TableXactStatus *trans;
|
|
|
|
|
|
|
|
Assert(xact_state->nest_level == 1);
|
|
|
|
Assert(xact_state->prev == NULL);
|
|
|
|
for (trans = xact_state->first; trans != NULL; trans = trans->next)
|
|
|
|
{
|
|
|
|
PgStat_TableStatus *tabstat;
|
|
|
|
TwoPhasePgStatRecord record;
|
|
|
|
|
|
|
|
Assert(trans->nest_level == 1);
|
|
|
|
Assert(trans->upper == NULL);
|
|
|
|
tabstat = trans->parent;
|
|
|
|
Assert(tabstat->trans == trans);
|
|
|
|
|
|
|
|
record.tuples_inserted = trans->tuples_inserted;
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
record.tuples_updated = trans->tuples_updated;
|
2007-05-27 05:50:39 +02:00
|
|
|
record.tuples_deleted = trans->tuples_deleted;
|
|
|
|
record.t_id = tabstat->t_id;
|
|
|
|
record.t_shared = tabstat->t_shared;
|
|
|
|
|
|
|
|
RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0,
|
|
|
|
&record, sizeof(TwoPhasePgStatRecord));
|
|
|
|
}
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-05-27 05:50:39 +02:00
|
|
|
/*
|
|
|
|
* PostPrepare_PgStat
|
|
|
|
* Clean up after successful PREPARE.
|
|
|
|
*
|
|
|
|
* All we need do here is unlink the transaction stats state from the
|
2007-11-15 22:14:46 +01:00
|
|
|
* nontransactional state. The nontransactional action counts will be
|
2007-05-27 05:50:39 +02:00
|
|
|
* reported to the stats collector immediately, while the effects on live
|
|
|
|
* and dead tuple counts are preserved in the 2PC state file.
|
|
|
|
*
|
|
|
|
* Note: AtEOXact_PgStat is not called during PREPARE.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
PostPrepare_PgStat(void)
|
|
|
|
{
|
|
|
|
PgStat_SubXactStatus *xact_state;
|
|
|
|
|
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* We don't bother to free any of the transactional state, since it's all
|
|
|
|
* in TopTransactionContext and will go away anyway.
|
2007-05-27 05:50:39 +02:00
|
|
|
*/
|
|
|
|
xact_state = pgStatXactStack;
|
|
|
|
if (xact_state != NULL)
|
|
|
|
{
|
|
|
|
PgStat_TableXactStatus *trans;
|
|
|
|
|
|
|
|
for (trans = xact_state->first; trans != NULL; trans = trans->next)
|
|
|
|
{
|
|
|
|
PgStat_TableStatus *tabstat;
|
|
|
|
|
|
|
|
tabstat = trans->parent;
|
|
|
|
tabstat->trans = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pgStatXactStack = NULL;
|
|
|
|
|
|
|
|
/* Make sure any stats snapshot is thrown away */
|
|
|
|
pgstat_clear_snapshot();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 2PC processing routine for COMMIT PREPARED case.
|
|
|
|
*
|
|
|
|
* Load the saved counts into our local pgstats state.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_twophase_postcommit(TransactionId xid, uint16 info,
|
|
|
|
void *recdata, uint32 len)
|
|
|
|
{
|
|
|
|
TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
|
|
|
|
PgStat_TableStatus *pgstat_info;
|
|
|
|
|
|
|
|
/* Find or create a tabstat entry for the rel */
|
|
|
|
pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
|
|
|
|
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
/* Same math as in AtEOXact_PgStat, commit case */
|
|
|
|
pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
|
|
|
|
pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
|
|
|
|
pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
|
|
|
|
pgstat_info->t_counts.t_delta_live_tuples +=
|
2007-05-27 19:28:36 +02:00
|
|
|
rec->tuples_inserted - rec->tuples_deleted;
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
pgstat_info->t_counts.t_delta_dead_tuples +=
|
|
|
|
rec->tuples_updated + rec->tuples_deleted;
|
|
|
|
pgstat_info->t_counts.t_changed_tuples +=
|
|
|
|
rec->tuples_inserted + rec->tuples_updated +
|
|
|
|
rec->tuples_deleted;
|
2007-05-27 05:50:39 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 2PC processing routine for ROLLBACK PREPARED case.
|
|
|
|
*
|
|
|
|
* Load the saved counts into our local pgstats state, but treat them
|
|
|
|
* as aborted.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_twophase_postabort(TransactionId xid, uint16 info,
|
|
|
|
void *recdata, uint32 len)
|
|
|
|
{
|
|
|
|
TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
|
|
|
|
PgStat_TableStatus *pgstat_info;
|
|
|
|
|
|
|
|
/* Find or create a tabstat entry for the rel */
|
|
|
|
pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
|
|
|
|
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
/* Same math as in AtEOXact_PgStat, abort case */
|
|
|
|
pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
|
|
|
|
pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
|
|
|
|
pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
|
|
|
|
pgstat_info->t_counts.t_delta_dead_tuples +=
|
|
|
|
rec->tuples_inserted + rec->tuples_updated;
|
2007-05-27 05:50:39 +02:00
|
|
|
}
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_fetch_stat_dbentry() -
|
|
|
|
*
|
|
|
|
* Support function for the SQL-callable pgstat* functions. Returns
|
|
|
|
* the collected statistics for one database or NULL. NULL doesn't mean
|
|
|
|
* that the database doesn't exist, it is just not yet known by the
|
|
|
|
* collector, so the caller is better off to report ZERO instead.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
PgStat_StatDBEntry *
|
|
|
|
pgstat_fetch_stat_dbentry(Oid dbid)
|
|
|
|
{
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* If not done for this transaction, read the statistics collector stats
|
|
|
|
* file into some hash tables.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2004-07-01 02:52:04 +02:00
|
|
|
backend_read_statsfile();
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/*
|
2005-05-11 03:41:41 +02:00
|
|
|
* Lookup the requested database; return NULL if not found
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2005-05-11 03:41:41 +02:00
|
|
|
return (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
|
|
|
|
(void *) &dbid,
|
|
|
|
HASH_FIND, NULL);
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_fetch_stat_tabentry() -
|
|
|
|
*
|
|
|
|
* Support function for the SQL-callable pgstat* functions. Returns
|
|
|
|
* the collected statistics for one table or NULL. NULL doesn't mean
|
|
|
|
* that the table doesn't exist, it is just not yet known by the
|
|
|
|
* collector, so the caller is better off to report ZERO instead.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
PgStat_StatTabEntry *
|
|
|
|
pgstat_fetch_stat_tabentry(Oid relid)
|
|
|
|
{
|
2005-07-29 21:30:09 +02:00
|
|
|
Oid dbid;
|
2001-10-25 07:50:21 +02:00
|
|
|
PgStat_StatDBEntry *dbentry;
|
|
|
|
PgStat_StatTabEntry *tabentry;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* If not done for this transaction, read the statistics collector stats
|
|
|
|
* file into some hash tables.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2004-07-01 02:52:04 +02:00
|
|
|
backend_read_statsfile();
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/*
|
2005-07-29 21:30:09 +02:00
|
|
|
* Lookup our database, then look in its table hash table.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2005-07-29 21:30:09 +02:00
|
|
|
dbid = MyDatabaseId;
|
2001-10-01 07:36:17 +02:00
|
|
|
dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
|
2005-07-29 21:30:09 +02:00
|
|
|
(void *) &dbid,
|
2001-10-05 19:28:13 +02:00
|
|
|
HASH_FIND, NULL);
|
2005-07-29 21:30:09 +02:00
|
|
|
if (dbentry != NULL && dbentry->tables != NULL)
|
|
|
|
{
|
|
|
|
tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
|
|
|
|
(void *) &relid,
|
|
|
|
HASH_FIND, NULL);
|
|
|
|
if (tabentry)
|
|
|
|
return tabentry;
|
|
|
|
}
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/*
|
2005-07-29 21:30:09 +02:00
|
|
|
* If we didn't find it, maybe it's a shared table.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2005-07-29 21:30:09 +02:00
|
|
|
dbid = InvalidOid;
|
|
|
|
dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
|
|
|
|
(void *) &dbid,
|
|
|
|
HASH_FIND, NULL);
|
|
|
|
if (dbentry != NULL && dbentry->tables != NULL)
|
|
|
|
{
|
|
|
|
tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
|
|
|
|
(void *) &relid,
|
|
|
|
HASH_FIND, NULL);
|
|
|
|
if (tabentry)
|
|
|
|
return tabentry;
|
|
|
|
}
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2005-07-29 21:30:09 +02:00
|
|
|
return NULL;
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-05-15 02:17:41 +02:00
|
|
|
/* ----------
|
|
|
|
* pgstat_fetch_stat_funcentry() -
|
|
|
|
*
|
|
|
|
* Support function for the SQL-callable pgstat* functions. Returns
|
|
|
|
* the collected statistics for one function or NULL.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
PgStat_StatFuncEntry *
|
|
|
|
pgstat_fetch_stat_funcentry(Oid func_id)
|
|
|
|
{
|
|
|
|
PgStat_StatDBEntry *dbentry;
|
|
|
|
PgStat_StatFuncEntry *funcentry = NULL;
|
|
|
|
|
|
|
|
/* load the stats file if needed */
|
|
|
|
backend_read_statsfile();
|
|
|
|
|
|
|
|
/* Lookup our database, then find the requested function. */
|
|
|
|
dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId);
|
|
|
|
if (dbentry != NULL && dbentry->functions != NULL)
|
|
|
|
{
|
|
|
|
funcentry = (PgStat_StatFuncEntry *) hash_search(dbentry->functions,
|
|
|
|
(void *) &func_id,
|
|
|
|
HASH_FIND, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return funcentry;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
/* ----------
|
|
|
|
* pgstat_fetch_stat_beentry() -
|
|
|
|
*
|
|
|
|
* Support function for the SQL-callable pgstat* functions. Returns
|
2006-06-19 03:51:22 +02:00
|
|
|
* our local copy of the current-activity entry for one backend.
|
|
|
|
*
|
|
|
|
* NB: caller is responsible for a check if the user is permitted to see
|
|
|
|
* this info (especially the querystring).
|
2001-06-22 21:18:36 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
2006-06-19 03:51:22 +02:00
|
|
|
PgBackendStatus *
|
2001-06-22 21:18:36 +02:00
|
|
|
pgstat_fetch_stat_beentry(int beid)
|
|
|
|
{
|
2006-06-19 03:51:22 +02:00
|
|
|
pgstat_read_current_status();
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2006-06-19 03:51:22 +02:00
|
|
|
if (beid < 1 || beid > localNumBackends)
|
2001-06-22 21:18:36 +02:00
|
|
|
return NULL;
|
|
|
|
|
2006-06-19 03:51:22 +02:00
|
|
|
return &localBackendStatusTable[beid - 1];
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------
 * pgstat_fetch_stat_numbackends() -
 *
 *	Support function for the SQL-callable pgstat* functions. Returns
 *	the maximum current backend id.
 * ----------
 */
int
pgstat_fetch_stat_numbackends(void)
{
	/* Snapshot the shared status array into local memory, if not done yet */
	pgstat_read_current_status();

	return localNumBackends;
}
|
|
|
|
|
2007-03-30 20:34:56 +02:00
|
|
|
/*
 * ---------
 * pgstat_fetch_global() -
 *
 *	Support function for the SQL-callable pgstat* functions. Returns
 *	a pointer to the global statistics struct.
 * ---------
 */
PgStat_GlobalStats *
pgstat_fetch_global(void)
{
	/* Make sure we have a current snapshot of the collector's stats file */
	backend_read_statsfile();

	return &globalStats;
}
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/* ------------------------------------------------------------
 * Functions for management of the shared-memory PgBackendStatus array
 * ------------------------------------------------------------
 */

/* Base of the shared-memory array of per-backend status entries */
static PgBackendStatus *BackendStatusArray = NULL;
/* This backend's own slot within BackendStatusArray */
static PgBackendStatus *MyBEEntry = NULL;
/* Shared buffer holding each backend's client hostname string */
static char *BackendClientHostnameBuffer = NULL;
/* Shared buffer holding each backend's application_name string */
static char *BackendAppnameBuffer = NULL;
/* Shared buffer holding each backend's current-activity string */
static char *BackendActivityBuffer = NULL;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2006-06-19 03:51:22 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Report shared-memory space needed by CreateSharedBackendStatus.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2006-06-19 03:51:22 +02:00
|
|
|
Size
|
|
|
|
BackendStatusShmemSize(void)
|
2001-06-22 21:18:36 +02:00
|
|
|
{
|
2006-06-19 03:51:22 +02:00
|
|
|
Size size;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2009-11-29 00:38:08 +01:00
|
|
|
size = mul_size(sizeof(PgBackendStatus), MaxBackends);
|
|
|
|
size = add_size(size,
|
|
|
|
mul_size(NAMEDATALEN, MaxBackends));
|
|
|
|
size = add_size(size,
|
2008-06-30 12:58:47 +02:00
|
|
|
mul_size(pgstat_track_activity_query_size, MaxBackends));
|
2011-02-17 22:03:28 +01:00
|
|
|
size = add_size(size,
|
|
|
|
mul_size(NAMEDATALEN, MaxBackends));
|
2006-06-19 03:51:22 +02:00
|
|
|
return size;
|
|
|
|
}
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2006-06-19 03:51:22 +02:00
|
|
|
/*
|
2011-02-17 22:03:28 +01:00
|
|
|
* Initialize the shared status array and several string buffers
|
2009-11-29 00:38:08 +01:00
|
|
|
* during postmaster startup.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2006-06-19 03:51:22 +02:00
|
|
|
void
|
|
|
|
CreateSharedBackendStatus(void)
|
2001-06-22 21:18:36 +02:00
|
|
|
{
|
2008-06-30 12:58:47 +02:00
|
|
|
Size size;
|
2006-06-19 03:51:22 +02:00
|
|
|
bool found;
|
2008-06-30 12:58:47 +02:00
|
|
|
int i;
|
|
|
|
char *buffer;
|
2006-06-19 03:51:22 +02:00
|
|
|
|
|
|
|
/* Create or attach to the shared array */
|
2008-06-30 12:58:47 +02:00
|
|
|
size = mul_size(sizeof(PgBackendStatus), MaxBackends);
|
2006-06-19 03:51:22 +02:00
|
|
|
BackendStatusArray = (PgBackendStatus *)
|
|
|
|
ShmemInitStruct("Backend Status Array", size, &found);
|
|
|
|
|
|
|
|
if (!found)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We're the first - initialize.
|
|
|
|
*/
|
|
|
|
MemSet(BackendStatusArray, 0, size);
|
|
|
|
}
|
2008-06-30 12:58:47 +02:00
|
|
|
|
2009-11-29 00:38:08 +01:00
|
|
|
/* Create or attach to the shared appname buffer */
|
|
|
|
size = mul_size(NAMEDATALEN, MaxBackends);
|
|
|
|
BackendAppnameBuffer = (char *)
|
|
|
|
ShmemInitStruct("Backend Application Name Buffer", size, &found);
|
|
|
|
|
|
|
|
if (!found)
|
|
|
|
{
|
|
|
|
MemSet(BackendAppnameBuffer, 0, size);
|
|
|
|
|
|
|
|
/* Initialize st_appname pointers. */
|
|
|
|
buffer = BackendAppnameBuffer;
|
|
|
|
for (i = 0; i < MaxBackends; i++)
|
|
|
|
{
|
|
|
|
BackendStatusArray[i].st_appname = buffer;
|
|
|
|
buffer += NAMEDATALEN;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-02-17 22:03:28 +01:00
|
|
|
/* Create or attach to the shared client hostname buffer */
|
|
|
|
size = mul_size(NAMEDATALEN, MaxBackends);
|
|
|
|
BackendClientHostnameBuffer = (char *)
|
|
|
|
ShmemInitStruct("Backend Client Host Name Buffer", size, &found);
|
|
|
|
|
|
|
|
if (!found)
|
|
|
|
{
|
|
|
|
MemSet(BackendClientHostnameBuffer, 0, size);
|
|
|
|
|
|
|
|
/* Initialize st_clienthostname pointers. */
|
|
|
|
buffer = BackendClientHostnameBuffer;
|
|
|
|
for (i = 0; i < MaxBackends; i++)
|
|
|
|
{
|
|
|
|
BackendStatusArray[i].st_clienthostname = buffer;
|
|
|
|
buffer += NAMEDATALEN;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-06-30 12:58:47 +02:00
|
|
|
/* Create or attach to the shared activity buffer */
|
|
|
|
size = mul_size(pgstat_track_activity_query_size, MaxBackends);
|
2009-06-11 16:49:15 +02:00
|
|
|
BackendActivityBuffer = (char *)
|
2008-06-30 12:58:47 +02:00
|
|
|
ShmemInitStruct("Backend Activity Buffer", size, &found);
|
|
|
|
|
|
|
|
if (!found)
|
|
|
|
{
|
|
|
|
MemSet(BackendActivityBuffer, 0, size);
|
|
|
|
|
|
|
|
/* Initialize st_activity pointers. */
|
|
|
|
buffer = BackendActivityBuffer;
|
2009-06-11 16:49:15 +02:00
|
|
|
for (i = 0; i < MaxBackends; i++)
|
|
|
|
{
|
2008-06-30 12:58:47 +02:00
|
|
|
BackendStatusArray[i].st_activity = buffer;
|
|
|
|
buffer += pgstat_track_activity_query_size;
|
|
|
|
}
|
|
|
|
}
|
2006-06-19 03:51:22 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-05-27 07:37:50 +02:00
|
|
|
/* ----------
|
|
|
|
* pgstat_initialize() -
|
|
|
|
*
|
|
|
|
* Initialize pgstats state, and set up our on-proc-exit hook.
|
|
|
|
* Called from InitPostgres. MyBackendId must be set,
|
|
|
|
* but we must not have started any transaction yet (since the
|
|
|
|
* exit hook must run after the last transaction exit).
|
2009-08-12 22:53:31 +02:00
|
|
|
* NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful.
|
2007-05-27 07:37:50 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_initialize(void)
|
|
|
|
{
|
|
|
|
/* Initialize MyBEEntry */
|
|
|
|
Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends);
|
|
|
|
MyBEEntry = &BackendStatusArray[MyBackendId - 1];
|
|
|
|
|
|
|
|
/* Set up a process-exit hook to clean up */
|
|
|
|
on_shmem_exit(pgstat_beshutdown_hook, 0);
|
|
|
|
}
|
|
|
|
|
2006-06-19 03:51:22 +02:00
|
|
|
/* ----------
|
|
|
|
* pgstat_bestart() -
|
|
|
|
*
|
2007-05-27 07:37:50 +02:00
|
|
|
* Initialize this backend's entry in the PgBackendStatus array.
|
2009-11-29 00:38:08 +01:00
|
|
|
* Called from InitPostgres.
|
|
|
|
* MyDatabaseId, session userid, and application_name must be set
|
2007-05-27 07:37:50 +02:00
|
|
|
* (hence, this cannot be combined with pgstat_initialize).
|
2006-06-19 03:51:22 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_bestart(void)
|
|
|
|
{
|
|
|
|
TimestampTz proc_start_timestamp;
|
|
|
|
Oid userid;
|
|
|
|
SockAddr clientaddr;
|
2007-05-27 07:37:50 +02:00
|
|
|
volatile PgBackendStatus *beentry;
|
2006-06-19 03:51:22 +02:00
|
|
|
|
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* To minimize the time spent modifying the PgBackendStatus entry, fetch
|
|
|
|
* all the needed data first.
|
2006-06-21 00:52:00 +02:00
|
|
|
*
|
|
|
|
* If we have a MyProcPort, use its session start time (for consistency,
|
|
|
|
* and to save a kernel call).
|
2006-06-19 03:51:22 +02:00
|
|
|
*/
|
2006-06-21 00:52:00 +02:00
|
|
|
if (MyProcPort)
|
|
|
|
proc_start_timestamp = MyProcPort->SessionStartTime;
|
|
|
|
else
|
|
|
|
proc_start_timestamp = GetCurrentTimestamp();
|
2006-06-19 03:51:22 +02:00
|
|
|
userid = GetSessionUserId();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We may not have a MyProcPort (eg, if this is the autovacuum process).
|
|
|
|
* If so, use all-zeroes client address, which is dealt with specially in
|
|
|
|
* pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port.
|
|
|
|
*/
|
|
|
|
if (MyProcPort)
|
|
|
|
memcpy(&clientaddr, &MyProcPort->raddr, sizeof(clientaddr));
|
|
|
|
else
|
|
|
|
MemSet(&clientaddr, 0, sizeof(clientaddr));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize my status entry, following the protocol of bumping
|
2006-10-04 02:30:14 +02:00
|
|
|
* st_changecount before and after; and make sure it's even afterwards. We
|
|
|
|
* use a volatile pointer here to ensure the compiler doesn't try to get
|
|
|
|
* cute.
|
2006-06-19 03:51:22 +02:00
|
|
|
*/
|
|
|
|
beentry = MyBEEntry;
|
2006-10-04 02:30:14 +02:00
|
|
|
do
|
|
|
|
{
|
2006-06-19 03:51:22 +02:00
|
|
|
beentry->st_changecount++;
|
|
|
|
} while ((beentry->st_changecount & 1) == 0);
|
|
|
|
|
|
|
|
beentry->st_procpid = MyProcPid;
|
|
|
|
beentry->st_proc_start_timestamp = proc_start_timestamp;
|
|
|
|
beentry->st_activity_start_timestamp = 0;
|
2007-09-11 05:28:05 +02:00
|
|
|
beentry->st_xact_start_timestamp = 0;
|
2006-06-19 03:51:22 +02:00
|
|
|
beentry->st_databaseid = MyDatabaseId;
|
|
|
|
beentry->st_userid = userid;
|
|
|
|
beentry->st_clientaddr = clientaddr;
|
2011-02-17 22:03:28 +01:00
|
|
|
beentry->st_clienthostname[0] = '\0';
|
2006-08-19 03:36:34 +02:00
|
|
|
beentry->st_waiting = false;
|
2009-11-29 00:38:08 +01:00
|
|
|
beentry->st_appname[0] = '\0';
|
2006-06-19 03:51:22 +02:00
|
|
|
beentry->st_activity[0] = '\0';
|
2009-11-29 00:38:08 +01:00
|
|
|
/* Also make sure the last byte in each string area is always 0 */
|
2011-02-17 22:03:28 +01:00
|
|
|
beentry->st_clienthostname[NAMEDATALEN - 1] = '\0';
|
2009-11-29 00:38:08 +01:00
|
|
|
beentry->st_appname[NAMEDATALEN - 1] = '\0';
|
2008-06-30 12:58:47 +02:00
|
|
|
beentry->st_activity[pgstat_track_activity_query_size - 1] = '\0';
|
2006-06-19 03:51:22 +02:00
|
|
|
|
|
|
|
beentry->st_changecount++;
|
|
|
|
Assert((beentry->st_changecount & 1) == 0);
|
2009-11-29 00:38:08 +01:00
|
|
|
|
2011-02-17 22:03:28 +01:00
|
|
|
if (MyProcPort && MyProcPort->remote_hostname)
|
|
|
|
strlcpy(beentry->st_clienthostname, MyProcPort->remote_hostname, NAMEDATALEN);
|
|
|
|
|
2009-11-29 00:38:08 +01:00
|
|
|
/* Update app name to current GUC setting */
|
|
|
|
if (application_name)
|
|
|
|
pgstat_report_appname(application_name);
|
2006-06-19 03:51:22 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Shut down a single backend's statistics reporting at process exit.
|
|
|
|
*
|
|
|
|
* Flush any remaining statistics counts out to the collector.
|
|
|
|
* Without this, operations triggered during backend exit (such as
|
|
|
|
* temp table deletions) won't be counted.
|
|
|
|
*
|
|
|
|
* Lastly, clear out our entry in the PgBackendStatus array.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_beshutdown_hook(int code, Datum arg)
|
|
|
|
{
|
2006-08-28 21:38:09 +02:00
|
|
|
volatile PgBackendStatus *beentry = MyBEEntry;
|
2006-06-19 03:51:22 +02:00
|
|
|
|
2009-08-12 22:53:31 +02:00
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* If we got as far as discovering our own database ID, we can report what
|
|
|
|
* we did to the collector. Otherwise, we'd be sending an invalid
|
2009-08-12 22:53:31 +02:00
|
|
|
* database ID, so forget it. (This means that accesses to pg_database
|
|
|
|
* during failed backend starts might never get counted.)
|
|
|
|
*/
|
|
|
|
if (OidIsValid(MyDatabaseId))
|
|
|
|
pgstat_report_stat(true);
|
2006-06-19 03:51:22 +02:00
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* Clear my status entry, following the protocol of bumping st_changecount
|
|
|
|
* before and after. We use a volatile pointer here to ensure the
|
|
|
|
* compiler doesn't try to get cute.
|
2006-06-19 03:51:22 +02:00
|
|
|
*/
|
|
|
|
beentry->st_changecount++;
|
|
|
|
|
|
|
|
beentry->st_procpid = 0; /* mark invalid */
|
|
|
|
|
|
|
|
beentry->st_changecount++;
|
|
|
|
Assert((beentry->st_changecount & 1) == 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_report_activity() -
|
|
|
|
*
|
|
|
|
* Called from tcop/postgres.c to report what the backend is actually doing
|
|
|
|
* (usually "<IDLE>" or the start of the query to be executed).
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_report_activity(const char *cmd_str)
|
|
|
|
{
|
2006-08-28 21:38:09 +02:00
|
|
|
volatile PgBackendStatus *beentry = MyBEEntry;
|
2006-06-19 03:51:22 +02:00
|
|
|
TimestampTz start_timestamp;
|
|
|
|
int len;
|
|
|
|
|
2008-08-01 15:16:09 +02:00
|
|
|
TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str);
|
|
|
|
|
2007-09-24 05:12:23 +02:00
|
|
|
if (!pgstat_track_activities || !beentry)
|
2006-06-19 03:51:22 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* To minimize the time spent modifying the entry, fetch all the needed
|
|
|
|
* data first.
|
2006-06-19 03:51:22 +02:00
|
|
|
*/
|
2006-06-21 00:52:00 +02:00
|
|
|
start_timestamp = GetCurrentStatementStartTimestamp();
|
2006-06-19 03:51:22 +02:00
|
|
|
|
|
|
|
len = strlen(cmd_str);
|
2008-06-30 12:58:47 +02:00
|
|
|
len = pg_mbcliplen(cmd_str, len, pgstat_track_activity_query_size - 1);
|
2006-06-19 03:51:22 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Update my status entry, following the protocol of bumping
|
2006-10-04 02:30:14 +02:00
|
|
|
* st_changecount before and after. We use a volatile pointer here to
|
|
|
|
* ensure the compiler doesn't try to get cute.
|
2006-06-19 03:51:22 +02:00
|
|
|
*/
|
|
|
|
beentry->st_changecount++;
|
|
|
|
|
|
|
|
beentry->st_activity_start_timestamp = start_timestamp;
|
|
|
|
memcpy((char *) beentry->st_activity, cmd_str, len);
|
|
|
|
beentry->st_activity[len] = '\0';
|
|
|
|
|
|
|
|
beentry->st_changecount++;
|
|
|
|
Assert((beentry->st_changecount & 1) == 0);
|
|
|
|
}
|
|
|
|
|
2009-11-29 00:38:08 +01:00
|
|
|
/* ----------
|
|
|
|
* pgstat_report_appname() -
|
|
|
|
*
|
|
|
|
* Called to update our application name.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_report_appname(const char *appname)
|
|
|
|
{
|
|
|
|
volatile PgBackendStatus *beentry = MyBEEntry;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
if (!beentry)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* This should be unnecessary if GUC did its job, but be safe */
|
|
|
|
len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update my status entry, following the protocol of bumping
|
|
|
|
* st_changecount before and after. We use a volatile pointer here to
|
|
|
|
* ensure the compiler doesn't try to get cute.
|
|
|
|
*/
|
|
|
|
beentry->st_changecount++;
|
|
|
|
|
|
|
|
memcpy((char *) beentry->st_appname, appname, len);
|
|
|
|
beentry->st_appname[len] = '\0';
|
|
|
|
|
|
|
|
beentry->st_changecount++;
|
|
|
|
Assert((beentry->st_changecount & 1) == 0);
|
|
|
|
}
|
|
|
|
|
2006-12-06 19:06:48 +01:00
|
|
|
/*
|
2007-09-11 05:28:05 +02:00
|
|
|
* Report current transaction start timestamp as the specified value.
|
|
|
|
* Zero means there is no active transaction.
|
2006-12-06 19:06:48 +01:00
|
|
|
*/
|
|
|
|
void
|
2007-09-11 05:28:05 +02:00
|
|
|
pgstat_report_xact_timestamp(TimestampTz tstamp)
|
2006-12-06 19:06:48 +01:00
|
|
|
{
|
|
|
|
volatile PgBackendStatus *beentry = MyBEEntry;
|
|
|
|
|
2007-09-24 05:12:23 +02:00
|
|
|
if (!pgstat_track_activities || !beentry)
|
2006-12-06 19:06:48 +01:00
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update my status entry, following the protocol of bumping
|
2007-11-15 22:14:46 +01:00
|
|
|
* st_changecount before and after. We use a volatile pointer here to
|
|
|
|
* ensure the compiler doesn't try to get cute.
|
2006-12-06 19:06:48 +01:00
|
|
|
*/
|
|
|
|
beentry->st_changecount++;
|
2007-09-11 05:28:05 +02:00
|
|
|
beentry->st_xact_start_timestamp = tstamp;
|
2006-12-06 19:06:48 +01:00
|
|
|
beentry->st_changecount++;
|
|
|
|
Assert((beentry->st_changecount & 1) == 0);
|
|
|
|
}
|
2006-06-19 03:51:22 +02:00
|
|
|
|
2006-08-19 03:36:34 +02:00
|
|
|
/* ----------
|
|
|
|
* pgstat_report_waiting() -
|
|
|
|
*
|
|
|
|
* Called from lock manager to report beginning or end of a lock wait.
|
2006-08-28 21:38:09 +02:00
|
|
|
*
|
|
|
|
* NB: this *must* be able to survive being called before MyBEEntry has been
|
|
|
|
* initialized.
|
2006-08-19 03:36:34 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_report_waiting(bool waiting)
|
|
|
|
{
|
2006-08-28 21:38:09 +02:00
|
|
|
volatile PgBackendStatus *beentry = MyBEEntry;
|
2006-08-19 03:36:34 +02:00
|
|
|
|
2007-09-24 05:12:23 +02:00
|
|
|
if (!pgstat_track_activities || !beentry)
|
2006-08-19 03:36:34 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Since this is a single-byte field in a struct that only this process
|
|
|
|
* may modify, there seems no need to bother with the st_changecount
|
|
|
|
* protocol. The update must appear atomic in any case.
|
|
|
|
*/
|
|
|
|
beentry->st_waiting = waiting;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-06-19 03:51:22 +02:00
|
|
|
/* ----------
 * pgstat_read_current_status() -
 *
 *	Copy the current contents of the PgBackendStatus array to local memory,
 *	if not already done in this transaction.
 * ----------
 */
static void
pgstat_read_current_status(void)
{
	volatile PgBackendStatus *beentry;
	PgBackendStatus *localtable;
	PgBackendStatus *localentry;
	char	   *localappname,
			   *localactivity;
	int			i;

	/* Only regular backends take local snapshots, never the collector */
	Assert(!pgStatRunningInCollector);
	if (localBackendStatusTable)
		return;					/* already done */

	pgstat_setup_memcxt();

	/*
	 * Allocate the local copies in pgStatLocalContext so they survive until
	 * the snapshot is explicitly discarded.  Worst case: every backend slot
	 * is in use, so size everything for MaxBackends entries.
	 */
	localtable = (PgBackendStatus *)
		MemoryContextAlloc(pgStatLocalContext,
						   sizeof(PgBackendStatus) * MaxBackends);
	localappname = (char *)
		MemoryContextAlloc(pgStatLocalContext,
						   NAMEDATALEN * MaxBackends);
	localactivity = (char *)
		MemoryContextAlloc(pgStatLocalContext,
						   pgstat_track_activity_query_size * MaxBackends);
	localNumBackends = 0;

	beentry = BackendStatusArray;
	localentry = localtable;
	for (i = 1; i <= MaxBackends; i++)
	{
		/*
		 * Follow the protocol of retrying if st_changecount changes while we
		 * copy the entry, or if it's odd.  (The check for odd is needed to
		 * cover the case where we are able to completely copy the entry while
		 * the source backend is between increment steps.)	We use a volatile
		 * pointer here to ensure the compiler doesn't try to get cute.
		 */
		for (;;)
		{
			int			save_changecount = beentry->st_changecount;

			localentry->st_procpid = beentry->st_procpid;
			if (localentry->st_procpid > 0)
			{
				memcpy(localentry, (char *) beentry, sizeof(PgBackendStatus));

				/*
				 * strcpy is safe even if the string is modified concurrently,
				 * because there's always a \0 at the end of the buffer.
				 */
				strcpy(localappname, (char *) beentry->st_appname);
				localentry->st_appname = localappname;
				strcpy(localactivity, (char *) beentry->st_activity);
				localentry->st_activity = localactivity;
			}

			/* Copy is consistent only if the count is unchanged and even */
			if (save_changecount == beentry->st_changecount &&
				(save_changecount & 1) == 0)
				break;

			/* Make sure we can break out of loop if stuck... */
			CHECK_FOR_INTERRUPTS();
		}

		beentry++;
		/* Only valid entries get included into the local array */
		if (localentry->st_procpid > 0)
		{
			localentry++;
			localappname += NAMEDATALEN;
			localactivity += pgstat_track_activity_query_size;
			localNumBackends++;
		}
	}

	/* Set the pointer only after completion of a valid table */
	localBackendStatusTable = localtable;
}
|
|
|
|
|
|
|
|
|
2008-03-21 22:08:31 +01:00
|
|
|
/* ----------
 * pgstat_get_backend_current_activity() -
 *
 *	Return a string representing the current activity of the backend with
 *	the specified PID.	This looks directly at the BackendStatusArray,
 *	and so will provide current information regardless of the age of our
 *	transaction's snapshot of the status array.
 *
 *	It is the caller's responsibility to invoke this only for backends whose
 *	state is expected to remain stable while the result is in use.	The
 *	only current use is in deadlock reporting, where we can expect that
 *	the target backend is blocked on a lock.  (There are corner cases
 *	where the target's wait could get aborted while we are looking at it,
 *	but the very worst consequence is to return a pointer to a string
 *	that's been changed, so we won't worry too much.)
 *
 *	Note: return strings for special cases match pg_stat_get_backend_activity.
 * ----------
 */
const char *
pgstat_get_backend_current_activity(int pid, bool checkUser)
{
	PgBackendStatus *beentry;
	int			i;

	/* Linear scan of the shared array looking for a matching PID */
	beentry = BackendStatusArray;
	for (i = 1; i <= MaxBackends; i++)
	{
		/*
		 * Although we expect the target backend's entry to be stable, that
		 * doesn't imply that anyone else's is.  To avoid identifying the
		 * wrong backend, while we check for a match to the desired PID we
		 * must follow the protocol of retrying if st_changecount changes
		 * while we examine the entry, or if it's odd.  (This might be
		 * unnecessary, since fetching or storing an int is almost certainly
		 * atomic, but let's play it safe.)  We use a volatile pointer here to
		 * ensure the compiler doesn't try to get cute.
		 */
		volatile PgBackendStatus *vbeentry = beentry;
		bool		found;

		for (;;)
		{
			int			save_changecount = vbeentry->st_changecount;

			found = (vbeentry->st_procpid == pid);

			/* PID read is consistent if count is unchanged and even */
			if (save_changecount == vbeentry->st_changecount &&
				(save_changecount & 1) == 0)
				break;

			/* Make sure we can break out of loop if stuck... */
			CHECK_FOR_INTERRUPTS();
		}

		if (found)
		{
			/* Now it is safe to use the non-volatile pointer */
			if (checkUser && !superuser() && beentry->st_userid != GetUserId())
				return "<insufficient privilege>";
			else if (*(beentry->st_activity) == '\0')
				return "<command string not enabled>";
			else
				return beentry->st_activity;
		}

		beentry++;
	}

	/* If we get here, caller is in error ... */
	return "<backend information not available>";
}
|
|
|
|
|
|
|
|
|
2006-06-19 03:51:22 +02:00
|
|
|
/* ------------------------------------------------------------
|
|
|
|
* Local support functions follow
|
|
|
|
* ------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------
 * pgstat_setheader() -
 *
 *		Set common header fields in a statistics message
 * ----------
 */
static void
pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype)
{
	/* m_size is filled in later by pgstat_send() */
	hdr->m_type = mtype;
}
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_send() -
|
|
|
|
*
|
|
|
|
* Send out one statistics message to the collector
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_send(void *msg, int len)
|
|
|
|
{
|
2006-07-16 20:17:14 +02:00
|
|
|
int rc;
|
|
|
|
|
2010-01-31 18:39:34 +01:00
|
|
|
if (pgStatSock == PGINVALID_SOCKET)
|
2006-06-19 03:51:22 +02:00
|
|
|
return;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2001-10-25 07:50:21 +02:00
|
|
|
((PgStat_MsgHdr *) msg)->m_size = len;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2006-07-16 20:17:14 +02:00
|
|
|
/* We'll retry after EINTR, but ignore all other failures */
|
|
|
|
do
|
|
|
|
{
|
|
|
|
rc = send(pgStatSock, msg, len, 0);
|
|
|
|
} while (rc < 0 && errno == EINTR);
|
|
|
|
|
2005-08-30 04:47:37 +02:00
|
|
|
#ifdef USE_ASSERT_CHECKING
|
2006-07-16 20:17:14 +02:00
|
|
|
/* In debug builds, log send failures ... */
|
|
|
|
if (rc < 0)
|
2005-08-30 04:47:37 +02:00
|
|
|
elog(LOG, "could not send to statistics collector: %m");
|
|
|
|
#endif
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
|
|
|
|
2007-03-30 20:34:56 +02:00
|
|
|
/* ----------
|
|
|
|
* pgstat_send_bgwriter() -
|
|
|
|
*
|
2007-11-15 22:14:46 +01:00
|
|
|
* Send bgwriter statistics to the collector
|
2007-03-30 20:34:56 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_send_bgwriter(void)
|
|
|
|
{
|
2007-05-27 05:50:39 +02:00
|
|
|
/* We assume this initializes to zeroes */
|
|
|
|
static const PgStat_MsgBgWriter all_zeroes;
|
|
|
|
|
2007-03-30 20:34:56 +02:00
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* This function can be called even if nothing at all has happened. In
|
|
|
|
* this case, avoid sending a completely empty message to the stats
|
|
|
|
* collector.
|
2007-03-30 20:34:56 +02:00
|
|
|
*/
|
2007-05-27 05:50:39 +02:00
|
|
|
if (memcmp(&BgWriterStats, &all_zeroes, sizeof(PgStat_MsgBgWriter)) == 0)
|
2007-03-30 20:34:56 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prepare and send the message
|
|
|
|
*/
|
|
|
|
pgstat_setheader(&BgWriterStats.m_hdr, PGSTAT_MTYPE_BGWRITER);
|
|
|
|
pgstat_send(&BgWriterStats, sizeof(BgWriterStats));
|
|
|
|
|
|
|
|
/*
|
2007-05-27 05:50:39 +02:00
|
|
|
* Clear out the statistics buffer, so it can be re-used.
|
2007-03-30 20:34:56 +02:00
|
|
|
*/
|
2007-05-27 05:50:39 +02:00
|
|
|
MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
|
2007-03-30 20:34:56 +02:00
|
|
|
}
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2004-05-28 07:13:32 +02:00
|
|
|
/* ----------
 * PgstatCollectorMain() -
 *
 *	Start up the statistics collector process.  This is the body of the
 *	postmaster child process.
 *
 *	The argc/argv parameters are valid only in EXEC_BACKEND case.
 * ----------
 */
NON_EXEC_STATIC void
PgstatCollectorMain(int argc, char *argv[])
{
	int			len;
	PgStat_Msg	msg;

#ifndef WIN32
#ifdef HAVE_POLL
	struct pollfd input_fd;
#else
	struct timeval sel_timeout;
	fd_set		rfds;
#endif
#endif

	IsUnderPostmaster = true;	/* we are a postmaster subprocess now */

	MyProcPid = getpid();		/* reset MyProcPid */

	MyStartTime = time(NULL);	/* record Start Time for logging */

	/*
	 * If possible, make this process a group leader, so that the postmaster
	 * can signal any child processes too.  (pgstat probably never has any
	 * child processes, but for consistency we make all postmaster child
	 * processes do this.)
	 */
#ifdef HAVE_SETSID
	if (setsid() < 0)
		elog(FATAL, "setsid() failed: %m");
#endif

	/*
	 * Ignore all signals usually bound to some action in the postmaster,
	 * except SIGQUIT.  SIGHUP is wired to a reload flag checked in the main
	 * loop below.
	 */
	pqsignal(SIGHUP, pgstat_sighup_handler);
	pqsignal(SIGINT, SIG_IGN);
	pqsignal(SIGTERM, SIG_IGN);
	pqsignal(SIGQUIT, pgstat_exit);
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, SIG_IGN);
	pqsignal(SIGUSR2, SIG_IGN);
	pqsignal(SIGCHLD, SIG_DFL);
	pqsignal(SIGTTIN, SIG_DFL);
	pqsignal(SIGTTOU, SIG_DFL);
	pqsignal(SIGCONT, SIG_DFL);
	pqsignal(SIGWINCH, SIG_DFL);
	PG_SETMASK(&UnBlockSig);

	/*
	 * Identify myself via ps
	 */
	init_ps_display("stats collector process", "", "", "");

	/*
	 * Arrange to write the initial status file right away: making the
	 * request time strictly later than the last-write time forces a write
	 * on the first trip through the loop.
	 */
	last_statrequest = GetCurrentTimestamp();
	last_statwrite = last_statrequest - 1;

	/*
	 * Read in an existing statistics stats file or initialize the stats to
	 * zero.
	 */
	pgStatRunningInCollector = true;
	pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);

	/*
	 * Setup the descriptor set for select(2).  Since only one bit in the set
	 * ever changes, we need not repeat FD_ZERO each time.
	 */
#if !defined(HAVE_POLL) && !defined(WIN32)
	FD_ZERO(&rfds);
#endif

	/*
	 * Loop to process messages until we get SIGQUIT or detect ungraceful
	 * death of our parent postmaster.
	 *
	 * For performance reasons, we don't want to do a PostmasterIsAlive() test
	 * after every message; instead, do it only when select()/poll() is
	 * interrupted by timeout.  In essence, we'll stay alive as long as
	 * backends keep sending us stuff often, even if the postmaster is gone.
	 */
	for (;;)
	{
		int			got_data;

		/*
		 * Quit if we get SIGQUIT from the postmaster.
		 */
		if (need_exit)
			break;

		/*
		 * Reload configuration if we got SIGHUP from the postmaster.
		 */
		if (got_SIGHUP)
		{
			ProcessConfigFile(PGC_SIGHUP);
			got_SIGHUP = false;
		}

		/*
		 * Write the stats file if a new request has arrived that is not
		 * satisfied by existing file.
		 */
		if (last_statwrite < last_statrequest)
			pgstat_write_statsfile(false);

		/*
		 * Wait for a message to arrive; but not for more than
		 * PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will
		 * shut down after an ungraceful postmaster termination; so it needn't
		 * be very fast.  However, on some systems SIGQUIT won't interrupt the
		 * poll/select call, so this also limits speed of response to SIGQUIT,
		 * which is more important.)
		 *
		 * We use poll(2) if available, otherwise select(2). Win32 has its own
		 * implementation.
		 */
#ifndef WIN32
#ifdef HAVE_POLL
		input_fd.fd = pgStatSock;
		input_fd.events = POLLIN | POLLERR;
		input_fd.revents = 0;

		if (poll(&input_fd, 1, PGSTAT_SELECT_TIMEOUT * 1000) < 0)
		{
			if (errno == EINTR)
				continue;
			ereport(ERROR,
					(errcode_for_socket_access(),
					 errmsg("poll() failed in statistics collector: %m")));
		}

		got_data = (input_fd.revents != 0);
#else							/* !HAVE_POLL */

		FD_SET(pgStatSock, &rfds);

		/*
		 * timeout struct is modified by select() on some operating systems,
		 * so re-fill it each time.
		 */
		sel_timeout.tv_sec = PGSTAT_SELECT_TIMEOUT;
		sel_timeout.tv_usec = 0;

		if (select(pgStatSock + 1, &rfds, NULL, NULL, &sel_timeout) < 0)
		{
			if (errno == EINTR)
				continue;
			ereport(ERROR,
					(errcode_for_socket_access(),
					 errmsg("select() failed in statistics collector: %m")));
		}

		got_data = FD_ISSET(pgStatSock, &rfds);
#endif   /* HAVE_POLL */
#else							/* WIN32 */
		got_data = pgwin32_waitforsinglesocket(pgStatSock, FD_READ,
											   PGSTAT_SELECT_TIMEOUT * 1000);
#endif

		/*
		 * If there is a message on the socket, read it and check for
		 * validity.
		 */
		if (got_data)
		{
			len = recv(pgStatSock, (char *) &msg,
					   sizeof(PgStat_Msg), 0);
			if (len < 0)
			{
				if (errno == EINTR)
					continue;
				ereport(ERROR,
						(errcode_for_socket_access(),
						 errmsg("could not read statistics message: %m")));
			}

			/*
			 * We ignore messages that are smaller than our common header
			 */
			if (len < sizeof(PgStat_MsgHdr))
				continue;

			/*
			 * The received length must match the length in the header
			 */
			if (msg.msg_hdr.m_size != len)
				continue;

			/*
			 * O.K. - we accept this message.  Process it.  Unknown message
			 * types fall through to the default case and are ignored.
			 */
			switch (msg.msg_hdr.m_type)
			{
				case PGSTAT_MTYPE_DUMMY:
					break;

				case PGSTAT_MTYPE_INQUIRY:
					pgstat_recv_inquiry((PgStat_MsgInquiry *) &msg, len);
					break;

				case PGSTAT_MTYPE_TABSTAT:
					pgstat_recv_tabstat((PgStat_MsgTabstat *) &msg, len);
					break;

				case PGSTAT_MTYPE_TABPURGE:
					pgstat_recv_tabpurge((PgStat_MsgTabpurge *) &msg, len);
					break;

				case PGSTAT_MTYPE_DROPDB:
					pgstat_recv_dropdb((PgStat_MsgDropdb *) &msg, len);
					break;

				case PGSTAT_MTYPE_RESETCOUNTER:
					pgstat_recv_resetcounter((PgStat_MsgResetcounter *) &msg,
											 len);
					break;

				case PGSTAT_MTYPE_RESETSHAREDCOUNTER:
					pgstat_recv_resetsharedcounter(
									   (PgStat_MsgResetsharedcounter *) &msg,
												   len);
					break;

				case PGSTAT_MTYPE_RESETSINGLECOUNTER:
					pgstat_recv_resetsinglecounter(
									   (PgStat_MsgResetsinglecounter *) &msg,
												   len);
					break;

				case PGSTAT_MTYPE_AUTOVAC_START:
					pgstat_recv_autovac((PgStat_MsgAutovacStart *) &msg, len);
					break;

				case PGSTAT_MTYPE_VACUUM:
					pgstat_recv_vacuum((PgStat_MsgVacuum *) &msg, len);
					break;

				case PGSTAT_MTYPE_ANALYZE:
					pgstat_recv_analyze((PgStat_MsgAnalyze *) &msg, len);
					break;

				case PGSTAT_MTYPE_BGWRITER:
					pgstat_recv_bgwriter((PgStat_MsgBgWriter *) &msg, len);
					break;

				case PGSTAT_MTYPE_FUNCSTAT:
					pgstat_recv_funcstat((PgStat_MsgFuncstat *) &msg, len);
					break;

				case PGSTAT_MTYPE_FUNCPURGE:
					pgstat_recv_funcpurge((PgStat_MsgFuncpurge *) &msg, len);
					break;

				case PGSTAT_MTYPE_RECOVERYCONFLICT:
					pgstat_recv_recoveryconflict((PgStat_MsgRecoveryConflict *) &msg, len);
					break;

				default:
					break;
			}
		}
		else
		{
			/*
			 * We can only get here if the select/poll timeout elapsed. Check
			 * for postmaster death.
			 */
			if (!PostmasterIsAlive())
				break;
		}
	}							/* end of message-processing loop */

	/*
	 * Save the final stats to reuse at next startup.
	 */
	pgstat_write_statsfile(true);

	exit(0);
}
|
|
|
|
|
2006-06-29 22:00:08 +02:00
|
|
|
|
|
|
|
/* SIGQUIT signal handler for collector process */
|
2004-06-14 20:08:19 +02:00
|
|
|
static void
|
|
|
|
pgstat_exit(SIGNAL_ARGS)
|
|
|
|
{
|
2006-06-29 22:00:08 +02:00
|
|
|
need_exit = true;
|
2004-06-14 20:08:19 +02:00
|
|
|
}
|
|
|
|
|
2008-08-25 17:11:01 +02:00
|
|
|
/* SIGHUP handler for collector process */
|
|
|
|
static void
|
|
|
|
pgstat_sighup_handler(SIGNAL_ARGS)
|
|
|
|
{
|
|
|
|
got_SIGHUP = true;
|
|
|
|
}
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2005-05-11 03:41:41 +02:00
|
|
|
/*
|
|
|
|
* Lookup the hash table entry for the specified database. If no hash
|
2005-07-29 21:30:09 +02:00
|
|
|
* table entry exists, initialize it, if the create parameter is true.
|
|
|
|
* Else, return NULL.
|
2005-05-11 03:41:41 +02:00
|
|
|
*/
|
|
|
|
static PgStat_StatDBEntry *
|
2005-07-29 21:30:09 +02:00
|
|
|
pgstat_get_db_entry(Oid databaseid, bool create)
|
2005-05-11 03:41:41 +02:00
|
|
|
{
|
|
|
|
PgStat_StatDBEntry *result;
|
2005-10-15 04:49:52 +02:00
|
|
|
bool found;
|
|
|
|
HASHACTION action = (create ? HASH_ENTER : HASH_FIND);
|
2005-05-11 03:41:41 +02:00
|
|
|
|
|
|
|
/* Lookup or create the hash table entry for this database */
|
|
|
|
result = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
|
|
|
|
&databaseid,
|
2005-07-29 21:30:09 +02:00
|
|
|
action, &found);
|
|
|
|
|
|
|
|
if (!create && !found)
|
|
|
|
return NULL;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2005-05-11 03:41:41 +02:00
|
|
|
/* If not found, initialize the new one. */
|
2001-06-22 21:18:36 +02:00
|
|
|
if (!found)
|
|
|
|
{
|
2001-10-25 07:50:21 +02:00
|
|
|
HASHCTL hash_ctl;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2005-05-11 03:41:41 +02:00
|
|
|
result->tables = NULL;
|
2008-05-15 02:17:41 +02:00
|
|
|
result->functions = NULL;
|
2005-05-11 03:41:41 +02:00
|
|
|
result->n_xact_commit = 0;
|
|
|
|
result->n_xact_rollback = 0;
|
|
|
|
result->n_blocks_fetched = 0;
|
|
|
|
result->n_blocks_hit = 0;
|
2007-03-16 18:57:36 +01:00
|
|
|
result->n_tuples_returned = 0;
|
|
|
|
result->n_tuples_fetched = 0;
|
|
|
|
result->n_tuples_inserted = 0;
|
|
|
|
result->n_tuples_updated = 0;
|
|
|
|
result->n_tuples_deleted = 0;
|
2005-07-14 07:13:45 +02:00
|
|
|
result->last_autovac_time = 0;
|
2011-01-03 12:46:03 +01:00
|
|
|
result->n_conflict_tablespace = 0;
|
|
|
|
result->n_conflict_lock = 0;
|
|
|
|
result->n_conflict_snapshot = 0;
|
|
|
|
result->n_conflict_bufferpin = 0;
|
|
|
|
result->n_conflict_startup_deadlock = 0;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2011-02-10 15:09:35 +01:00
|
|
|
result->stat_reset_timestamp = GetCurrentTimestamp();
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
memset(&hash_ctl, 0, sizeof(hash_ctl));
|
2001-10-25 07:50:21 +02:00
|
|
|
hash_ctl.keysize = sizeof(Oid);
|
2001-10-01 07:36:17 +02:00
|
|
|
hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
|
2005-04-14 22:32:43 +02:00
|
|
|
hash_ctl.hash = oid_hash;
|
2005-05-11 03:41:41 +02:00
|
|
|
result->tables = hash_create("Per-database table",
|
2005-10-15 04:49:52 +02:00
|
|
|
PGSTAT_TAB_HASH_SIZE,
|
|
|
|
&hash_ctl,
|
|
|
|
HASH_ELEM | HASH_FUNCTION);
|
2008-05-15 02:17:41 +02:00
|
|
|
|
|
|
|
hash_ctl.keysize = sizeof(Oid);
|
|
|
|
hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
|
|
|
|
hash_ctl.hash = oid_hash;
|
|
|
|
result->functions = hash_create("Per-database function",
|
|
|
|
PGSTAT_FUNCTION_HASH_SIZE,
|
|
|
|
&hash_ctl,
|
|
|
|
HASH_ELEM | HASH_FUNCTION);
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
|
|
|
|
2005-05-11 03:41:41 +02:00
|
|
|
return result;
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-09-05 00:32:33 +02:00
|
|
|
/*
|
|
|
|
* Lookup the hash table entry for the specified table. If no hash
|
|
|
|
* table entry exists, initialize it, if the create parameter is true.
|
|
|
|
* Else, return NULL.
|
|
|
|
*/
|
|
|
|
static PgStat_StatTabEntry *
|
|
|
|
pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
|
|
|
|
{
|
|
|
|
PgStat_StatTabEntry *result;
|
|
|
|
bool found;
|
|
|
|
HASHACTION action = (create ? HASH_ENTER : HASH_FIND);
|
|
|
|
|
|
|
|
/* Lookup or create the hash table entry for this table */
|
|
|
|
result = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
|
|
|
|
&tableoid,
|
|
|
|
action, &found);
|
|
|
|
|
|
|
|
if (!create && !found)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* If not found, initialize the new one. */
|
|
|
|
if (!found)
|
|
|
|
{
|
|
|
|
result->numscans = 0;
|
|
|
|
result->tuples_returned = 0;
|
|
|
|
result->tuples_fetched = 0;
|
|
|
|
result->tuples_inserted = 0;
|
|
|
|
result->tuples_updated = 0;
|
|
|
|
result->tuples_deleted = 0;
|
|
|
|
result->tuples_hot_updated = 0;
|
|
|
|
result->n_live_tuples = 0;
|
|
|
|
result->n_dead_tuples = 0;
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
result->changes_since_analyze = 0;
|
2009-09-05 00:32:33 +02:00
|
|
|
result->blocks_fetched = 0;
|
|
|
|
result->blocks_hit = 0;
|
|
|
|
result->vacuum_timestamp = 0;
|
2010-08-21 12:59:17 +02:00
|
|
|
result->vacuum_count = 0;
|
2011-03-07 17:17:06 +01:00
|
|
|
result->autovac_vacuum_timestamp = 0;
|
2010-08-21 12:59:17 +02:00
|
|
|
result->autovac_vacuum_count = 0;
|
2011-03-07 17:17:06 +01:00
|
|
|
result->analyze_timestamp = 0;
|
2010-08-21 12:59:17 +02:00
|
|
|
result->analyze_count = 0;
|
2011-03-07 17:17:06 +01:00
|
|
|
result->autovac_analyze_timestamp = 0;
|
2010-08-21 12:59:17 +02:00
|
|
|
result->autovac_analyze_count = 0;
|
2009-09-05 00:32:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
/* ----------
 * pgstat_write_statsfile() -
 *
 *	Tell the news.
 *	If writing to the permanent file (happens when the collector is
 *	shutting down only), remove the temporary file so that backends
 *	starting up under a new postmaster can't read the old data before
 *	the new collector is ready.
 *
 *	File layout: format ID, global stats struct, then per-database
 *	sections marked 'D' (DB entry), 'T' (table entry), 'F' (function
 *	entry), 'd' (end of DB); 'E' marks end of file.
 * ----------
 */
static void
pgstat_write_statsfile(bool permanent)
{
	HASH_SEQ_STATUS hstat;
	HASH_SEQ_STATUS tstat;
	HASH_SEQ_STATUS fstat;
	PgStat_StatDBEntry *dbentry;
	PgStat_StatTabEntry *tabentry;
	PgStat_StatFuncEntry *funcentry;
	FILE	   *fpout;
	int32		format_id;
	const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname;
	const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;

	/*
	 * Open the statistics temp file to write out the current values.
	 */
	fpout = AllocateFile(tmpfile, PG_BINARY_W);
	if (fpout == NULL)
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not open temporary statistics file \"%s\": %m",
						tmpfile)));
		return;
	}

	/*
	 * Set the timestamp of the stats file.
	 */
	globalStats.stats_timestamp = GetCurrentTimestamp();

	/*
	 * Write the file header --- currently just a format ID.
	 */
	format_id = PGSTAT_FILE_FORMAT_ID;
	fwrite(&format_id, sizeof(format_id), 1, fpout);

	/*
	 * Write global stats struct
	 */
	fwrite(&globalStats, sizeof(globalStats), 1, fpout);

	/*
	 * Walk through the database table.
	 */
	hash_seq_init(&hstat, pgStatDBHash);
	while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
	{
		/*
		 * Write out the DB entry including the number of live backends. We
		 * don't write the tables or functions pointers, since they're of no
		 * use to any other process.
		 */
		fputc('D', fpout);
		fwrite(dbentry, offsetof(PgStat_StatDBEntry, tables), 1, fpout);

		/*
		 * Walk through the database's access stats per table.
		 */
		hash_seq_init(&tstat, dbentry->tables);
		while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
		{
			fputc('T', fpout);
			fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
		}

		/*
		 * Walk through the database's function stats table.
		 */
		hash_seq_init(&fstat, dbentry->functions);
		while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL)
		{
			fputc('F', fpout);
			fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
		}

		/*
		 * Mark the end of this DB
		 */
		fputc('d', fpout);
	}

	/*
	 * No more output to be done. Close the temp file and replace the old
	 * pgstat.stat with it.  The ferror() check replaces testing for error
	 * after each individual fputc or fwrite above.
	 */
	fputc('E', fpout);

	if (ferror(fpout))
	{
		ereport(LOG,
				(errcode_for_file_access(),
			   errmsg("could not write temporary statistics file \"%s\": %m",
					  tmpfile)));
		FreeFile(fpout);
		unlink(tmpfile);
	}
	else if (FreeFile(fpout) < 0)
	{
		ereport(LOG,
				(errcode_for_file_access(),
			   errmsg("could not close temporary statistics file \"%s\": %m",
					  tmpfile)));
		unlink(tmpfile);
	}
	else if (rename(tmpfile, statfile) < 0)
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
						tmpfile, statfile)));
		unlink(tmpfile);
	}
	else
	{
		/*
		 * Successful write, so update last_statwrite.
		 */
		last_statwrite = globalStats.stats_timestamp;

		/*
		 * If there is clock skew between backends and the collector, we could
		 * receive a stats request time that's in the future.  If so, complain
		 * and reset last_statrequest.  Resetting ensures that no inquiry
		 * message can cause more than one stats file write to occur.
		 */
		if (last_statrequest > last_statwrite)
		{
			char	   *reqtime;
			char	   *mytime;

			/* Copy because timestamptz_to_str returns a static buffer */
			reqtime = pstrdup(timestamptz_to_str(last_statrequest));
			mytime = pstrdup(timestamptz_to_str(last_statwrite));
			elog(LOG, "last_statrequest %s is later than collector's time %s",
				 reqtime, mytime);
			pfree(reqtime);
			pfree(mytime);

			last_statrequest = last_statwrite;
		}
	}

	/*
	 * On shutdown, remove the temp-location file so backends under a new
	 * postmaster can't read stale data (see header comment).
	 */
	if (permanent)
		unlink(pgstat_stat_filename);
}
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------
 * pgstat_read_statsfile() -
 *
 *	Reads in an existing statistics collector file and initializes the
 *	databases' hash table (whose entries point to the tables' hash tables).
 *
 *	If onlydb is a valid OID, per-table/per-function data are loaded only
 *	for that database (and for the shared-catalog pseudo-database, OID 0);
 *	other databases get a DB entry but empty sub-hashtables.  If permanent
 *	is true, read the permanent file (and delete it afterwards, since it is
 *	only valid immediately after an orderly shutdown).
 * ----------
 */
static HTAB *
pgstat_read_statsfile(Oid onlydb, bool permanent)
{
	PgStat_StatDBEntry *dbentry;
	PgStat_StatDBEntry dbbuf;
	PgStat_StatTabEntry *tabentry;
	PgStat_StatTabEntry tabbuf;
	PgStat_StatFuncEntry funcbuf;
	PgStat_StatFuncEntry *funcentry;
	HASHCTL		hash_ctl;
	HTAB	   *dbhash;
	HTAB	   *tabhash = NULL;
	HTAB	   *funchash = NULL;
	FILE	   *fpin;
	int32		format_id;
	bool		found;
	const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;

	/*
	 * The tables will live in pgStatLocalContext.
	 */
	pgstat_setup_memcxt();

	/*
	 * Create the DB hashtable
	 */
	memset(&hash_ctl, 0, sizeof(hash_ctl));
	hash_ctl.keysize = sizeof(Oid);
	hash_ctl.entrysize = sizeof(PgStat_StatDBEntry);
	hash_ctl.hash = oid_hash;
	hash_ctl.hcxt = pgStatLocalContext;
	dbhash = hash_create("Databases hash", PGSTAT_DB_HASH_SIZE, &hash_ctl,
						 HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

	/*
	 * Clear out global statistics so they start from zero in case we can't
	 * load an existing statsfile.
	 */
	memset(&globalStats, 0, sizeof(globalStats));

	/*
	 * Set the current timestamp (will be kept only in case we can't load an
	 * existing statsfile).
	 */
	globalStats.stat_reset_timestamp = GetCurrentTimestamp();

	/*
	 * Try to open the status file. If it doesn't exist, the backends simply
	 * return zero for anything and the collector simply starts from scratch
	 * with empty counters.
	 *
	 * ENOENT is a possibility if the stats collector is not running or has
	 * not yet written the stats file the first time.  Any other failure
	 * condition is suspicious.
	 */
	if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
	{
		if (errno != ENOENT)
			ereport(pgStatRunningInCollector ? LOG : WARNING,
					(errcode_for_file_access(),
					 errmsg("could not open statistics file \"%s\": %m",
							statfile)));
		return dbhash;
	}

	/*
	 * Verify it's of the expected format.
	 */
	if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id)
		|| format_id != PGSTAT_FILE_FORMAT_ID)
	{
		ereport(pgStatRunningInCollector ? LOG : WARNING,
				(errmsg("corrupted statistics file \"%s\"", statfile)));
		goto done;
	}

	/*
	 * Read global stats struct
	 */
	if (fread(&globalStats, 1, sizeof(globalStats), fpin) != sizeof(globalStats))
	{
		ereport(pgStatRunningInCollector ? LOG : WARNING,
				(errmsg("corrupted statistics file \"%s\"", statfile)));
		goto done;
	}

	/*
	 * We found an existing collector stats file. Read it and put all the
	 * hashtable entries into place.
	 */
	for (;;)
	{
		switch (fgetc(fpin))
		{
				/*
				 * 'D'	A PgStat_StatDBEntry struct describing a database
				 * follows. Subsequently, zero to many 'T' and 'F' entries
				 * will follow until a 'd' is encountered.
				 */
			case 'D':
				/* Read only the fixed-size prefix; the hash pointers that
				 * follow in the struct are rebuilt below, not read. */
				if (fread(&dbbuf, 1, offsetof(PgStat_StatDBEntry, tables),
						  fpin) != offsetof(PgStat_StatDBEntry, tables))
				{
					ereport(pgStatRunningInCollector ? LOG : WARNING,
							(errmsg("corrupted statistics file \"%s\"",
									statfile)));
					goto done;
				}

				/*
				 * Add to the DB hash
				 */
				dbentry = (PgStat_StatDBEntry *) hash_search(dbhash,
												  (void *) &dbbuf.databaseid,
															 HASH_ENTER,
															 &found);
				/* A duplicate DB record means the file is corrupt */
				if (found)
				{
					ereport(pgStatRunningInCollector ? LOG : WARNING,
							(errmsg("corrupted statistics file \"%s\"",
									statfile)));
					goto done;
				}

				memcpy(dbentry, &dbbuf, sizeof(PgStat_StatDBEntry));
				dbentry->tables = NULL;
				dbentry->functions = NULL;

				/*
				 * Don't collect tables if not the requested DB (or the
				 * shared-table info)
				 */
				if (onlydb != InvalidOid)
				{
					if (dbbuf.databaseid != onlydb &&
						dbbuf.databaseid != InvalidOid)
						/* leave tabhash/funchash NULL so 'T'/'F' records
						 * for this database are skipped */
						break;
				}

				memset(&hash_ctl, 0, sizeof(hash_ctl));
				hash_ctl.keysize = sizeof(Oid);
				hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
				hash_ctl.hash = oid_hash;
				hash_ctl.hcxt = pgStatLocalContext;
				dbentry->tables = hash_create("Per-database table",
											  PGSTAT_TAB_HASH_SIZE,
											  &hash_ctl,
								   HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

				/* hash_ctl is reused; only entrysize differs for functions */
				hash_ctl.keysize = sizeof(Oid);
				hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
				hash_ctl.hash = oid_hash;
				hash_ctl.hcxt = pgStatLocalContext;
				dbentry->functions = hash_create("Per-database function",
												 PGSTAT_FUNCTION_HASH_SIZE,
												 &hash_ctl,
								   HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

				/*
				 * Arrange that following records add entries to this
				 * database's hash tables.
				 */
				tabhash = dbentry->tables;
				funchash = dbentry->functions;
				break;

				/*
				 * 'd'	End of this database.
				 */
			case 'd':
				tabhash = NULL;
				funchash = NULL;
				break;

				/*
				 * 'T'	A PgStat_StatTabEntry follows.
				 */
			case 'T':
				if (fread(&tabbuf, 1, sizeof(PgStat_StatTabEntry),
						  fpin) != sizeof(PgStat_StatTabEntry))
				{
					ereport(pgStatRunningInCollector ? LOG : WARNING,
							(errmsg("corrupted statistics file \"%s\"",
									statfile)));
					goto done;
				}

				/*
				 * Skip if table belongs to a not requested database.
				 */
				if (tabhash == NULL)
					break;

				tabentry = (PgStat_StatTabEntry *) hash_search(tabhash,
												(void *) &tabbuf.tableid,
														 HASH_ENTER, &found);

				/* A duplicate table record means the file is corrupt */
				if (found)
				{
					ereport(pgStatRunningInCollector ? LOG : WARNING,
							(errmsg("corrupted statistics file \"%s\"",
									statfile)));
					goto done;
				}

				memcpy(tabentry, &tabbuf, sizeof(tabbuf));
				break;

				/*
				 * 'F'	A PgStat_StatFuncEntry follows.
				 */
			case 'F':
				if (fread(&funcbuf, 1, sizeof(PgStat_StatFuncEntry),
						  fpin) != sizeof(PgStat_StatFuncEntry))
				{
					ereport(pgStatRunningInCollector ? LOG : WARNING,
							(errmsg("corrupted statistics file \"%s\"",
									statfile)));
					goto done;
				}

				/*
				 * Skip if function belongs to a not requested database.
				 */
				if (funchash == NULL)
					break;

				funcentry = (PgStat_StatFuncEntry *) hash_search(funchash,
												(void *) &funcbuf.functionid,
														 HASH_ENTER, &found);

				/* A duplicate function record means the file is corrupt */
				if (found)
				{
					ereport(pgStatRunningInCollector ? LOG : WARNING,
							(errmsg("corrupted statistics file \"%s\"",
									statfile)));
					goto done;
				}

				memcpy(funcentry, &funcbuf, sizeof(funcbuf));
				break;

				/*
				 * 'E'	The EOF marker of a complete stats file.
				 */
			case 'E':
				goto done;

			default:
				ereport(pgStatRunningInCollector ? LOG : WARNING,
						(errmsg("corrupted statistics file \"%s\"",
								statfile)));
				goto done;
		}
	}

done:
	FreeFile(fpin);

	/* The permanent file is only valid once; remove it after reading */
	if (permanent)
		unlink(PGSTAT_STAT_PERMANENT_FILENAME);

	return dbhash;
}
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2008-11-03 02:17:08 +01:00
|
|
|
/* ----------
|
|
|
|
* pgstat_read_statsfile_timestamp() -
|
|
|
|
*
|
|
|
|
* Attempt to fetch the timestamp of an existing stats file.
|
|
|
|
* Returns TRUE if successful (timestamp is stored at *ts).
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
|
|
|
|
{
|
|
|
|
PgStat_GlobalStats myGlobalStats;
|
|
|
|
FILE *fpin;
|
|
|
|
int32 format_id;
|
2009-06-11 16:49:15 +02:00
|
|
|
const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename;
|
2008-11-03 02:17:08 +01:00
|
|
|
|
|
|
|
/*
|
2010-03-12 23:19:19 +01:00
|
|
|
* Try to open the status file. As above, anything but ENOENT is worthy
|
|
|
|
* of complaining about.
|
2008-11-03 02:17:08 +01:00
|
|
|
*/
|
|
|
|
if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
|
2010-03-12 23:19:19 +01:00
|
|
|
{
|
|
|
|
if (errno != ENOENT)
|
|
|
|
ereport(pgStatRunningInCollector ? LOG : WARNING,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not open statistics file \"%s\": %m",
|
|
|
|
statfile)));
|
2008-11-03 02:17:08 +01:00
|
|
|
return false;
|
2010-03-12 23:19:19 +01:00
|
|
|
}
|
2008-11-03 02:17:08 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Verify it's of the expected format.
|
|
|
|
*/
|
|
|
|
if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id)
|
|
|
|
|| format_id != PGSTAT_FILE_FORMAT_ID)
|
|
|
|
{
|
2010-03-12 23:19:19 +01:00
|
|
|
ereport(pgStatRunningInCollector ? LOG : WARNING,
|
|
|
|
(errmsg("corrupted statistics file \"%s\"", statfile)));
|
2008-11-03 02:17:08 +01:00
|
|
|
FreeFile(fpin);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Read global stats struct
|
|
|
|
*/
|
|
|
|
if (fread(&myGlobalStats, 1, sizeof(myGlobalStats), fpin) != sizeof(myGlobalStats))
|
|
|
|
{
|
2010-03-12 23:19:19 +01:00
|
|
|
ereport(pgStatRunningInCollector ? LOG : WARNING,
|
|
|
|
(errmsg("corrupted statistics file \"%s\"", statfile)));
|
2008-11-03 02:17:08 +01:00
|
|
|
FreeFile(fpin);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
*ts = myGlobalStats.stats_timestamp;
|
|
|
|
|
|
|
|
FreeFile(fpin);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2004-07-01 02:52:04 +02:00
|
|
|
/*
 * If not already done, read the statistics collector stats file into
 * some hash tables.  The results will be kept until pgstat_clear_snapshot()
 * is called (typically, at end of transaction).
 */
static void
backend_read_statsfile(void)
{
	TimestampTz cur_ts;			/* when we started waiting */
	TimestampTz min_ts;			/* oldest acceptable file timestamp */
	int			count;
	int			last_delay_errno = 0;	/* errno from last pg_usleep, for debug */

	/* already read it? */
	if (pgStatDBHash)
		return;
	Assert(!pgStatRunningInCollector);

	/*
	 * We set the minimum acceptable timestamp to PGSTAT_STAT_INTERVAL msec
	 * before now.	This indirectly ensures that the collector needn't write
	 * the file more often than PGSTAT_STAT_INTERVAL.  In an autovacuum
	 * worker, however, we want a lower delay to avoid using stale data, so we
	 * use PGSTAT_RETRY_DELAY (since the number of worker is low, this
	 * shouldn't be a problem).
	 *
	 * Note that we don't recompute min_ts after sleeping; so we might end up
	 * accepting a file a bit older than PGSTAT_STAT_INTERVAL.	In practice
	 * that shouldn't happen, though, as long as the sleep time is less than
	 * PGSTAT_STAT_INTERVAL; and we don't want to lie to the collector about
	 * what our cutoff time really is.
	 */
	cur_ts = GetCurrentTimestamp();
	if (IsAutoVacuumWorkerProcess())
		min_ts = TimestampTzPlusMilliseconds(cur_ts, -PGSTAT_RETRY_DELAY);
	else
		min_ts = TimestampTzPlusMilliseconds(cur_ts, -PGSTAT_STAT_INTERVAL);

	/*
	 * Loop until fresh enough stats file is available or we ran out of time.
	 * The stats inquiry message is sent repeatedly in case collector drops
	 * it.
	 */
	for (count = 0; count < PGSTAT_POLL_LOOP_COUNT; count++)
	{
		TimestampTz file_ts = 0;

		CHECK_FOR_INTERRUPTS();

		/* Accept the file as soon as its timestamp reaches the cutoff */
		if (pgstat_read_statsfile_timestamp(false, &file_ts) &&
			file_ts >= min_ts)
			break;

		/* Make debugging printouts once we've waited unreasonably long */
		if (count >= PGSTAT_POLL_LOOP_COUNT/2)
		{
			TimestampTz now_ts = GetCurrentTimestamp();

			/* TimestampTz is int64 microseconds or double seconds,
			 * depending on build options; format accordingly */
#ifdef HAVE_INT64_TIMESTAMP
			elog(WARNING, "pgstat waiting for " INT64_FORMAT " usec (%d loops), file timestamp " INT64_FORMAT " target timestamp " INT64_FORMAT " last errno %d",
				 now_ts - cur_ts, count,
				 file_ts, min_ts,
				 last_delay_errno);
#else
			elog(WARNING, "pgstat waiting for %.6f sec (%d loops), file timestamp %.6f target timestamp %.6f last errno %d",
				 now_ts - cur_ts, count,
				 file_ts, min_ts,
				 last_delay_errno);
#endif
		}

		/* Not there or too old, so kick the collector and wait a bit */
		errno = 0;				/* clear so we capture only the sleep's errno */
		pgstat_send_inquiry(min_ts);
		pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
		last_delay_errno = errno;
	}

	if (count >= PGSTAT_POLL_LOOP_COUNT)
		elog(WARNING, "pgstat wait timeout");

	/* Autovacuum launcher wants stats about all databases */
	if (IsAutoVacuumLauncherProcess())
		pgStatDBHash = pgstat_read_statsfile(InvalidOid, false);
	else
		pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false);
}
|
2005-07-14 07:13:45 +02:00
|
|
|
|
2007-02-08 00:11:30 +01:00
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_setup_memcxt() -
|
|
|
|
*
|
|
|
|
* Create pgStatLocalContext, if not already done.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_setup_memcxt(void)
|
|
|
|
{
|
|
|
|
if (!pgStatLocalContext)
|
|
|
|
pgStatLocalContext = AllocSetContextCreate(TopMemoryContext,
|
|
|
|
"Statistics snapshot",
|
|
|
|
ALLOCSET_SMALL_MINSIZE,
|
|
|
|
ALLOCSET_SMALL_INITSIZE,
|
|
|
|
ALLOCSET_SMALL_MAXSIZE);
|
2004-07-01 02:52:04 +02:00
|
|
|
}
|
|
|
|
|
2007-02-08 00:11:30 +01:00
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_clear_snapshot() -
|
|
|
|
*
|
2007-11-15 22:14:46 +01:00
|
|
|
* Discard any data collected in the current transaction. Any subsequent
|
2007-02-08 00:11:30 +01:00
|
|
|
* request will cause new snapshots to be read.
|
|
|
|
*
|
|
|
|
* This is also invoked during transaction commit or abort to discard
|
|
|
|
* the no-longer-wanted snapshot.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_clear_snapshot(void)
|
|
|
|
{
|
|
|
|
/* Release memory, if any was allocated */
|
|
|
|
if (pgStatLocalContext)
|
|
|
|
MemoryContextDelete(pgStatLocalContext);
|
|
|
|
|
|
|
|
/* Reset variables */
|
|
|
|
pgStatLocalContext = NULL;
|
|
|
|
pgStatDBHash = NULL;
|
|
|
|
localBackendStatusTable = NULL;
|
|
|
|
localNumBackends = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-11-03 02:17:08 +01:00
|
|
|
/* ----------
|
|
|
|
* pgstat_recv_inquiry() -
|
|
|
|
*
|
|
|
|
* Process stat inquiry requests.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
|
|
|
|
{
|
|
|
|
if (msg->inquiry_time > last_statrequest)
|
|
|
|
last_statrequest = msg->inquiry_time;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
/* ----------
 * pgstat_recv_tabstat() -
 *
 *	Count what the backend has done.
 *
 *	Folds one PgStat_MsgTabstat message into the collector's hashtables:
 *	the database-wide transaction counters, one PgStat_StatTabEntry per
 *	reported table, and the per-database roll-ups of the table counters.
 * ----------
 */
static void
pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
{
	PgStat_StatDBEntry *dbentry;
	PgStat_StatTabEntry *tabentry;
	int			i;
	bool		found;

	/* create=true: make the DB entry on the fly if it's unknown */
	dbentry = pgstat_get_db_entry(msg->m_databaseid, true);

	/*
	 * Update database-wide stats.
	 */
	dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
	dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);

	/*
	 * Process all table entries in the message.
	 */
	for (i = 0; i < msg->m_nentries; i++)
	{
		PgStat_TableEntry *tabmsg = &(msg->m_entry[i]);

		tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
												   (void *) &(tabmsg->t_id),
													   HASH_ENTER, &found);

		if (!found)
		{
			/*
			 * If it's a new table entry, initialize counters to the values we
			 * just got.
			 */
			tabentry->numscans = tabmsg->t_counts.t_numscans;
			tabentry->tuples_returned = tabmsg->t_counts.t_tuples_returned;
			tabentry->tuples_fetched = tabmsg->t_counts.t_tuples_fetched;
			tabentry->tuples_inserted = tabmsg->t_counts.t_tuples_inserted;
			tabentry->tuples_updated = tabmsg->t_counts.t_tuples_updated;
			tabentry->tuples_deleted = tabmsg->t_counts.t_tuples_deleted;
			tabentry->tuples_hot_updated = tabmsg->t_counts.t_tuples_hot_updated;
			tabentry->n_live_tuples = tabmsg->t_counts.t_delta_live_tuples;
			tabentry->n_dead_tuples = tabmsg->t_counts.t_delta_dead_tuples;
			tabentry->changes_since_analyze = tabmsg->t_counts.t_changed_tuples;
			tabentry->blocks_fetched = tabmsg->t_counts.t_blocks_fetched;
			tabentry->blocks_hit = tabmsg->t_counts.t_blocks_hit;

			/* vacuum/analyze bookkeeping starts out empty for a new table */
			tabentry->vacuum_timestamp = 0;
			tabentry->vacuum_count = 0;
			tabentry->autovac_vacuum_timestamp = 0;
			tabentry->autovac_vacuum_count = 0;
			tabentry->analyze_timestamp = 0;
			tabentry->analyze_count = 0;
			tabentry->autovac_analyze_timestamp = 0;
			tabentry->autovac_analyze_count = 0;
		}
		else
		{
			/*
			 * Otherwise add the values to the existing entry.
			 */
			tabentry->numscans += tabmsg->t_counts.t_numscans;
			tabentry->tuples_returned += tabmsg->t_counts.t_tuples_returned;
			tabentry->tuples_fetched += tabmsg->t_counts.t_tuples_fetched;
			tabentry->tuples_inserted += tabmsg->t_counts.t_tuples_inserted;
			tabentry->tuples_updated += tabmsg->t_counts.t_tuples_updated;
			tabentry->tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
			tabentry->tuples_hot_updated += tabmsg->t_counts.t_tuples_hot_updated;
			tabentry->n_live_tuples += tabmsg->t_counts.t_delta_live_tuples;
			tabentry->n_dead_tuples += tabmsg->t_counts.t_delta_dead_tuples;
			tabentry->changes_since_analyze += tabmsg->t_counts.t_changed_tuples;
			tabentry->blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
			tabentry->blocks_hit += tabmsg->t_counts.t_blocks_hit;
		}

		/* Clamp n_live_tuples in case of negative delta_live_tuples */
		tabentry->n_live_tuples = Max(tabentry->n_live_tuples, 0);
		/* Likewise for n_dead_tuples */
		tabentry->n_dead_tuples = Max(tabentry->n_dead_tuples, 0);

		/*
		 * Add per-table stats to the per-database entry, too.
		 */
		dbentry->n_tuples_returned += tabmsg->t_counts.t_tuples_returned;
		dbentry->n_tuples_fetched += tabmsg->t_counts.t_tuples_fetched;
		dbentry->n_tuples_inserted += tabmsg->t_counts.t_tuples_inserted;
		dbentry->n_tuples_updated += tabmsg->t_counts.t_tuples_updated;
		dbentry->n_tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
		dbentry->n_blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
		dbentry->n_blocks_hit += tabmsg->t_counts.t_blocks_hit;
	}
}
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_recv_tabpurge() -
|
|
|
|
*
|
|
|
|
* Arrange for dead table removal.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
|
|
|
|
{
|
2001-10-25 07:50:21 +02:00
|
|
|
PgStat_StatDBEntry *dbentry;
|
|
|
|
int i;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2005-07-29 21:30:09 +02:00
|
|
|
dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* No need to purge if we don't even know the database.
|
|
|
|
*/
|
|
|
|
if (!dbentry || !dbentry->tables)
|
|
|
|
return;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Process all table entries in the message.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < msg->m_nentries; i++)
|
|
|
|
{
|
2006-04-06 22:38:00 +02:00
|
|
|
/* Remove from hashtable if present; we don't care if it's not. */
|
|
|
|
(void) hash_search(dbentry->tables,
|
|
|
|
(void *) &(msg->m_tableid[i]),
|
|
|
|
HASH_REMOVE, NULL);
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_recv_dropdb() -
|
|
|
|
*
|
|
|
|
* Arrange for dead database removal
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
|
|
|
|
{
|
2001-10-25 07:50:21 +02:00
|
|
|
PgStat_StatDBEntry *dbentry;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Lookup the database in the hashtable.
|
|
|
|
*/
|
2005-07-29 21:30:09 +02:00
|
|
|
dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/*
|
2006-04-06 22:38:00 +02:00
|
|
|
* If found, remove it.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2005-07-29 21:30:09 +02:00
|
|
|
if (dbentry)
|
2006-04-06 22:38:00 +02:00
|
|
|
{
|
|
|
|
if (dbentry->tables != NULL)
|
|
|
|
hash_destroy(dbentry->tables);
|
2008-05-15 02:17:41 +02:00
|
|
|
if (dbentry->functions != NULL)
|
|
|
|
hash_destroy(dbentry->functions);
|
2006-04-06 22:38:00 +02:00
|
|
|
|
|
|
|
if (hash_search(pgStatDBHash,
|
|
|
|
(void *) &(dbentry->databaseid),
|
|
|
|
HASH_REMOVE, NULL) == NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("database hash table corrupted "
|
|
|
|
"during cleanup --- abort")));
|
|
|
|
}
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* ----------
|
2005-05-11 03:41:41 +02:00
|
|
|
* pgstat_recv_resetcounter() -
|
2001-06-22 21:18:36 +02:00
|
|
|
*
|
2005-05-11 03:41:41 +02:00
|
|
|
* Reset the statistics for the specified database.
|
2001-06-22 21:18:36 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
|
|
|
|
{
|
2001-10-25 07:50:21 +02:00
|
|
|
HASHCTL hash_ctl;
|
|
|
|
PgStat_StatDBEntry *dbentry;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/*
|
2005-07-29 21:30:09 +02:00
|
|
|
* Lookup the database in the hashtable. Nothing to do if not there.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
2005-07-29 21:30:09 +02:00
|
|
|
dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
|
|
|
|
|
|
|
|
if (!dbentry)
|
|
|
|
return;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* We simply throw away all the database's table entries by recreating a
|
|
|
|
* new hash table for them.
|
2001-06-22 21:18:36 +02:00
|
|
|
*/
|
|
|
|
if (dbentry->tables != NULL)
|
|
|
|
hash_destroy(dbentry->tables);
|
2008-05-15 02:17:41 +02:00
|
|
|
if (dbentry->functions != NULL)
|
|
|
|
hash_destroy(dbentry->functions);
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2001-10-25 07:50:21 +02:00
|
|
|
dbentry->tables = NULL;
|
2008-05-15 02:17:41 +02:00
|
|
|
dbentry->functions = NULL;
|
2010-12-12 21:09:53 +01:00
|
|
|
|
|
|
|
/*
|
2011-04-10 17:42:00 +02:00
|
|
|
* Reset database-level stats too. This should match the initialization
|
2010-12-12 21:09:53 +01:00
|
|
|
* code in pgstat_get_db_entry().
|
|
|
|
*/
|
2001-10-25 07:50:21 +02:00
|
|
|
dbentry->n_xact_commit = 0;
|
|
|
|
dbentry->n_xact_rollback = 0;
|
|
|
|
dbentry->n_blocks_fetched = 0;
|
|
|
|
dbentry->n_blocks_hit = 0;
|
2010-12-12 21:09:53 +01:00
|
|
|
dbentry->n_tuples_returned = 0;
|
|
|
|
dbentry->n_tuples_fetched = 0;
|
|
|
|
dbentry->n_tuples_inserted = 0;
|
|
|
|
dbentry->n_tuples_updated = 0;
|
|
|
|
dbentry->n_tuples_deleted = 0;
|
|
|
|
dbentry->last_autovac_time = 0;
|
2001-06-22 21:18:36 +02:00
|
|
|
|
2011-02-10 15:09:35 +01:00
|
|
|
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
|
|
|
|
|
2001-06-22 21:18:36 +02:00
|
|
|
memset(&hash_ctl, 0, sizeof(hash_ctl));
|
2001-10-25 07:50:21 +02:00
|
|
|
hash_ctl.keysize = sizeof(Oid);
|
2001-10-01 07:36:17 +02:00
|
|
|
hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
|
2005-04-14 22:32:43 +02:00
|
|
|
hash_ctl.hash = oid_hash;
|
2001-10-05 19:28:13 +02:00
|
|
|
dbentry->tables = hash_create("Per-database table",
|
|
|
|
PGSTAT_TAB_HASH_SIZE,
|
|
|
|
&hash_ctl,
|
|
|
|
HASH_ELEM | HASH_FUNCTION);
|
2008-05-15 02:17:41 +02:00
|
|
|
|
|
|
|
hash_ctl.keysize = sizeof(Oid);
|
|
|
|
hash_ctl.entrysize = sizeof(PgStat_StatFuncEntry);
|
|
|
|
hash_ctl.hash = oid_hash;
|
|
|
|
dbentry->functions = hash_create("Per-database function",
|
|
|
|
PGSTAT_FUNCTION_HASH_SIZE,
|
|
|
|
&hash_ctl,
|
|
|
|
HASH_ELEM | HASH_FUNCTION);
|
2001-06-22 21:18:36 +02:00
|
|
|
}
|
2006-06-19 03:51:22 +02:00
|
|
|
|
2010-01-19 15:11:32 +01:00
|
|
|
/* ----------
|
|
|
|
* pgstat_recv_resetshared() -
|
|
|
|
*
|
|
|
|
* Reset some shared statistics of the cluster.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
|
|
|
|
{
|
2010-02-26 03:01:40 +01:00
|
|
|
if (msg->m_resettarget == RESET_BGWRITER)
|
2010-01-19 15:11:32 +01:00
|
|
|
{
|
|
|
|
/* Reset the global background writer statistics for the cluster. */
|
|
|
|
memset(&globalStats, 0, sizeof(globalStats));
|
2011-02-10 15:09:35 +01:00
|
|
|
globalStats.stat_reset_timestamp = GetCurrentTimestamp();
|
2010-01-19 15:11:32 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Presumably the sender of this message validated the target, don't
|
|
|
|
* complain here if it's not valid
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
2010-01-28 15:25:41 +01:00
|
|
|
/* ----------
|
|
|
|
* pgstat_recv_resetsinglecounter() -
|
|
|
|
*
|
|
|
|
* Reset a statistics for a single object
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_recv_resetsinglecounter(PgStat_MsgResetsinglecounter *msg, int len)
|
|
|
|
{
|
|
|
|
PgStat_StatDBEntry *dbentry;
|
|
|
|
|
|
|
|
dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
|
|
|
|
|
|
|
|
if (!dbentry)
|
|
|
|
return;
|
|
|
|
|
2011-02-10 15:09:35 +01:00
|
|
|
/* Set the reset timestamp for the whole database */
|
|
|
|
dbentry->stat_reset_timestamp = GetCurrentTimestamp();
|
2010-01-28 15:25:41 +01:00
|
|
|
|
|
|
|
/* Remove object if it exists, ignore it if not */
|
|
|
|
if (msg->m_resettype == RESET_TABLE)
|
2011-03-07 17:17:06 +01:00
|
|
|
(void) hash_search(dbentry->tables, (void *) &(msg->m_objectid),
|
|
|
|
HASH_REMOVE, NULL);
|
2010-01-28 15:25:41 +01:00
|
|
|
else if (msg->m_resettype == RESET_FUNCTION)
|
2011-03-07 17:17:06 +01:00
|
|
|
(void) hash_search(dbentry->functions, (void *) &(msg->m_objectid),
|
|
|
|
HASH_REMOVE, NULL);
|
2010-01-28 15:25:41 +01:00
|
|
|
}
|
|
|
|
|
2006-06-19 03:51:22 +02:00
|
|
|
/* ----------
|
|
|
|
* pgstat_recv_autovac() -
|
|
|
|
*
|
|
|
|
* Process an autovacuum signalling message.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len)
|
|
|
|
{
|
|
|
|
PgStat_StatDBEntry *dbentry;
|
|
|
|
|
|
|
|
/*
|
2009-09-05 00:32:33 +02:00
|
|
|
* Store the last autovacuum time in the database's hashtable entry.
|
2006-06-19 03:51:22 +02:00
|
|
|
*/
|
2009-09-05 00:32:33 +02:00
|
|
|
dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
|
2006-06-19 03:51:22 +02:00
|
|
|
|
|
|
|
dbentry->last_autovac_time = msg->m_start_time;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_recv_vacuum() -
|
|
|
|
*
|
|
|
|
* Process a VACUUM message.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
|
|
|
|
{
|
|
|
|
PgStat_StatDBEntry *dbentry;
|
|
|
|
PgStat_StatTabEntry *tabentry;
|
|
|
|
|
|
|
|
/*
|
2009-09-05 00:32:33 +02:00
|
|
|
* Store the data in the table's hashtable entry.
|
2006-06-19 03:51:22 +02:00
|
|
|
*/
|
2009-09-05 00:32:33 +02:00
|
|
|
dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
|
2006-06-19 03:51:22 +02:00
|
|
|
|
2009-09-05 00:32:33 +02:00
|
|
|
tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
|
2006-06-19 03:51:22 +02:00
|
|
|
|
Fix VACUUM so that it always updates pg_class.reltuples/relpages.
When we added the ability for vacuum to skip heap pages by consulting the
visibility map, we made it just not update the reltuples/relpages
statistics if it skipped any pages. But this could leave us with extremely
out-of-date stats for a table that contains any unchanging areas,
especially for TOAST tables which never get processed by ANALYZE. In
particular this could result in autovacuum making poor decisions about when
to process the table, as in recent report from Florian Helmberger. And in
general it's a bad idea to not update the stats at all. Instead, use the
previous values of reltuples/relpages as an estimate of the tuple density
in unvisited pages. This approach results in a "moving average" estimate
of reltuples, which should converge to the correct value over multiple
VACUUM and ANALYZE cycles even when individual measurements aren't very
good.
This new method for updating reltuples is used by both VACUUM and ANALYZE,
with the result that we no longer need the grotty interconnections that
caused ANALYZE to not update the stats depending on what had happened
in the parent VACUUM command.
Also, fix the logic for skipping all-visible pages during VACUUM so that it
looks ahead rather than behind to decide what to do, as per a suggestion
from Greg Stark. This eliminates useless scanning of all-visible pages at
the start of the relation or just after a not-all-visible page. In
particular, the first few pages of the relation will not be invariably
included in the scanned pages, which seems to help in not overweighting
them in the reltuples estimate.
Back-patch to 8.4, where the visibility map was introduced.
2011-05-30 23:05:26 +02:00
|
|
|
tabentry->n_live_tuples = msg->m_tuples;
|
2007-09-20 19:56:33 +02:00
|
|
|
/* Resetting dead_tuples to 0 is an approximation ... */
|
2006-06-19 03:51:22 +02:00
|
|
|
tabentry->n_dead_tuples = 0;
|
2009-06-07 00:13:52 +02:00
|
|
|
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
if (msg->m_autovacuum)
|
2010-08-21 12:59:17 +02:00
|
|
|
{
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
tabentry->autovac_vacuum_timestamp = msg->m_vacuumtime;
|
2010-08-21 12:59:17 +02:00
|
|
|
tabentry->autovac_vacuum_count++;
|
|
|
|
}
|
2006-06-27 05:45:16 +02:00
|
|
|
else
|
2010-08-21 12:59:17 +02:00
|
|
|
{
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
tabentry->vacuum_timestamp = msg->m_vacuumtime;
|
2010-08-21 12:59:17 +02:00
|
|
|
tabentry->vacuum_count++;
|
|
|
|
}
|
2006-06-19 03:51:22 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_recv_analyze() -
|
|
|
|
*
|
|
|
|
* Process an ANALYZE message.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
|
|
|
|
{
|
|
|
|
PgStat_StatDBEntry *dbentry;
|
|
|
|
PgStat_StatTabEntry *tabentry;
|
|
|
|
|
|
|
|
/*
|
2009-09-05 00:32:33 +02:00
|
|
|
* Store the data in the table's hashtable entry.
|
2006-06-19 03:51:22 +02:00
|
|
|
*/
|
2009-09-05 00:32:33 +02:00
|
|
|
dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
|
2006-06-19 03:51:22 +02:00
|
|
|
|
2009-09-05 00:32:33 +02:00
|
|
|
tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
|
2006-06-19 03:51:22 +02:00
|
|
|
|
Fix VACUUM so that it always updates pg_class.reltuples/relpages.
When we added the ability for vacuum to skip heap pages by consulting the
visibility map, we made it just not update the reltuples/relpages
statistics if it skipped any pages. But this could leave us with extremely
out-of-date stats for a table that contains any unchanging areas,
especially for TOAST tables which never get processed by ANALYZE. In
particular this could result in autovacuum making poor decisions about when
to process the table, as in recent report from Florian Helmberger. And in
general it's a bad idea to not update the stats at all. Instead, use the
previous values of reltuples/relpages as an estimate of the tuple density
in unvisited pages. This approach results in a "moving average" estimate
of reltuples, which should converge to the correct value over multiple
VACUUM and ANALYZE cycles even when individual measurements aren't very
good.
This new method for updating reltuples is used by both VACUUM and ANALYZE,
with the result that we no longer need the grotty interconnections that
caused ANALYZE to not update the stats depending on what had happened
in the parent VACUUM command.
Also, fix the logic for skipping all-visible pages during VACUUM so that it
looks ahead rather than behind to decide what to do, as per a suggestion
from Greg Stark. This eliminates useless scanning of all-visible pages at
the start of the relation or just after a not-all-visible page. In
particular, the first few pages of the relation will not be invariably
included in the scanned pages, which seems to help in not overweighting
them in the reltuples estimate.
Back-patch to 8.4, where the visibility map was introduced.
2011-05-30 23:05:26 +02:00
|
|
|
tabentry->n_live_tuples = msg->m_live_tuples;
|
|
|
|
tabentry->n_dead_tuples = msg->m_dead_tuples;
|
Revise pgstat's tracking of tuple changes to improve the reliability of
decisions about when to auto-analyze.
The previous code depended on n_live_tuples + n_dead_tuples - last_anl_tuples,
where all three of these numbers could be bad estimates from ANALYZE itself.
Even worse, in the presence of a steady flow of HOT updates and matching
HOT-tuple reclamations, auto-analyze might never trigger at all, even if all
three numbers are exactly right, because n_dead_tuples could hold steady.
To fix, replace last_anl_tuples with an accurately tracked count of the total
number of committed tuple inserts + updates + deletes since the last ANALYZE
on the table. This can still be compared to the same threshold as before, but
it's much more trustworthy than the old computation. Tracking this requires
one more intra-transaction counter per modified table within backends, but no
additional memory space in the stats collector. There probably isn't any
measurable speed difference; if anything it might be a bit faster than before,
since I was able to eliminate some per-tuple arithmetic operations in favor of
adding sums once per (sub)transaction.
Also, simplify the logic around pgstat vacuum and analyze reporting messages
by not trying to fold VACUUM ANALYZE into a single pgstat message.
The original thought behind this patch was to allow scheduling of analyzes
on parent tables by artificially inflating their changes_since_analyze count.
I've left that for a separate patch since this change seems to stand on its
own merit.
2009-12-30 21:32:14 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We reset changes_since_analyze to zero, forgetting any changes that
|
|
|
|
* occurred while the ANALYZE was in progress.
|
|
|
|
*/
|
|
|
|
tabentry->changes_since_analyze = 0;
|
|
|
|
|
2006-10-04 02:30:14 +02:00
|
|
|
if (msg->m_autovacuum)
|
2010-08-21 12:59:17 +02:00
|
|
|
{
|
2006-06-19 03:51:22 +02:00
|
|
|
tabentry->autovac_analyze_timestamp = msg->m_analyzetime;
|
2010-08-21 12:59:17 +02:00
|
|
|
tabentry->autovac_analyze_count++;
|
|
|
|
}
|
2006-10-04 02:30:14 +02:00
|
|
|
else
|
2010-08-21 12:59:17 +02:00
|
|
|
{
|
2006-06-19 03:51:22 +02:00
|
|
|
tabentry->analyze_timestamp = msg->m_analyzetime;
|
2010-08-21 12:59:17 +02:00
|
|
|
tabentry->analyze_count++;
|
|
|
|
}
|
2006-06-19 03:51:22 +02:00
|
|
|
}
|
2007-03-30 20:34:56 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_recv_bgwriter() -
|
|
|
|
*
|
|
|
|
* Process a BGWRITER message.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
2007-11-15 23:25:18 +01:00
|
|
|
pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
|
2007-03-30 20:34:56 +02:00
|
|
|
{
|
|
|
|
globalStats.timed_checkpoints += msg->m_timed_checkpoints;
|
|
|
|
globalStats.requested_checkpoints += msg->m_requested_checkpoints;
|
|
|
|
globalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints;
|
2007-06-28 02:02:40 +02:00
|
|
|
globalStats.buf_written_clean += msg->m_buf_written_clean;
|
|
|
|
globalStats.maxwritten_clean += msg->m_maxwritten_clean;
|
2007-09-25 22:03:38 +02:00
|
|
|
globalStats.buf_written_backend += msg->m_buf_written_backend;
|
2010-11-15 18:42:59 +01:00
|
|
|
globalStats.buf_fsync_backend += msg->m_buf_fsync_backend;
|
2007-09-25 22:03:38 +02:00
|
|
|
globalStats.buf_alloc += msg->m_buf_alloc;
|
2007-03-30 20:34:56 +02:00
|
|
|
}
|
2008-05-15 02:17:41 +02:00
|
|
|
|
2011-01-03 12:46:03 +01:00
|
|
|
/* ----------
|
|
|
|
* pgstat_recv_recoveryconflict() -
|
|
|
|
*
|
2011-04-10 17:42:00 +02:00
|
|
|
* Process as RECOVERYCONFLICT message.
|
2011-01-03 12:46:03 +01:00
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len)
|
|
|
|
{
|
|
|
|
PgStat_StatDBEntry *dbentry;
|
2011-04-10 17:42:00 +02:00
|
|
|
|
2011-01-03 12:46:03 +01:00
|
|
|
dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
|
|
|
|
|
|
|
|
switch (msg->m_reason)
|
|
|
|
{
|
|
|
|
case PROCSIG_RECOVERY_CONFLICT_DATABASE:
|
2011-04-10 17:42:00 +02:00
|
|
|
|
2011-01-03 12:46:03 +01:00
|
|
|
/*
|
2011-04-10 17:42:00 +02:00
|
|
|
* Since we drop the information about the database as soon as it
|
|
|
|
* replicates, there is no point in counting these conflicts.
|
2011-01-03 12:46:03 +01:00
|
|
|
*/
|
|
|
|
break;
|
|
|
|
case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
|
|
|
|
dbentry->n_conflict_tablespace++;
|
|
|
|
break;
|
|
|
|
case PROCSIG_RECOVERY_CONFLICT_LOCK:
|
|
|
|
dbentry->n_conflict_lock++;
|
|
|
|
break;
|
|
|
|
case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
|
|
|
|
dbentry->n_conflict_snapshot++;
|
|
|
|
break;
|
|
|
|
case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
|
|
|
|
dbentry->n_conflict_bufferpin++;
|
|
|
|
break;
|
|
|
|
case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
|
|
|
|
dbentry->n_conflict_startup_deadlock++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-05-15 02:17:41 +02:00
|
|
|
/* ----------
|
|
|
|
* pgstat_recv_funcstat() -
|
|
|
|
*
|
|
|
|
* Count what the backend has done.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len)
|
|
|
|
{
|
|
|
|
PgStat_FunctionEntry *funcmsg = &(msg->m_entry[0]);
|
|
|
|
PgStat_StatDBEntry *dbentry;
|
|
|
|
PgStat_StatFuncEntry *funcentry;
|
|
|
|
int i;
|
|
|
|
bool found;
|
|
|
|
|
|
|
|
dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Process all function entries in the message.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < msg->m_nentries; i++, funcmsg++)
|
|
|
|
{
|
|
|
|
funcentry = (PgStat_StatFuncEntry *) hash_search(dbentry->functions,
|
2009-06-11 16:49:15 +02:00
|
|
|
(void *) &(funcmsg->f_id),
|
|
|
|
HASH_ENTER, &found);
|
2008-05-15 02:17:41 +02:00
|
|
|
|
|
|
|
if (!found)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If it's a new function entry, initialize counters to the values
|
|
|
|
* we just got.
|
|
|
|
*/
|
|
|
|
funcentry->f_numcalls = funcmsg->f_numcalls;
|
|
|
|
funcentry->f_time = funcmsg->f_time;
|
|
|
|
funcentry->f_time_self = funcmsg->f_time_self;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Otherwise add the values to the existing entry.
|
|
|
|
*/
|
|
|
|
funcentry->f_numcalls += funcmsg->f_numcalls;
|
|
|
|
funcentry->f_time += funcmsg->f_time;
|
|
|
|
funcentry->f_time_self += funcmsg->f_time_self;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* pgstat_recv_funcpurge() -
|
|
|
|
*
|
|
|
|
* Arrange for dead function removal.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len)
|
|
|
|
{
|
|
|
|
PgStat_StatDBEntry *dbentry;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* No need to purge if we don't even know the database.
|
|
|
|
*/
|
|
|
|
if (!dbentry || !dbentry->functions)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Process all function entries in the message.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < msg->m_nentries; i++)
|
|
|
|
{
|
|
|
|
/* Remove from hashtable if present; we don't care if it's not. */
|
|
|
|
(void) hash_search(dbentry->functions,
|
|
|
|
(void *) &(msg->m_functionid[i]),
|
|
|
|
HASH_REMOVE, NULL);
|
|
|
|
}
|
|
|
|
}
|