From 7466eeac61e2ce2af25d67d25d3ec60f0f0764da Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 4 Jan 2009 22:19:59 +0000 Subject: [PATCH] Add contrib/pg_stat_statements for server-wide tracking of statement execution statistics. Takahiro Itagaki --- contrib/Makefile | 3 +- contrib/README | 4 + contrib/pg_stat_statements/Makefile | 17 + .../pg_stat_statements/pg_stat_statements.c | 904 ++++++++++++++++++ .../pg_stat_statements.sql.in | 31 + .../uninstall_pg_stat_statements.sql | 8 + doc/src/sgml/contrib.sgml | 3 +- doc/src/sgml/filelist.sgml | 3 +- doc/src/sgml/pgstatstatements.sgml | 265 +++++ src/backend/postmaster/postmaster.c | 4 +- src/include/pgstat.h | 4 +- 11 files changed, 1239 insertions(+), 7 deletions(-) create mode 100644 contrib/pg_stat_statements/Makefile create mode 100644 contrib/pg_stat_statements/pg_stat_statements.c create mode 100644 contrib/pg_stat_statements/pg_stat_statements.sql.in create mode 100644 contrib/pg_stat_statements/uninstall_pg_stat_statements.sql create mode 100644 doc/src/sgml/pgstatstatements.sgml diff --git a/contrib/Makefile b/contrib/Makefile index bbd43e1f66..4b1d2ae95d 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -1,4 +1,4 @@ -# $PostgreSQL: pgsql/contrib/Makefile,v 1.85 2008/11/19 02:59:28 tgl Exp $ +# $PostgreSQL: pgsql/contrib/Makefile,v 1.86 2009/01/04 22:19:59 tgl Exp $ subdir = contrib top_builddir = .. @@ -27,6 +27,7 @@ WANTED_DIRS = \ pg_buffercache \ pg_freespacemap \ pg_standby \ + pg_stat_statements \ pg_trgm \ pgbench \ pgcrypto \ diff --git a/contrib/README b/contrib/README index 060853fa21..7d258d4b2f 100644 --- a/contrib/README +++ b/contrib/README @@ -112,6 +112,10 @@ pg_standby - Sample archive_command for warm standby operation by Simon Riggs +pg_stat_statements - + Track statement execution times across a whole database cluster + by Takahiro Itagaki + pg_trgm - Functions for determining the similarity of text based on trigram matching. 
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile new file mode 100644 index 0000000000..ce335a656e --- /dev/null +++ b/contrib/pg_stat_statements/Makefile @@ -0,0 +1,17 @@ +# $PostgreSQL: pgsql/contrib/pg_stat_statements/Makefile,v 1.1 2009/01/04 22:19:59 tgl Exp $ + +MODULE_big = pg_stat_statements +DATA_built = pg_stat_statements.sql +DATA = uninstall_pg_stat_statements.sql +OBJS = pg_stat_statements.o + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pg_stat_statements +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c new file mode 100644 index 0000000000..df14d0559b --- /dev/null +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -0,0 +1,904 @@ +/*------------------------------------------------------------------------- + * + * pg_stat_statements.c + * Track statement execution times across a whole database cluster. + * + * Note about locking issues: to create or delete an entry in the shared + * hashtable, one must hold pgss->lock exclusively. Modifying any field + * in an entry except the counters requires the same. To look up an entry, + * one must hold the lock shared. To read or update the counters within + * an entry, one must hold the lock shared or exclusive (so the entry doesn't + * disappear!) and also take the entry's mutex spinlock. 
+ * + * + * Copyright (c) 2008-2009, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL: pgsql/contrib/pg_stat_statements/pg_stat_statements.c,v 1.1 2009/01/04 22:19:59 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/hash.h" +#include "catalog/pg_type.h" +#include "executor/executor.h" +#include "executor/instrument.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/spin.h" +#include "utils/builtins.h" +#include "utils/hsearch.h" +#include "utils/guc.h" + + +PG_MODULE_MAGIC; + +/* Location of stats file */ +#define PGSS_DUMP_FILE "global/pg_stat_statements.stat" + +/* This constant defines the magic number in the stats file header */ +static const uint32 PGSS_FILE_HEADER = 0x20081202; + +/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */ +#define USAGE_EXEC(duration) (1.0) +#define USAGE_INIT (1.0) /* including initial planning */ +#define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */ +#define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */ + +/* + * Hashtable key that defines the identity of a hashtable entry. The + * hash comparators do not assume that the query string is null-terminated; + * this lets us search for an mbcliplen'd string without copying it first. + * + * Presently, the query encoding is fully determined by the source database + * and so we don't really need it to be in the key. But that might not always + * be true. Anyway it's notationally convenient to pass it as part of the key. 
+ */ +typedef struct pgssHashKey +{ + Oid userid; /* user OID */ + Oid dbid; /* database OID */ + int encoding; /* query encoding */ + int query_len; /* # of valid bytes in query string */ + const char *query_ptr; /* query string proper */ +} pgssHashKey; + +/* + * The actual stats counters kept within pgssEntry. + */ +typedef struct Counters +{ + int64 calls; /* # of times executed */ + double total_time; /* total execution time in seconds */ + int64 rows; /* total # of retrieved or affected rows */ + double usage; /* usage factor */ +} Counters; + +/* + * Statistics per statement + * + * NB: see the file read/write code before changing field order here. + */ +typedef struct pgssEntry +{ + pgssHashKey key; /* hash key of entry - MUST BE FIRST */ + Counters counters; /* the statistics for this query */ + slock_t mutex; /* protects the counters only */ + char query[1]; /* VARIABLE LENGTH ARRAY - MUST BE LAST */ + /* Note: the allocated length of query[] is actually pgss->query_size */ +} pgssEntry; + +/* + * Global shared state + */ +typedef struct pgssSharedState +{ + LWLockId lock; /* protects hashtable search/modification */ + int query_size; /* max query length in bytes */ +} pgssSharedState; + +/*---- Local variables ----*/ + +/* Current nesting depth of ExecutorRun calls */ +static int nested_level = 0; +/* Saved hook values in case of unload */ +static shmem_startup_hook_type prev_shmem_startup_hook = NULL; +static ExecutorStart_hook_type prev_ExecutorStart = NULL; +static ExecutorRun_hook_type prev_ExecutorRun = NULL; +static ExecutorEnd_hook_type prev_ExecutorEnd = NULL; +/* Links to shared memory state */ +static pgssSharedState *pgss = NULL; +static HTAB *pgss_hash = NULL; + +/*---- GUC variables ----*/ + +typedef enum +{ + PGSS_TRACK_NONE, /* track no statements */ + PGSS_TRACK_TOP, /* only top level statements */ + PGSS_TRACK_ALL, /* all statements, including nested ones */ +} PGSSTrackLevel; + +static const struct config_enum_entry track_options[] = { 
+ {"none", PGSS_TRACK_NONE, false}, + {"top", PGSS_TRACK_TOP, false}, + {"all", PGSS_TRACK_ALL, false}, + {NULL, 0, false} +}; + +static int pgss_max; /* max # statements to track */ +static int pgss_track; /* tracking level */ +static bool pgss_save; /* whether to save stats across shutdown */ + + +#define pgss_enabled() \ + (pgss_track == PGSS_TRACK_ALL || \ + (pgss_track == PGSS_TRACK_TOP && nested_level == 0)) + +/*---- Function declarations ----*/ + +void _PG_init(void); +void _PG_fini(void); + +Datum pg_stat_statements_reset(PG_FUNCTION_ARGS); +Datum pg_stat_statements(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(pg_stat_statements_reset); +PG_FUNCTION_INFO_V1(pg_stat_statements); + +static void pgss_shmem_startup(void); +static void pgss_shmem_shutdown(int code, Datum arg); +static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags); +static void pgss_ExecutorRun(QueryDesc *queryDesc, + ScanDirection direction, + long count); +static void pgss_ExecutorEnd(QueryDesc *queryDesc); +static uint32 pgss_hash_fn(const void *key, Size keysize); +static int pgss_match_fn(const void *key1, const void *key2, Size keysize); +static void pgss_store(const char *query, + const Instrumentation *instr, uint32 rows); +static Size pgss_memsize(void); +static pgssEntry *entry_alloc(pgssHashKey *key); +static void entry_dealloc(void); +static void entry_reset(void); + + +/* + * Module load callback + */ +void +_PG_init(void) +{ + /* + * In order to create our shared memory area, we have to be loaded via + * shared_preload_libraries. If not, fall out without hooking into + * any of the main system. (We don't throw error here because it seems + * useful to allow the pg_stat_statements functions to be created even + * when the module isn't active. The functions must protect themselves + * against being called then, however.) + */ + if (!process_shared_preload_libraries_in_progress) + return; + + /* + * Define (or redefine) custom GUC variables. 
+ */ + DefineCustomIntVariable("pg_stat_statements.max", + "Sets the maximum number of statements tracked by pg_stat_statements.", + NULL, + &pgss_max, + 1000, + 100, + INT_MAX, + PGC_POSTMASTER, + 0, + NULL, + NULL); + + DefineCustomEnumVariable("pg_stat_statements.track", + "Selects which statements are tracked by pg_stat_statements.", + NULL, + &pgss_track, + PGSS_TRACK_TOP, + track_options, + PGC_SUSET, + 0, + NULL, + NULL); + + DefineCustomBoolVariable("pg_stat_statements.save", + "Save pg_stat_statements statistics across server shutdowns.", + NULL, + &pgss_save, + true, + PGC_SIGHUP, + 0, + NULL, + NULL); + + /* + * Request additional shared resources. (These are no-ops if we're not in + * the postmaster process.) We'll allocate or attach to the shared + * resources in pgss_shmem_startup(). + */ + RequestAddinShmemSpace(pgss_memsize()); + RequestAddinLWLocks(1); + + /* + * Install hooks. + */ + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = pgss_shmem_startup; + prev_ExecutorStart = ExecutorStart_hook; + ExecutorStart_hook = pgss_ExecutorStart; + prev_ExecutorRun = ExecutorRun_hook; + ExecutorRun_hook = pgss_ExecutorRun; + prev_ExecutorEnd = ExecutorEnd_hook; + ExecutorEnd_hook = pgss_ExecutorEnd; +} + +/* + * Module unload callback + */ +void +_PG_fini(void) +{ + /* Uninstall hooks. */ + ExecutorStart_hook = prev_ExecutorStart; + ExecutorRun_hook = prev_ExecutorRun; + ExecutorEnd_hook = prev_ExecutorEnd; + shmem_startup_hook = prev_shmem_startup_hook; +} + +/* + * shmem_startup hook: allocate or attach to shared memory, + * then load any pre-existing statistics from file. 
+ */
+static void
+pgss_shmem_startup(void)
+{
+	bool		found;
+	HASHCTL		info;
+	FILE	   *file;
+	uint32		header;
+	int32		num;
+	int32		i;
+	int			query_size;
+	int			buffer_size;
+	char	   *buffer = NULL;
+
+	/* Chain to any previously installed hook first */
+	if (prev_shmem_startup_hook)
+		prev_shmem_startup_hook();
+
+	/* reset in case this is a restart within the postmaster */
+	pgss = NULL;
+	pgss_hash = NULL;
+
+	/*
+	 * Create or attach to the shared memory state, including hash table
+	 */
+	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+
+	pgss = ShmemInitStruct("pg_stat_statements",
+						   sizeof(pgssSharedState),
+						   &found);
+	if (!pgss)
+		elog(ERROR, "out of shared memory");
+
+	if (!found)
+	{
+		/* First time through ... */
+		pgss->lock = LWLockAssign();
+		pgss->query_size = pgstat_track_activity_query_size;
+	}
+
+	/* Be sure everyone agrees on the hash table entry size */
+	query_size = pgss->query_size;
+
+	memset(&info, 0, sizeof(info));
+	info.keysize = sizeof(pgssHashKey);
+	info.entrysize = offsetof(pgssEntry, query) + query_size;
+	info.hash = pgss_hash_fn;
+	info.match = pgss_match_fn;
+	pgss_hash = ShmemInitHash("pg_stat_statements hash",
+							  pgss_max, pgss_max,
+							  &info,
+							  HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
+	if (!pgss_hash)
+		elog(ERROR, "out of shared memory");
+
+	LWLockRelease(AddinShmemInitLock);
+
+	/*
+	 * If we're in the postmaster (or a standalone backend...), set up a
+	 * shmem exit hook to dump the statistics to disk.
+	 */
+	if (!IsUnderPostmaster)
+		on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
+
+	/*
+	 * Attempt to load old statistics from the dump file, but only if we
+	 * actually created the shared state above and we weren't told not to.
+	 *
+	 * If ShmemInitStruct reported the state as already existing, this
+	 * process merely attached to it: the hash table has been set up (and
+	 * possibly populated) by someone else, and reloading the file here would
+	 * modify it without holding pgss->lock.
+	 *
+	 * Note: we don't bother with locks below, because when we are the
+	 * creator there should be no other processes running.
+	 */
+	if (found || !pgss_save)
+		return;
+
+	file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
+	if (file == NULL)
+	{
+		if (errno == ENOENT)
+			return;				/* ignore not-found error */
+		goto error;
+	}
+
+	buffer_size = query_size;
+	buffer = (char *) palloc(buffer_size);
+
+	/* File layout: magic number, entry count, then the entries themselves */
+	if (fread(&header, sizeof(uint32), 1, file) != 1 ||
+		header != PGSS_FILE_HEADER ||
+		fread(&num, sizeof(int32), 1, file) != 1)
+		goto error;
+
+	for (i = 0; i < num; i++)
+	{
+		pgssEntry	temp;
+		pgssEntry  *entry;
+
+		/* Fixed-size part of the entry: everything before the mutex */
+		if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1)
+			goto error;
+
+		/* Encoding is one field we can easily sanity-check */
+		if (!PG_VALID_BE_ENCODING(temp.key.encoding))
+			goto error;
+
+		/*
+		 * So is the sign of the stored length.  A negative value would slip
+		 * past the buffer-size test below and then be converted to a huge
+		 * size_t in fread(), overrunning the buffer.
+		 */
+		if (temp.key.query_len < 0)
+			goto error;
+
+		/* Previous incarnation might have had a larger query_size */
+		if (temp.key.query_len >= buffer_size)
+		{
+			buffer = (char *) repalloc(buffer, temp.key.query_len + 1);
+			buffer_size = temp.key.query_len + 1;
+		}
+
+		if (fread(buffer, 1, temp.key.query_len, file) !=
+			(size_t) temp.key.query_len)
+			goto error;
+		buffer[temp.key.query_len] = '\0';
+
+		/* Clip to available length if needed */
+		if (temp.key.query_len >= query_size)
+			temp.key.query_len = pg_encoding_mbcliplen(temp.key.encoding,
+													   buffer,
+													   temp.key.query_len,
+													   query_size - 1);
+		/* the pointer stored in the file is meaningless; point at our copy */
+		temp.key.query_ptr = buffer;
+
+		/* make the hashtable entry (discards old entries if too many) */
+		entry = entry_alloc(&temp.key);
+
+		/* copy in the actual stats */
+		entry->counters = temp.counters;
+	}
+
+	pfree(buffer);
+	FreeFile(file);
+	return;
+
+error:
+	ereport(LOG,
+			(errcode_for_file_access(),
+			 errmsg("could not read pg_stat_statement file \"%s\": %m",
+					PGSS_DUMP_FILE)));
+	if (buffer)
+		pfree(buffer);
+	if (file)
+		FreeFile(file);
+	/* If possible, throw away the bogus file; ignore any error */
+	unlink(PGSS_DUMP_FILE);
+}
+
+/*
+ * shmem_shutdown hook: Dump statistics into file.
+ *
+ * Note: we don't bother with acquiring lock, because there should be no
+ * other processes running when this is called.
+ */ +static void +pgss_shmem_shutdown(int code, Datum arg) +{ + FILE *file; + HASH_SEQ_STATUS hash_seq; + int32 num_entries; + pgssEntry *entry; + + /* Don't try to dump during a crash. */ + if (code) + return; + + /* Safety check ... shouldn't get here unless shmem is set up. */ + if (!pgss || !pgss_hash) + return; + + /* Don't dump if told not to. */ + if (!pgss_save) + return; + + file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_W); + if (file == NULL) + goto error; + + if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1) + goto error; + num_entries = hash_get_num_entries(pgss_hash); + if (fwrite(&num_entries, sizeof(int32), 1, file) != 1) + goto error; + + hash_seq_init(&hash_seq, pgss_hash); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + int len = entry->key.query_len; + + if (fwrite(entry, offsetof(pgssEntry, mutex), 1, file) != 1 || + fwrite(entry->query, 1, len, file) != len) + goto error; + } + + if (FreeFile(file)) + { + file = NULL; + goto error; + } + + return; + +error: + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write pg_stat_statement file \"%s\": %m", + PGSS_DUMP_FILE))); + if (file) + FreeFile(file); + unlink(PGSS_DUMP_FILE); +} + +/* + * ExecutorStart hook: start up tracking if needed + */ +static void +pgss_ExecutorStart(QueryDesc *queryDesc, int eflags) +{ + if (prev_ExecutorStart) + prev_ExecutorStart(queryDesc, eflags); + else + standard_ExecutorStart(queryDesc, eflags); + + if (pgss_enabled()) + { + /* + * Set up to track total elapsed time in ExecutorRun. Make sure + * the space is allocated in the per-query context so it will go + * away at ExecutorEnd. 
+ */ + if (queryDesc->totaltime == NULL) + { + MemoryContext oldcxt; + + oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); + queryDesc->totaltime = InstrAlloc(1); + MemoryContextSwitchTo(oldcxt); + } + } +} + +/* + * ExecutorRun hook: all we need do is track nesting depth + */ +static void +pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count) +{ + nested_level++; + PG_TRY(); + { + if (prev_ExecutorRun) + prev_ExecutorRun(queryDesc, direction, count); + else + standard_ExecutorRun(queryDesc, direction, count); + nested_level--; + } + PG_CATCH(); + { + nested_level--; + PG_RE_THROW(); + } + PG_END_TRY(); +} + +/* + * ExecutorEnd hook: store results if needed + */ +static void +pgss_ExecutorEnd(QueryDesc *queryDesc) +{ + if (queryDesc->totaltime && pgss_enabled()) + { + /* + * Make sure stats accumulation is done. (Note: it's okay if + * several levels of hook all do this.) + */ + InstrEndLoop(queryDesc->totaltime); + + pgss_store(queryDesc->sourceText, + queryDesc->totaltime, + queryDesc->estate->es_processed); + } + + if (prev_ExecutorEnd) + prev_ExecutorEnd(queryDesc); + else + standard_ExecutorEnd(queryDesc); +} + +/* + * Calculate hash value for a key + */ +static uint32 +pgss_hash_fn(const void *key, Size keysize) +{ + const pgssHashKey *k = (const pgssHashKey *) key; + + /* we don't bother to include encoding in the hash */ + return hash_uint32((uint32) k->userid) ^ + hash_uint32((uint32) k->dbid) ^ + DatumGetUInt32(hash_any((const unsigned char *) k->query_ptr, + k->query_len)); +} + +/* + * Compare two keys - zero means match + */ +static int +pgss_match_fn(const void *key1, const void *key2, Size keysize) +{ + const pgssHashKey *k1 = (const pgssHashKey *) key1; + const pgssHashKey *k2 = (const pgssHashKey *) key2; + + if (k1->userid == k2->userid && + k1->dbid == k2->dbid && + k1->encoding == k2->encoding && + k1->query_len == k2->query_len && + memcmp(k1->query_ptr, k2->query_ptr, k1->query_len) == 0) + return 0; + else 
+ return 1; +} + +/* + * Store some statistics for a statement. + */ +static void +pgss_store(const char *query, const Instrumentation *instr, uint32 rows) +{ + pgssHashKey key; + double usage; + pgssEntry *entry; + + Assert(query != NULL); + + /* Safety check... */ + if (!pgss || !pgss_hash) + return; + + /* Set up key for hashtable search */ + key.userid = GetUserId(); + key.dbid = MyDatabaseId; + key.encoding = GetDatabaseEncoding(); + key.query_len = strlen(query); + if (key.query_len >= pgss->query_size) + key.query_len = pg_encoding_mbcliplen(key.encoding, + query, + key.query_len, + pgss->query_size - 1); + key.query_ptr = query; + + usage = USAGE_EXEC(duration); + + /* Lookup the hash table entry with shared lock. */ + LWLockAcquire(pgss->lock, LW_SHARED); + + entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL); + if (!entry) + { + /* Must acquire exclusive lock to add a new entry. */ + LWLockRelease(pgss->lock); + LWLockAcquire(pgss->lock, LW_EXCLUSIVE); + entry = entry_alloc(&key); + } + + /* Grab the spinlock while updating the counters. */ + { + volatile pgssEntry *e = (volatile pgssEntry *) entry; + + SpinLockAcquire(&e->mutex); + e->counters.calls += 1; + e->counters.total_time += instr->total; + e->counters.rows += rows; + e->counters.usage += usage; + SpinLockRelease(&e->mutex); + } + + LWLockRelease(pgss->lock); +} + +/* + * Reset all statement statistics. + */ +Datum +pg_stat_statements_reset(PG_FUNCTION_ARGS) +{ + if (!pgss || !pgss_hash) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("pg_stat_statements must be loaded via shared_preload_libraries"))); + entry_reset(); + PG_RETURN_VOID(); +} + +#define PG_STAT_STATEMENTS_COLS 6 + +/* + * Retrieve statement statistics. 
+ */ +Datum +pg_stat_statements(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Oid userid = GetUserId(); + bool is_superuser = superuser(); + HASH_SEQ_STATUS hash_seq; + pgssEntry *entry; + + if (!pgss || !pgss_hash) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("pg_stat_statements must be loaded via shared_preload_libraries"))); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not " \ + "allowed in this context"))); + + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + tupdesc = CreateTemplateTupleDesc(PG_STAT_STATEMENTS_COLS, false); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "userid", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "dbid", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "query", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "calls", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "total_time", + FLOAT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "rows", + INT8OID, -1, 0); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + LWLockAcquire(pgss->lock, LW_SHARED); + + hash_seq_init(&hash_seq, pgss_hash); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + Datum values[PG_STAT_STATEMENTS_COLS]; + bool nulls[PG_STAT_STATEMENTS_COLS]; + int i = 
0; + Counters tmp; + + /* generate junk in short-term context */ + MemoryContextSwitchTo(oldcontext); + + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + + values[i++] = ObjectIdGetDatum(entry->key.userid); + values[i++] = ObjectIdGetDatum(entry->key.dbid); + + if (is_superuser || entry->key.userid == userid) + { + char *qstr; + + qstr = (char *) + pg_do_encoding_conversion((unsigned char *) entry->query, + entry->key.query_len, + entry->key.encoding, + GetDatabaseEncoding()); + values[i++] = CStringGetTextDatum(qstr); + if (qstr != entry->query) + pfree(qstr); + } + else + values[i++] = CStringGetTextDatum(""); + + /* copy counters to a local variable to keep locking time short */ + { + volatile pgssEntry *e = (volatile pgssEntry *) entry; + + SpinLockAcquire(&e->mutex); + tmp = e->counters; + SpinLockRelease(&e->mutex); + } + + values[i++] = Int64GetDatumFast(tmp.calls); + values[i++] = Float8GetDatumFast(tmp.total_time); + values[i++] = Int64GetDatumFast(tmp.rows); + + Assert(i == PG_STAT_STATEMENTS_COLS); + + /* switch to appropriate context while storing the tuple */ + MemoryContextSwitchTo(per_query_ctx); + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + + LWLockRelease(pgss->lock); + + /* clean up and return the tuplestore */ + tuplestore_donestoring(tupstore); + + MemoryContextSwitchTo(oldcontext); + + return (Datum) 0; +} + +/* + * Estimate shared memory space needed. + */ +static Size +pgss_memsize(void) +{ + Size size; + Size entrysize; + + size = MAXALIGN(sizeof(pgssSharedState)); + entrysize = offsetof(pgssEntry, query) + pgstat_track_activity_query_size; + size = add_size(size, hash_estimate_size(pgss_max, entrysize)); + + return size; +} + +/* + * Allocate a new hashtable entry. + * caller must hold an exclusive lock on pgss->lock + * + * Note: despite needing exclusive lock, it's not an error for the target + * entry to already exist. 
This is because pgss_store releases and + * reacquires lock after failing to find a match; so someone else could + * have made the entry while we waited to get exclusive lock. + */ +static pgssEntry * +entry_alloc(pgssHashKey *key) +{ + pgssEntry *entry; + bool found; + + /* Caller must have clipped query properly */ + Assert(key->query_len < pgss->query_size); + + /* Make space if needed */ + while (hash_get_num_entries(pgss_hash) >= pgss_max) + entry_dealloc(); + + /* Find or create an entry with desired hash code */ + entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found); + + if (!found) + { + /* New entry, initialize it */ + + /* dynahash tried to copy the key for us, but must fix query_ptr */ + entry->key.query_ptr = entry->query; + /* reset the statistics */ + memset(&entry->counters, 0, sizeof(Counters)); + entry->counters.usage = USAGE_INIT; + /* re-initialize the mutex each time ... we assume no one using it */ + SpinLockInit(&entry->mutex); + /* ... and don't forget the query text */ + memcpy(entry->query, key->query_ptr, key->query_len); + entry->query[key->query_len] = '\0'; + } + + return entry; +} + +/* + * qsort comparator for sorting into increasing usage order + */ +static int +entry_cmp(const void *lhs, const void *rhs) +{ + double l_usage = (*(const pgssEntry **)lhs)->counters.usage; + double r_usage = (*(const pgssEntry **)rhs)->counters.usage; + + if (l_usage < r_usage) + return -1; + else if (l_usage > r_usage) + return +1; + else + return 0; +} + +/* + * Deallocate least used entries. + * Caller must hold an exclusive lock on pgss->lock. + */ +static void +entry_dealloc(void) +{ + HASH_SEQ_STATUS hash_seq; + pgssEntry **entries; + pgssEntry *entry; + int nvictims; + int i; + + /* Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them. 
*/ + + entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *)); + + i = 0; + hash_seq_init(&hash_seq, pgss_hash); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + entries[i++] = entry; + entry->counters.usage *= USAGE_DECREASE_FACTOR; + } + + qsort(entries, i, sizeof(pgssEntry *), entry_cmp); + nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100); + nvictims = Min(nvictims, i); + + for (i = 0; i < nvictims; i++) + { + hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL); + } + + pfree(entries); +} + +/* + * Release all entries. + */ +static void +entry_reset(void) +{ + HASH_SEQ_STATUS hash_seq; + pgssEntry *entry; + + LWLockAcquire(pgss->lock, LW_EXCLUSIVE); + + hash_seq_init(&hash_seq, pgss_hash); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL); + } + + LWLockRelease(pgss->lock); +} diff --git a/contrib/pg_stat_statements/pg_stat_statements.sql.in b/contrib/pg_stat_statements/pg_stat_statements.sql.in new file mode 100644 index 0000000000..7655136ed9 --- /dev/null +++ b/contrib/pg_stat_statements/pg_stat_statements.sql.in @@ -0,0 +1,31 @@ +/* $PostgreSQL: pgsql/contrib/pg_stat_statements/pg_stat_statements.sql.in,v 1.1 2009/01/04 22:19:59 tgl Exp $ */ + +-- Adjust this setting to control where the objects get created. +SET search_path = public; + +-- Register functions. +CREATE FUNCTION pg_stat_statements_reset() +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C; + +CREATE FUNCTION pg_stat_statements( + OUT userid oid, + OUT dbid oid, + OUT query text, + OUT calls int8, + OUT total_time float8, + OUT rows int8 +) +RETURNS SETOF record +AS 'MODULE_PATHNAME' +LANGUAGE C; + +-- Register a view on the function for ease of use. +CREATE VIEW pg_stat_statements AS + SELECT * FROM pg_stat_statements(); + +GRANT SELECT ON pg_stat_statements TO PUBLIC; + +-- Don't want this to be available to non-superusers. 
+REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC; diff --git a/contrib/pg_stat_statements/uninstall_pg_stat_statements.sql b/contrib/pg_stat_statements/uninstall_pg_stat_statements.sql new file mode 100644 index 0000000000..31fd0af39d --- /dev/null +++ b/contrib/pg_stat_statements/uninstall_pg_stat_statements.sql @@ -0,0 +1,8 @@ +/* $PostgreSQL: pgsql/contrib/pg_stat_statements/uninstall_pg_stat_statements.sql,v 1.1 2009/01/04 22:19:59 tgl Exp $ */ + +-- Adjust this setting to control where the objects get dropped. +SET search_path = public; + +DROP VIEW pg_stat_statements; +DROP FUNCTION pg_stat_statements(); +DROP FUNCTION pg_stat_statements_reset(); diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index ecc5a0b23e..89fb5314fe 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -1,4 +1,4 @@ - + Additional Supplied Modules @@ -103,6 +103,7 @@ psql -d dbname -f SHAREDIR/contrib/module.sql &pgfreespacemap; &pgrowlocks; &pgstandby; + &pgstatstatements; &pgstattuple; &pgtrgm; &seg; diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index ea1c7c274f..273d5a0979 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -1,4 +1,4 @@ - + @@ -116,6 +116,7 @@ + diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml new file mode 100644 index 0000000000..93cda9f406 --- /dev/null +++ b/doc/src/sgml/pgstatstatements.sgml @@ -0,0 +1,265 @@ + + + + pg_stat_statements + + + pg_stat_statements + + + + The pg_stat_statements module provides a means for + tracking execution statistics of all SQL statements executed by a server. + + + + The module must be loaded by adding pg_stat_statements to + shared_preload_libraries in + postgresql.conf, because it requires additional shared memory. + This means that a server restart is needed to add or remove the module. 
+ + + + The <structname>pg_stat_statements</structname> view + + + The statistics gathered by the module are made available via a system view + named pg_stat_statements. This view contains one row for + each distinct query text, database ID, and user ID (up to the maximum + number of distinct statements that the module can track). The columns + of the view are: + + + + <structname>pg_stat_statements</> columns + + + + + Name + Type + References + Description + + + + + userid + oid + pg_authid.oid + OID of user who executed the statement + + + + dbid + oid + pg_database.oid + OID of database in which the statement was executed + + + + query + text + + Text of the statement (up to track_activity_query_size bytes) + + + + calls + bigint + + Number of times executed + + + + total_time + double precision + + Total time spent in the statement, in seconds + + + + rows + bigint + + Total number of rows retrieved or affected by the statement + + + + +
+ + + This view, and the function pg_stat_statements_reset, + are available only in databases they have been specifically installed into + by running the pg_stat_statements.sql install script. + However, statistics are tracked across all databases of the server + whenever the pg_stat_statements module is loaded + into the server, regardless of presence of the view. + + + + For security reasons, non-superusers are not allowed to see the text of + queries executed by other users. They can see the statistics, however, + if the view has been installed in their database. + + + + Note that statements are considered the same if they have the same text, + regardless of the values of any out-of-line parameters used in the + statement. Using out-of-line parameters will help to group statements + together and may make the statistics more useful. + +
+ + + Functions + + + + + pg_stat_statements_reset() returns void + + + + + pg_stat_statements_reset discards all statistics + gathered so far by pg_stat_statements. + By default, this function can only be executed by superusers. + + + + + + + + + Configuration parameters + + + + + pg_stat_statements.max (integer) + + + + + pg_stat_statements.max is the maximum number of + statements tracked by the module (i.e., the maximum number of rows + in the pg_stat_statements view). If more distinct + statements than that are observed, information about the least-executed + statements is discarded. + The default value is 1000. + This parameter can only be set at server start. + + + + + + + pg_stat_statements.track (enum) + + + + + pg_stat_statements.track controls which statements + are counted by the module. + Specify top to track top-level statements (those issued + directly by clients), all to also track nested statements + (such as statements invoked within functions), or none to + disable. + The default value is top. + Only superusers can change this setting. + + + + + + + pg_stat_statements.save (boolean) + + + + + pg_stat_statements.save specifies whether to + save statement statistics across server shutdowns. + If it is off then statistics are not saved at + shutdown nor reloaded at server start. + The default value is on. + This parameter can only be set in the postgresql.conf + file or on the server command line. + + + + + + + The module requires additional shared memory amounting to about + pg_stat_statements.max * + track_activity_query_size bytes. Note that this + memory is consumed whenever the module is loaded, even if + pg_stat_statements.track is set to none. + + + + In order to set any of these parameters in your + postgresql.conf file, + you will need to add pg_stat_statements to + custom_variable_classes. 
Typical usage might be: + + + +# postgresql.conf +shared_preload_libraries = 'pg_stat_statements' + +custom_variable_classes = 'pg_stat_statements' +pg_stat_statements.max = 10000 +pg_stat_statements.track = all + + + + + Sample output + + +$ pgbench -i bench + +postgres=# SELECT pg_stat_statements_reset(); + +$ pgbench -c10 -t300 -M prepared bench + +postgres=# \x +postgres=# SELECT * FROM pg_stat_statements ORDER BY total_time DESC LIMIT 3; +-[ RECORD 1 ]------------------------------------------------------------ +userid | 10 +dbid | 63781 +query | UPDATE branches SET bbalance = bbalance + $1 WHERE bid = $2; +calls | 3000 +total_time | 20.716706 +rows | 3000 +-[ RECORD 2 ]------------------------------------------------------------ +userid | 10 +dbid | 63781 +query | UPDATE tellers SET tbalance = tbalance + $1 WHERE tid = $2; +calls | 3000 +total_time | 17.1107649999999 +rows | 3000 +-[ RECORD 3 ]------------------------------------------------------------ +userid | 10 +dbid | 63781 +query | UPDATE accounts SET abalance = abalance + $1 WHERE aid = $2; +calls | 3000 +total_time | 0.645601 +rows | 3000 + + + + + Author + + + Takahiro Itagaki itagaki.takahiro@oss.ntt.co.jp + + + +
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 22bc5b1f3f..0d0d23f53a 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.569 2009/01/03 17:08:38 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.570 2009/01/04 22:19:59 tgl Exp $ * * NOTES * @@ -2731,7 +2731,7 @@ PostmasterStateMachine(void) ereport(LOG, (errmsg("all server processes terminated; reinitializing"))); - shmem_exit(0); + shmem_exit(1); reset_shared(PostPortNumber); StartupPID = StartupDataBase(); diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 5bb4cc597a..169821b79e 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -5,7 +5,7 @@ * * Copyright (c) 2001-2009, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.81 2009/01/01 17:23:55 momjian Exp $ + * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.82 2009/01/04 22:19:59 tgl Exp $ * ---------- */ #ifndef PGSTAT_H @@ -592,7 +592,7 @@ typedef struct PgStat_FunctionCallUsage extern bool pgstat_track_activities; extern bool pgstat_track_counts; extern int pgstat_track_functions; -extern int pgstat_track_activity_query_size; +extern PGDLLIMPORT int pgstat_track_activity_query_size; extern char *pgstat_stat_tmpname; extern char *pgstat_stat_filename;