postgresql/src/backend/postmaster/autovacuum.c

770 lines
20 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* autovacuum.c
*
* PostgreSQL Integrated Autovacuum Daemon
*
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.2 2005/07/29 19:30:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <signal.h>
#include <time.h>
#include <sys/types.h>
#include <unistd.h>
#include "access/genam.h"
#include "access/heapam.h"
#include "access/xlog.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_autovacuum.h"
#include "commands/vacuum.h"
#include "libpq/hba.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/fork_process.h"
#include "postmaster/postmaster.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/sinval.h"
#include "tcop/tcopprot.h"
#include "utils/flatfiles.h"
#include "utils/fmgroids.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/relcache.h"
/*
* GUC parameters
*/
bool autovacuum_start_daemon = false;
int autovacuum_naptime;
int autovacuum_vac_thresh;
double autovacuum_vac_scale;
int autovacuum_anl_thresh;
double autovacuum_anl_scale;
/* Flag to tell if we are in the autovacuum daemon process */
static bool am_autovacuum = false;
/* Last time autovac daemon started/stopped (only valid in postmaster) */
static time_t last_autovac_start_time = 0;
static time_t last_autovac_stop_time = 0;
/* struct to keep list of candidate databases for vacuum */
typedef struct autovac_dbase
{
Oid oid;
char *name;
TransactionId frozenxid;
PgStat_StatDBEntry *entry;
int32 age;
} autovac_dbase;
#ifdef EXEC_BACKEND
static pid_t autovac_forkexec(void);
#endif
NON_EXEC_STATIC void AutoVacMain(int argc, char *argv[]);
static void do_autovacuum(bool whole_db, PgStat_StatDBEntry *dbentry);
static List *autovac_get_database_list(void);
static void test_rel_for_autovac(Oid relid, PgStat_StatTabEntry *tabentry,
Form_pg_class classForm, Form_pg_autovacuum avForm,
List **vacuum_tables, List **analyze_tables);
static void autovacuum_do_vac_analyze(List *relids, bool dovacuum);
/*
* Main entry point for autovacuum controller process.
*
* This code is heavily based on pgarch.c, q.v.
*/
int
autovac_start(void)
{
time_t curtime;
pid_t AutoVacPID;
/* Do nothing if no autovacuum process needed */
if (!AutoVacuumingActive())
return 0;
/*
* Do nothing if too soon since last autovacuum exit. This limits
* how often the daemon runs. Since the time per iteration can be
* quite variable, it seems more useful to measure/control the time
* since last subprocess exit than since last subprocess launch.
*
* However, we *also* check the time since last subprocess launch;
* this prevents thrashing under fork-failure conditions.
*
* Note that since we will be re-called from the postmaster main loop,
* we will get another chance later if we do nothing now.
*
* XXX todo: implement sleep scale factor that existed in contrib code.
*/
curtime = time(NULL);
if ((unsigned int) (curtime - last_autovac_stop_time) <
(unsigned int) autovacuum_naptime)
return 0;
if ((unsigned int) (curtime - last_autovac_start_time) <
(unsigned int) autovacuum_naptime)
return 0;
last_autovac_start_time = curtime;
#ifdef EXEC_BACKEND
switch((AutoVacPID = autovac_forkexec()))
#else
switch((AutoVacPID = fork_process()))
#endif
{
case -1:
ereport(LOG,
(errmsg("could not fork autovacuum process: %m")));
return 0;
#ifndef EXEC_BACKEND
case 0:
/* in postmaster child ... */
/* Close the postmaster's sockets */
ClosePostmasterPorts(false);
AutoVacMain(0, NULL);
break;
#endif
default:
return (int) AutoVacPID;
}
/* shouldn't get here */
return 0;
}
/*
* autovac_stopped --- called by postmaster when subprocess exit is detected
*/
void
autovac_stopped(void)
{
last_autovac_stop_time = time(NULL);
}
#ifdef EXEC_BACKEND
/*
* autovac_forkexec()
*
* Format up the arglist for the autovacuum process, then fork and exec.
*/
static pid_t
autovac_forkexec(void)
{
char *av[10];
int ac = 0;
av[ac++] = "postgres";
av[ac++] = "-forkautovac";
av[ac++] = NULL; /* filled in by postmaster_forkexec */
av[ac] = NULL;
Assert(ac < lengthof(av));
return postmaster_forkexec(ac, av);
}
#endif /* EXEC_BACKEND */
/*
* AutoVacMain
*/
NON_EXEC_STATIC void
AutoVacMain(int argc, char *argv[])
{
ListCell *cell;
List *dblist;
TransactionId nextXid;
autovac_dbase *db;
bool whole_db;
sigjmp_buf local_sigjmp_buf;
/* we are a postmaster subprocess now */
IsUnderPostmaster = true;
am_autovacuum = true;
/* reset MyProcPid */
MyProcPid = getpid();
/* Lose the postmaster's on-exit routines */
on_exit_reset();
/*
* Set up signal handlers. We operate on databases much like a
* regular backend, so we use the same signal handling. See
* equivalent code in tcop/postgres.c.
*
* Currently, we don't pay attention to postgresql.conf changes
* that happen during a single daemon iteration, so we can ignore
* SIGHUP.
*/
pqsignal(SIGHUP, SIG_IGN);
/*
* Presently, SIGINT will lead to autovacuum shutdown, because that's
* how we handle ereport(ERROR). It could be improved however.
*/
pqsignal(SIGINT, StatementCancelHandler);
pqsignal(SIGTERM, die);
pqsignal(SIGQUIT, quickdie);
pqsignal(SIGALRM, handle_sig_alarm);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, CatchupInterruptHandler);
/* We don't listen for async notifies */
pqsignal(SIGUSR2, SIG_IGN);
pqsignal(SIGCHLD, SIG_DFL);
/* Identify myself via ps */
init_ps_display("autovacuum process", "", "");
set_ps_display("");
/* Early initialization */
BaseInit();
/*
* If an exception is encountered, processing resumes here.
*
* See notes in postgres.c about the design of this coding.
*/
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
{
/* Prevents interrupts while cleaning up */
HOLD_INTERRUPTS();
/* Report the error to the server log */
EmitErrorReport();
/*
* We can now go away. Note that because we'll call InitProcess,
* a callback will be registered to do ProcKill, which will clean
* up necessary state.
*/
proc_exit(0);
}
/* We can now handle ereport(ERROR) */
PG_exception_stack = &local_sigjmp_buf;
PG_SETMASK(&UnBlockSig);
/* Get a list of databases */
dblist = autovac_get_database_list();
/*
* Get the next Xid that was current as of the last checkpoint.
* We need it to determine whether databases are about to need
* database-wide vacuums.
*/
nextXid = GetRecentNextXid();
/*
* Choose a database to connect to. We pick the database that was least
* recently auto-vacuumed, or one that needs database-wide vacuum (to
* prevent Xid wraparound-related data loss).
*
* Note that a database with no stats entry is not considered, except
* for Xid wraparound purposes. The theory is that if no one has ever
* connected to it since the stats were last initialized, it doesn't
* need vacuuming.
*
* XXX This could be improved if we had more info about whether it needs
* vacuuming before connecting to it. Perhaps look through the pgstats
* data for the database's tables? One idea is to keep track of the
* number of new and dead tuples per database in pgstats. However it
* isn't clear how to construct a metric that measures that and not
* cause starvation for less busy databases.
*/
db = NULL;
whole_db = false;
foreach(cell, dblist)
{
autovac_dbase *tmp = lfirst(cell);
bool this_whole_db;
/*
* We look for the database that most urgently needs a database-wide
* vacuum. We decide that a database-wide vacuum is needed 100000
* transactions sooner than vacuum.c's vac_truncate_clog() would
* decide to start giving warnings. If any such db is found, we
* ignore all other dbs.
*/
tmp->age = (int32) (nextXid - tmp->frozenxid);
this_whole_db = (tmp->age > (int32) ((MaxTransactionId >> 3) * 3 - 100000));
if (whole_db || this_whole_db)
{
if (!this_whole_db)
continue;
if (db == NULL || tmp->age > db->age)
{
db = tmp;
whole_db = true;
}
continue;
}
/*
* Otherwise, skip a database with no pgstat entry; it means it hasn't
* seen any activity.
*/
tmp->entry = pgstat_fetch_stat_dbentry(tmp->oid);
if (!tmp->entry)
continue;
/*
* Don't try to access a database that was dropped. This could only
* happen if we read the pg_database flat file right before it was
* modified, after the database was dropped from the pg_database
* table. (This is of course a not-very-bulletproof test, but it's
* cheap to make. If we do mistakenly choose a recently dropped
* database, InitPostgres will fail and we'll drop out until the
* next autovac run.)
*/
if (tmp->entry->destroy != 0)
continue;
/*
* Else remember the db with oldest autovac time.
*/
if (db == NULL ||
tmp->entry->last_autovac_time < db->entry->last_autovac_time)
db = tmp;
}
if (db)
{
/*
* Connect to the selected database
*/
InitPostgres(db->name, NULL);
SetProcessingMode(NormalProcessing);
pgstat_report_autovac();
set_ps_display(db->name);
ereport(LOG,
(errmsg("autovacuum: processing database \"%s\"", db->name)));
/*
* And do an appropriate amount of work on it
*/
do_autovacuum(whole_db, db->entry);
}
/* One iteration done, go away */
proc_exit(0);
}
/*
* autovac_get_database_list
*
* Return a list of all databases. Note we cannot use pg_database,
* because we aren't connected yet; we use the flat database file.
*/
static List *
autovac_get_database_list(void)
{
char *filename;
List *dblist = NIL;
char thisname[NAMEDATALEN];
FILE *db_file;
Oid db_id;
Oid db_tablespace;
TransactionId db_frozenxid;
filename = database_getflatfilename();
db_file = AllocateFile(filename, "r");
if (db_file == NULL)
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", filename)));
while (read_pg_database_line(db_file, thisname, &db_id,
&db_tablespace, &db_frozenxid))
{
autovac_dbase *db;
db = (autovac_dbase *) palloc(sizeof(autovac_dbase));
db->oid = db_id;
db->name = pstrdup(thisname);
db->frozenxid = db_frozenxid;
/* these get set later: */
db->entry = NULL;
db->age = 0;
dblist = lappend(dblist, db);
}
FreeFile(db_file);
pfree(filename);
return dblist;
}
/*
* Process a database.
*
* If whole_db is true, the database is processed as a whole, and the
* dbentry parameter is ignored. If it's false, dbentry must be a valid
* pointer to the database entry in the stats databases' hash table, and
* it will be used to determine whether vacuum or analyze is needed on a
* per-table basis.
*
* Note that test_rel_for_autovac generates two separate lists, one for
* vacuum and other for analyze. This is to facilitate processing all
* analyzes first, and then all vacuums.
*
* Note that CHECK_FOR_INTERRUPTS is supposed to be used in certain spots in
* order not to ignore shutdown commands for too long.
*/
static void
do_autovacuum(bool whole_db, PgStat_StatDBEntry *dbentry)
{
Relation classRel,
avRel;
HeapTuple tuple;
HeapScanDesc relScan;
List *vacuum_tables = NIL,
*analyze_tables = NIL;
MemoryContext AutovacMemCxt;
Assert(whole_db || PointerIsValid(dbentry));
/* Memory context where cross-transaction state is stored */
AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
"Autovacuum context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
/* Start a transaction so our commands have one to play into. */
StartTransactionCommand();
/*
* StartTransactionCommand and CommitTransactionCommand will
* automatically switch to other contexts. We need this one
* to keep the list of relations to vacuum/analyze across
* transactions.
*/
MemoryContextSwitchTo(AutovacMemCxt);
if (whole_db)
{
elog(DEBUG2, "autovacuum: VACUUM ANALYZE whole database");
autovacuum_do_vac_analyze(NIL, true);
}
else
{
/* the hash entry where pgstat stores shared relations */
PgStat_StatDBEntry *shared = pgstat_fetch_stat_dbentry(InvalidOid);
classRel = heap_open(RelationRelationId, AccessShareLock);
avRel = heap_open(AutovacuumRelationId, AccessShareLock);
relScan = heap_beginscan(classRel, SnapshotNow, 0, NULL);
/* Scan pg_class looking for tables to vacuum */
while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
{
Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
Form_pg_autovacuum avForm = NULL;
PgStat_StatTabEntry *tabentry;
SysScanDesc avScan;
HeapTuple avTup;
ScanKeyData entry[1];
Oid relid;
/* Skip non-table entries. */
/* XXX possibly allow RELKIND_TOASTVALUE entries here too? */
if (classForm->relkind != RELKIND_RELATION)
continue;
/*
* Skip temp tables (i.e. those in temp namespaces). We cannot
* safely process other backends' temp tables.
*/
if (isTempNamespace(classForm->relnamespace))
continue;
relid = HeapTupleGetOid(tuple);
/* See if we have a pg_autovacuum entry for this relation. */
ScanKeyInit(&entry[0],
Anum_pg_autovacuum_vacrelid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(relid));
avScan = systable_beginscan(avRel, AutovacuumRelidIndexId, true,
SnapshotNow, 1, entry);
avTup = systable_getnext(avScan);
if (HeapTupleIsValid(avTup))
avForm = (Form_pg_autovacuum) GETSTRUCT(avTup);
if (classForm->relisshared && PointerIsValid(shared))
tabentry = hash_search(shared->tables, &relid,
HASH_FIND, NULL);
else
tabentry = hash_search(dbentry->tables, &relid,
HASH_FIND, NULL);
test_rel_for_autovac(relid, tabentry, classForm, avForm,
&vacuum_tables, &analyze_tables);
systable_endscan(avScan);
}
heap_endscan(relScan);
heap_close(avRel, AccessShareLock);
heap_close(classRel, AccessShareLock);
CHECK_FOR_INTERRUPTS();
/*
* Perform operations on collected tables.
*/
if (analyze_tables)
autovacuum_do_vac_analyze(analyze_tables, false);
CHECK_FOR_INTERRUPTS();
/* get back to proper context */
MemoryContextSwitchTo(AutovacMemCxt);
if (vacuum_tables)
autovacuum_do_vac_analyze(vacuum_tables, true);
}
/* Finally close out the last transaction. */
CommitTransactionCommand();
}
/*
* test_rel_for_autovac
*
* Check whether a table needs to be vacuumed or analyzed. Add it to the
* respective list if so.
*
* A table needs to be vacuumed if the number of dead tuples exceeds a
* threshold. This threshold is calculated as
*
* threshold = vac_base_thresh + vac_scale_factor * reltuples
*
* For analyze, the analysis done is that the number of tuples inserted,
* deleted and updated since the last analyze exceeds a threshold calculated
* in the same fashion as above. Note that the collector actually stores
* the number of tuples (both live and dead) that there were as of the last
* analyze. This is asymmetric to the VACUUM case.
*
* A table whose pg_autovacuum.enabled value is false, is automatically
* skipped. Thus autovacuum can be disabled for specific tables. Also,
* when the stats collector does not have data about a table, it will be
* skipped.
*
* A table whose vac_base_thresh value is <0 takes the base value from the
* autovacuum_vacuum_threshold GUC variable. Similarly, a vac_scale_factor
* value <0 is substituted with the value of
* autovacuum_vacuum_scale_factor GUC variable. Ditto for analyze.
*/
static void
test_rel_for_autovac(Oid relid, PgStat_StatTabEntry *tabentry,
Form_pg_class classForm,
Form_pg_autovacuum avForm,
List **vacuum_tables, List **analyze_tables)
{
Relation rel;
float4 reltuples; /* pg_class.reltuples */
/* constants from pg_autovacuum or GUC variables */
int vac_base_thresh,
anl_base_thresh;
float4 vac_scale_factor,
anl_scale_factor;
/* thresholds calculated from above constants */
float4 vacthresh,
anlthresh;
/* number of vacuum (resp. analyze) tuples at this time */
float4 vactuples,
anltuples;
/* User disabled it in pg_autovacuum? */
if (avForm && !avForm->enabled)
return;
/*
* Skip a table not found in stat hash. If it's not acted upon,
* there's no need to vacuum it. (Note that database-level check
* will take care of Xid wraparound.)
*/
if (!PointerIsValid(tabentry))
return;
rel = RelationIdGetRelation(relid);
/* The table was recently dropped? */
if (!PointerIsValid(rel))
return;
reltuples = rel->rd_rel->reltuples;
vactuples = tabentry->n_dead_tuples;
anltuples = tabentry->n_live_tuples + tabentry->n_dead_tuples -
tabentry->last_anl_tuples;
/*
* If there is a tuple in pg_autovacuum, use it; else, use the GUC
* defaults. Note that the fields may contain "-1" (or indeed any
* negative value), which means use the GUC defaults for each setting.
*/
if (avForm != NULL)
{
vac_scale_factor = (avForm->vac_scale_factor < 0) ?
autovacuum_vac_scale : avForm->vac_scale_factor;
vac_base_thresh = (avForm->vac_base_thresh < 0) ?
autovacuum_vac_thresh : avForm->vac_base_thresh;
anl_scale_factor = (avForm->anl_scale_factor < 0) ?
autovacuum_anl_scale : avForm->anl_scale_factor;
anl_base_thresh = (avForm->anl_base_thresh < 0) ?
autovacuum_anl_thresh : avForm->anl_base_thresh;
}
else
{
vac_scale_factor = autovacuum_vac_scale;
vac_base_thresh = autovacuum_vac_thresh;
anl_scale_factor = autovacuum_anl_scale;
anl_base_thresh = autovacuum_anl_thresh;
}
vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;
/*
* Note that we don't need to take special consideration for stat
* reset, because if that happens, the last vacuum and analyze counts
* will be reset too.
*/
elog(DEBUG2, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f)",
RelationGetRelationName(rel),
vactuples, vacthresh, anltuples, anlthresh);
/* Determine if this table needs vacuum or analyze. */
if (vactuples > vacthresh)
{
elog(DEBUG2, "will VACUUM ANALYZE %s",
RelationGetRelationName(rel));
*vacuum_tables = lappend_oid(*vacuum_tables, relid);
}
else if (anltuples > anlthresh)
{
/* ANALYZE refuses to work with pg_statistics */
if (relid != StatisticRelationId)
{
elog(DEBUG2, "will ANALYZE %s",
RelationGetRelationName(rel));
*analyze_tables = lappend_oid(*analyze_tables, relid);
}
}
RelationClose(rel);
}
/*
* autovacuum_do_vac_analyze
* Vacuum or analyze a list of tables; or all tables if relids = NIL
*
* We must be in AutovacMemCxt when this routine is called.
*/
static void
autovacuum_do_vac_analyze(List *relids, bool dovacuum)
{
VacuumStmt *vacstmt = makeNode(VacuumStmt);
/*
* Point QueryContext to the autovac memory context to fake out the
* PreventTransactionChain check inside vacuum(). Note that this
* is also why we palloc vacstmt instead of just using a local variable.
*/
QueryContext = CurrentMemoryContext;
/* Set up command parameters */
vacstmt->vacuum = dovacuum;
vacstmt->full = false;
vacstmt->analyze = true;
vacstmt->freeze = false;
vacstmt->verbose = false;
vacstmt->relation = NULL; /* all tables, or not used if relids != NIL */
vacstmt->va_cols = NIL;
vacuum(vacstmt, relids);
}
/*
* AutoVacuumingActive
* Check GUC vars and report whether the autovacuum process should be
* running.
*/
bool
AutoVacuumingActive(void)
{
if (!autovacuum_start_daemon || !pgstat_collect_startcollector ||
!pgstat_collect_tuplelevel)
return false;
return true;
}
/*
* autovac_init
* This is called at postmaster initialization.
*
* Annoy the user if he got it wrong.
*/
void
autovac_init(void)
{
if (!autovacuum_start_daemon)
return;
if (!pgstat_collect_startcollector || !pgstat_collect_tuplelevel)
{
ereport(WARNING,
(errmsg("autovacuum not started because of misconfiguration"),
errhint("Enable options \"stats_start_collector\" and \"stats_row_level\".")));
/*
* Set the GUC var so we don't fork autovacuum uselessly, and also to
* help debugging.
*/
autovacuum_start_daemon = false;
}
}
/*
* IsAutoVacuumProcess
* Return whether this process is an autovacuum process.
*/
bool
IsAutoVacuumProcess(void)
{
return am_autovacuum;
}