Minor changes to autovacuum worker: change error handling so that it continues

with the next table on schedule instead of exiting, in all cases instead of
just on query cancel.

Add a errcontext() line indicating the activity of the worker to the error
message when it is cancelled.

Change the WorkerInfo struct to contain a pointer to the worker's PGPROC
instead of just the PID.

Add forgotten post-auth delays, per Simon Riggs.  Also to autovac launcher.
This commit is contained in:
Alvaro Herrera 2007-10-24 19:08:25 +00:00
parent c29a9c37bf
commit 2627c83561
1 changed files with 52 additions and 43 deletions

View File

@ -55,7 +55,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.61 2007/09/24 04:12:01 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.62 2007/10/24 19:08:25 alvherre Exp $
*
*-------------------------------------------------------------------------
*/
@ -182,7 +182,7 @@ typedef struct autovac_table
* wi_links entry into free list or running list
* wi_dboid OID of the database this worker is supposed to work on
* wi_tableoid OID of the table currently being vacuumed
* wi_workerpid PID of the running worker, 0 if not yet started
* wi_proc pointer to PGPROC of the running worker, NULL if not started
* wi_launchtime Time at which this worker was launched
* wi_cost_* Vacuum cost-based delay parameters current in this worker
*
@ -196,7 +196,7 @@ typedef struct WorkerInfoData
SHM_QUEUE wi_links;
Oid wi_dboid;
Oid wi_tableoid;
int wi_workerpid;
PGPROC *wi_proc;
TimestampTz wi_launchtime;
int wi_cost_delay;
int wi_cost_limit;
@ -395,6 +395,9 @@ AutoVacLauncherMain(int argc, char *argv[])
/* Identify myself via ps */
init_ps_display("autovacuum launcher process", "", "", "");
if (PostAuthDelay)
pg_usleep(PostAuthDelay * 1000000L);
SetProcessingMode(InitProcessing);
/*
@ -694,7 +697,7 @@ AutoVacLauncherMain(int argc, char *argv[])
worker = (WorkerInfo) MAKE_PTR(AutoVacuumShmem->av_startingWorker);
worker->wi_dboid = InvalidOid;
worker->wi_tableoid = InvalidOid;
worker->wi_workerpid = 0;
worker->wi_proc = NULL;
worker->wi_launchtime = 0;
worker->wi_links.next = AutoVacuumShmem->av_freeWorkers;
AutoVacuumShmem->av_freeWorkers = MAKE_OFFSET(worker);
@ -1198,7 +1201,7 @@ do_start_worker(void)
AutoVacuumShmem->av_freeWorkers = worker->wi_links.next;
worker->wi_dboid = avdb->adw_datid;
worker->wi_workerpid = 0;
worker->wi_proc = NULL;
worker->wi_launchtime = GetCurrentTimestamp();
AutoVacuumShmem->av_startingWorker = sworker;
@ -1437,6 +1440,9 @@ AutoVacWorkerMain(int argc, char *argv[])
/* Identify myself via ps */
init_ps_display("autovacuum worker process", "", "", "");
if (PostAuthDelay)
pg_usleep(PostAuthDelay * 1000000L);
SetProcessingMode(InitProcessing);
/*
@ -1542,7 +1548,7 @@ AutoVacWorkerMain(int argc, char *argv[])
{
MyWorkerInfo = (WorkerInfo) MAKE_PTR(AutoVacuumShmem->av_startingWorker);
dbid = MyWorkerInfo->wi_dboid;
MyWorkerInfo->wi_workerpid = MyProcPid;
MyWorkerInfo->wi_proc = MyProc;
/* insert into the running list */
SHMQueueInsertBefore(&AutoVacuumShmem->av_runningWorkers,
@ -1637,7 +1643,7 @@ FreeWorkerInfo(int code, Datum arg)
MyWorkerInfo->wi_links.next = AutoVacuumShmem->av_freeWorkers;
MyWorkerInfo->wi_dboid = InvalidOid;
MyWorkerInfo->wi_tableoid = InvalidOid;
MyWorkerInfo->wi_workerpid = 0;
MyWorkerInfo->wi_proc = NULL;
MyWorkerInfo->wi_launchtime = 0;
MyWorkerInfo->wi_cost_delay = 0;
MyWorkerInfo->wi_cost_limit = 0;
@ -1701,7 +1707,7 @@ autovac_balance_cost(void)
offsetof(WorkerInfoData, wi_links));
while (worker)
{
if (worker->wi_workerpid != 0 &&
if (worker->wi_proc != NULL &&
worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
cost_total +=
(double) worker->wi_cost_limit_base / worker->wi_cost_delay;
@ -1724,7 +1730,7 @@ autovac_balance_cost(void)
offsetof(WorkerInfoData, wi_links));
while (worker)
{
if (worker->wi_workerpid != 0 &&
if (worker->wi_proc != NULL &&
worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
{
int limit = (int)
@ -1737,7 +1743,7 @@ autovac_balance_cost(void)
worker->wi_cost_limit = Max(Min(limit, worker->wi_cost_limit_base), 1);
elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, cost_limit=%d, cost_delay=%d)",
worker->wi_workerpid, worker->wi_dboid,
worker->wi_proc->pid, worker->wi_dboid,
worker->wi_tableoid, worker->wi_cost_limit, worker->wi_cost_delay);
}
@ -2062,25 +2068,27 @@ next_worker:
VacuumCostDelay = tab->at_vacuum_cost_delay;
VacuumCostLimit = tab->at_vacuum_cost_limit;
/*
* Advertise my cost delay parameters for the balancing algorithm, and
* do a balance
*/
/* Last fixups before actually starting to work */
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
/* advertise my cost delay parameters for the balancing algorithm */
MyWorkerInfo->wi_cost_delay = tab->at_vacuum_cost_delay;
MyWorkerInfo->wi_cost_limit = tab->at_vacuum_cost_limit;
MyWorkerInfo->wi_cost_limit_base = tab->at_vacuum_cost_limit;
/* do a balance */
autovac_balance_cost();
/* done */
LWLockRelease(AutovacuumLock);
/* clean up memory before each iteration */
MemoryContextResetAndDeleteChildren(PortalContext);
/*
* We will abort vacuuming the current table if we are interrupted, and
* continue with the next one in schedule; but if anything else
* happens, we will do our usual error handling which is to cause the
* worker process to exit.
* We will abort vacuuming the current table if something errors out,
* and continue with the next one in schedule; in particular, this
* happens if we are interrupted with SIGINT.
*/
PG_TRY();
{
@ -2094,39 +2102,40 @@ next_worker:
}
PG_CATCH();
{
ErrorData *errdata;
MemoryContextSwitchTo(TopTransactionContext);
errdata = CopyErrorData();
/*
* If we errored out due to a cancel request, abort and restart the
* transaction and go to the next table. Otherwise rethrow the
* error so that the outermost handler deals with it.
* Abort the transaction, start a new one, and proceed with the
* next table in our list.
*/
if (errdata->sqlerrcode == ERRCODE_QUERY_CANCELED)
{
HOLD_INTERRUPTS();
elog(LOG, "cancelling autovacuum of table \"%s.%s.%s\"",
get_database_name(MyDatabaseId),
get_namespace_name(get_rel_namespace(tab->at_relid)),
get_rel_name(tab->at_relid));
AbortOutOfAnyTransaction();
FlushErrorState();
MemoryContextResetAndDeleteChildren(PortalContext);
/* restart our transaction for the following operations */
StartTransactionCommand();
RESUME_INTERRUPTS();
}
HOLD_INTERRUPTS();
if (tab->at_dovacuum)
errcontext("automatic vacuum of table \"%s.%s.%s\"",
get_database_name(MyDatabaseId),
get_namespace_name(get_rel_namespace(tab->at_relid)),
get_rel_name(tab->at_relid));
else
PG_RE_THROW();
errcontext("automatic analyze of table \"%s.%s.%s\"",
get_database_name(MyDatabaseId),
get_namespace_name(get_rel_namespace(tab->at_relid)),
get_rel_name(tab->at_relid));
EmitErrorReport();
AbortOutOfAnyTransaction();
FlushErrorState();
MemoryContextResetAndDeleteChildren(PortalContext);
/* restart our transaction for the following operations */
StartTransactionCommand();
RESUME_INTERRUPTS();
}
PG_END_TRY();
/* be tidy */
pfree(tab);
/* remove my info from shared memory */
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
MyWorkerInfo->wi_tableoid = InvalidOid;
LWLockRelease(AutovacuumLock);
}
/*