From 15739393e4c3b64b9038d75784e848a415827517 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Tue, 10 May 2016 16:23:54 -0300 Subject: [PATCH] Fix autovacuum for shared relations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The table-skipping logic in autovacuum would fail to consider that multiple workers could be processing the same shared catalog in different databases. This normally wouldn't be a problem: firstly because autovacuum workers that are not for wraparound would simply ignore tables on which they cannot acquire a lock, and secondly because most of the time these tables are small enough that even if multiple for-wraparound workers are stuck in the same catalog, they would be over pretty quickly. But in cases where the catalogs are severely bloated it could become a problem. Backpatch all the way back, because the problem has been there since the beginning. Reported by Ondřej Světlík Discussion: https://www.postgresql.org/message-id/572B63B1.3030603%40flexibee.eu https://www.postgresql.org/message-id/572A1072.5080308%40flexibee.eu --- src/backend/postmaster/autovacuum.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index e2859df41d..6bdaac50e0 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -190,6 +190,7 @@ typedef struct autovac_table int at_vacuum_cost_delay; int at_vacuum_cost_limit; bool at_dobalance; + bool at_sharedrel; char *at_relname; char *at_nspname; char *at_datname; @@ -203,6 +204,7 @@ typedef struct autovac_table * wi_links entry into free list or running list * wi_dboid OID of the database this worker is supposed to work on * wi_tableoid OID of the table currently being vacuumed, if any + * wi_sharedrel flag indicating whether table is marked relisshared * wi_proc pointer to PGPROC of the running worker, NULL if not started * wi_launchtime Time at 
which this worker was launched * wi_cost_* Vacuum cost-based delay parameters current in this worker @@ -220,6 +222,7 @@ typedef struct WorkerInfoData PGPROC *wi_proc; TimestampTz wi_launchtime; bool wi_dobalance; + bool wi_sharedrel; int wi_cost_delay; int wi_cost_limit; int wi_cost_limit_base; @@ -717,6 +720,7 @@ AutoVacLauncherMain(int argc, char *argv[]) worker = AutoVacuumShmem->av_startingWorker; worker->wi_dboid = InvalidOid; worker->wi_tableoid = InvalidOid; + worker->wi_sharedrel = false; worker->wi_proc = NULL; worker->wi_launchtime = 0; dlist_push_head(&AutoVacuumShmem->av_freeWorkers, @@ -1683,6 +1687,7 @@ FreeWorkerInfo(int code, Datum arg) dlist_delete(&MyWorkerInfo->wi_links); MyWorkerInfo->wi_dboid = InvalidOid; MyWorkerInfo->wi_tableoid = InvalidOid; + MyWorkerInfo->wi_sharedrel = false; MyWorkerInfo->wi_proc = NULL; MyWorkerInfo->wi_launchtime = 0; MyWorkerInfo->wi_dobalance = false; @@ -2229,8 +2234,8 @@ do_autovacuum(void) if (worker == MyWorkerInfo) continue; - /* ignore workers in other databases */ - if (worker->wi_dboid != MyDatabaseId) + /* ignore workers in other databases (unless table is shared) */ + if (!worker->wi_sharedrel && worker->wi_dboid != MyDatabaseId) continue; if (worker->wi_tableoid == relid) @@ -2271,6 +2276,7 @@ do_autovacuum(void) * the lock so that other workers don't vacuum it concurrently. */ MyWorkerInfo->wi_tableoid = relid; + MyWorkerInfo->wi_sharedrel = tab->at_sharedrel; LWLockRelease(AutovacuumScheduleLock); /* @@ -2382,6 +2388,7 @@ deleted: */ LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE); MyWorkerInfo->wi_tableoid = InvalidOid; + MyWorkerInfo->wi_sharedrel = false; LWLockRelease(AutovacuumLock); /* restore vacuum cost GUCs for the next iteration */ @@ -2577,6 +2584,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, tab = palloc(sizeof(autovac_table)); tab->at_relid = relid; + tab->at_sharedrel = classForm->relisshared; tab->at_vacoptions = VACOPT_SKIPTOAST | (dovacuum ? VACOPT_VACUUM : 0) | (doanalyze ? 
VACOPT_ANALYZE : 0) |