From 5a9325fdde1d8ab8031fa09ebd0f63dc3f967a0a Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 4 Sep 2023 08:04:43 +0900 Subject: [PATCH] Fix handling of shared statistics with dropped databases Dropping a database while a connection is attempted on it was able to lead to the presence of valid database entries in shared statistics. The issue is that MyDatabaseId was getting set too early than it should, as, if the connection attempted on the dropped database fails when renamed or dropped, the shutdown callback of the shared statistics would finish by re-inserting a correct entry related to the database already dropped. As analyzed by the bug reporters, this issue could lead to phantom entries in the database list maintained by the autovacuum launcher (in rebuild_database_list()) if the database dropped was part of the database list when it was still valid. After the database was dropped, it would remain the highest on the list of databases to considered by the autovacuum worker as things to process. This would prevent autovacuum jobs to happen on all the other databases still present. The commit fixes this issue by delaying setting MyDatabaseId until the database existence has been re-checked with the second scan on pg_database after getting a shared lock on it, and by switching pgstat_update_dbstats() so as nothing happens if MyDatabaseId is not valid. Issue introduced by 5891c7a8ed8f, so backpatch down to 15. Reported-by: Will Mortensen, Jacob Speidel Analyzed-by: Will Mortensen, Jacob Speidel Author: Andres Freund Discussion: https://postgr.es/m/17973-bca1f7d5c14f601e@postgresql.org Backpatch-through: 15 --- src/backend/utils/activity/pgstat_database.c | 13 ++ src/backend/utils/init/postinit.c | 118 ++++++++++--------- 2 files changed, 74 insertions(+), 57 deletions(-) diff --git a/src/backend/utils/activity/pgstat_database.c b/src/backend/utils/activity/pgstat_database.c index d9275611f0..4235fa0ccb 100644 --- a/src/backend/utils/activity/pgstat_database.c +++ b/src/backend/utils/activity/pgstat_database.c @@ -268,6 +268,13 @@ pgstat_update_dbstats(TimestampTz ts) { PgStat_StatDBEntry *dbentry; + /* + * If not connected to a database yet, don't attribute time to "shared + * state" (InvalidOid is used to track stats for shared relations, etc.). + */ + if (!OidIsValid(MyDatabaseId)) + return; + dbentry = pgstat_prep_database_pending(MyDatabaseId); /* @@ -324,6 +331,12 @@ pgstat_prep_database_pending(Oid dboid) { PgStat_EntryRef *entry_ref; + /* + * This should not report stats on database objects before having + * connected to a database. + */ + Assert(!OidIsValid(dboid) || OidIsValid(MyDatabaseId)); + entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_DATABASE, dboid, InvalidOid, NULL); diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 884cb2785f..8ab282ecfd 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -932,7 +932,7 @@ InitPostgres(const char *in_dbname, Oid dboid, */ if (bootstrap) { - MyDatabaseId = Template1DbOid; + dboid = Template1DbOid; MyDatabaseTableSpace = DEFAULTTABLESPACE_OID; } else if (in_dbname != NULL) @@ -946,32 +946,9 @@ InitPostgres(const char *in_dbname, Oid dboid, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", in_dbname))); dbform = (Form_pg_database) GETSTRUCT(tuple); - MyDatabaseId = dbform->oid; - MyDatabaseTableSpace = dbform->dattablespace; - /* take database name from the caller, just for paranoia */ - strlcpy(dbname, in_dbname, sizeof(dbname)); + dboid = dbform->oid; } - else if (OidIsValid(dboid)) - { - /* caller specified database by OID */ - HeapTuple tuple; - Form_pg_database dbform; - - tuple = GetDatabaseTupleByOid(dboid); - if (!HeapTupleIsValid(tuple)) - ereport(FATAL, - (errcode(ERRCODE_UNDEFINED_DATABASE), - errmsg("database %u does not exist", dboid))); - dbform = (Form_pg_database) GETSTRUCT(tuple); - MyDatabaseId = dbform->oid; - MyDatabaseTableSpace = dbform->dattablespace; - Assert(MyDatabaseId == dboid); - strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname)); - /* pass the database name back to the caller */ - if (out_dbname) - strcpy(out_dbname, dbname); - } - else + else if (!OidIsValid(dboid)) { /* * If this is a background worker not bound to any particular @@ -1009,8 +986,64 @@ InitPostgres(const char *in_dbname, Oid dboid, * CREATE DATABASE. */ if (!bootstrap) - LockSharedObject(DatabaseRelationId, MyDatabaseId, 0, - RowExclusiveLock); + LockSharedObject(DatabaseRelationId, dboid, 0, RowExclusiveLock); + + /* + * Recheck pg_database to make sure the target database hasn't gone away. + * If there was a concurrent DROP DATABASE, this ensures we will die + * cleanly without creating a mess. + */ + if (!bootstrap) + { + HeapTuple tuple; + Form_pg_database datform; + + tuple = GetDatabaseTupleByOid(dboid); + if (HeapTupleIsValid(tuple)) + datform = (Form_pg_database) GETSTRUCT(tuple); + + if (!HeapTupleIsValid(tuple) || + (in_dbname && namestrcmp(&datform->datname, in_dbname))) + { + if (in_dbname) + ereport(FATAL, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database \"%s\" does not exist", in_dbname), + errdetail("It seems to have just been dropped or renamed."))); + else + ereport(FATAL, + (errcode(ERRCODE_UNDEFINED_DATABASE), + errmsg("database %u does not exist", dboid))); + } + + strlcpy(dbname, NameStr(datform->datname), sizeof(dbname)); + + if (database_is_invalid_form(datform)) + { + ereport(FATAL, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot connect to invalid database \"%s\"", dbname), + errhint("Use DROP DATABASE to drop invalid databases.")); + } + + MyDatabaseTableSpace = datform->dattablespace; + /* pass the database name back to the caller */ + if (out_dbname) + strcpy(out_dbname, dbname); + } + + /* + * Now that we rechecked, we are certain to be connected to a database and + * thus can set MyDatabaseId. + * + * It is important that MyDatabaseId only be set once we are sure that the + * target database can no longer be concurrently dropped or renamed. For + * example, without this guarantee, pgstat_update_dbstats() could create + * entries for databases that were just dropped in the pgstat shutdown + * callback, which could confuse other code paths like the autovacuum + * scheduler. + */ + MyDatabaseId = dboid; /* * Now we can mark our PGPROC entry with the database ID. @@ -1034,35 +1067,6 @@ InitPostgres(const char *in_dbname, Oid dboid, */ InvalidateCatalogSnapshot(); - /* - * Recheck pg_database to make sure the target database hasn't gone away. - * If there was a concurrent DROP DATABASE, this ensures we will die - * cleanly without creating a mess. - */ - if (!bootstrap) - { - HeapTuple tuple; - Form_pg_database datform; - - tuple = GetDatabaseTuple(dbname); - if (!HeapTupleIsValid(tuple) || - MyDatabaseId != ((Form_pg_database) GETSTRUCT(tuple))->oid || - MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace) - ereport(FATAL, - (errcode(ERRCODE_UNDEFINED_DATABASE), - errmsg("database \"%s\" does not exist", dbname), - errdetail("It seems to have just been dropped or renamed."))); - - datform = (Form_pg_database) GETSTRUCT(tuple); - if (database_is_invalid_form(datform)) - { - ereport(FATAL, - errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("cannot connect to invalid database \"%s\"", dbname), - errhint("Use DROP DATABASE to drop invalid databases.")); - } - } - /* * Now we should be able to access the database directory safely. Verify * it's there and looks reasonable.