From 3faa21bb761f52974cfc59dd9d3993f89d81f44e Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Sat, 19 Feb 2022 12:27:20 -0800 Subject: [PATCH] Fix temporary object cleanup failing due to toast access without snapshot. When cleaning up temporary objects during process exit the cleanup could fail with: FATAL: cannot fetch toast data without an active snapshot The bug is caused by RemoveTempRelationsCallback() not setting up a snapshot. If an object with toasted catalog data needs to be cleaned up, init_toast_snapshot() could fail with the above error. Most of the time however the the problem is masked due to cached catalog snapshots being returned by GetOldestSnapshot(). But dropping an object can cause catalog invalidations to be emitted. If no further catalog accesses are necessary between the invalidation processing and the next toast datum deletion, the bug becomes visible. It's easy to miss this bug because it typically happens after clients disconnect and the FATAL error just ends up in the log. Luckily temporary table cleanup at the next use of the same temporary schema or during DISCARD ALL does not have the same problem. Fix the bug by pushing a snapshot in RemoveTempRelationsCallback(). Also add isolation tests for temporary object cleanup, including objects with toasted catalog data. A future HEAD only commit will add more assertions. Reported-By: Miles Delahunty Author: Andres Freund Discussion: https://postgr.es/m/CAOFAq3BU5Mf2TTvu8D9n_ZOoFAeQswuzk7yziAb7xuw_qyw5gw@mail.gmail.com Backpatch: 10- --- src/backend/catalog/namespace.c | 3 + .../expected/temp-schema-cleanup.out | 115 ++++++++++++++++++ src/test/isolation/isolation_schedule | 1 + .../isolation/specs/temp-schema-cleanup.spec | 85 +++++++++++++ 4 files changed, 204 insertions(+) create mode 100644 src/test/isolation/expected/temp-schema-cleanup.out create mode 100644 src/test/isolation/specs/temp-schema-cleanup.spec diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index 2c9cea5826..74e804e461 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -55,6 +55,7 @@ #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/snapmgr.h" #include "utils/syscache.h" #include "utils/varlena.h" @@ -4178,9 +4179,11 @@ RemoveTempRelationsCallback(int code, Datum arg) /* Need to ensure we have a usable transaction. */ AbortOutOfAnyTransaction(); StartTransactionCommand(); + PushActiveSnapshot(GetTransactionSnapshot()); RemoveTempRelations(myTempNamespace); + PopActiveSnapshot(); CommitTransactionCommand(); } } diff --git a/src/test/isolation/expected/temp-schema-cleanup.out b/src/test/isolation/expected/temp-schema-cleanup.out new file mode 100644 index 0000000000..35b91d9e45 --- /dev/null +++ b/src/test/isolation/expected/temp-schema-cleanup.out @@ -0,0 +1,115 @@ +Parsed test spec with 2 sessions + +starting permutation: s1_create_temp_objects s1_discard_temp s2_check_schema +step s1_create_temp_objects: + + -- create function large enough to be toasted, to ensure we correctly clean those up, a prior bug + -- https://postgr.es/m/CAOFAq3BU5Mf2TTvu8D9n_ZOoFAeQswuzk7yziAb7xuw_qyw5gw%40mail.gmail.com + SELECT exec(format($outer$ + CREATE OR REPLACE FUNCTION pg_temp.long() RETURNS text LANGUAGE sql AS $body$ SELECT %L; $body$$outer$, + (SELECT string_agg(g.i::text||':'||random()::text, '|') FROM generate_series(1, 100) g(i)))); + + -- The above bug requirs function removal to happen after a catalog + -- invalidation. dependency.c sorts objects in descending oid order so + -- that newer objects are deleted before older objects, so create a + -- table after. + CREATE TEMPORARY TABLE invalidate_catalog_cache(); + + -- test non-temp function is dropped when depending on temp table + CREATE TEMPORARY TABLE just_give_me_a_type(id serial primary key); + + CREATE FUNCTION uses_a_temp_type(just_give_me_a_type) RETURNS int LANGUAGE sql AS $$SELECT 1;$$; + +exec +---- + +(1 row) + +step s1_discard_temp: + DISCARD TEMP; + +step s2_check_schema: + SELECT oid::regclass FROM pg_class WHERE relnamespace = (SELECT oid FROM s1_temp_schema); + SELECT oid::regproc FROM pg_proc WHERE pronamespace = (SELECT oid FROM s1_temp_schema); + SELECT oid::regproc FROM pg_type WHERE typnamespace = (SELECT oid FROM s1_temp_schema); + +oid +--- +(0 rows) + +oid +--- +(0 rows) + +oid +--- +(0 rows) + + +starting permutation: s1_advisory s2_advisory s1_create_temp_objects s1_exit s2_check_schema +step s1_advisory: + SELECT pg_advisory_lock('pg_namespace'::regclass::int8); + +pg_advisory_lock +---------------- + +(1 row) + +step s2_advisory: + SELECT pg_advisory_lock('pg_namespace'::regclass::int8); + +step s1_create_temp_objects: + + -- create function large enough to be toasted, to ensure we correctly clean those up, a prior bug + -- https://postgr.es/m/CAOFAq3BU5Mf2TTvu8D9n_ZOoFAeQswuzk7yziAb7xuw_qyw5gw%40mail.gmail.com + SELECT exec(format($outer$ + CREATE OR REPLACE FUNCTION pg_temp.long() RETURNS text LANGUAGE sql AS $body$ SELECT %L; $body$$outer$, + (SELECT string_agg(g.i::text||':'||random()::text, '|') FROM generate_series(1, 100) g(i)))); + + -- The above bug requirs function removal to happen after a catalog + -- invalidation. dependency.c sorts objects in descending oid order so + -- that newer objects are deleted before older objects, so create a + -- table after. + CREATE TEMPORARY TABLE invalidate_catalog_cache(); + + -- test non-temp function is dropped when depending on temp table + CREATE TEMPORARY TABLE just_give_me_a_type(id serial primary key); + + CREATE FUNCTION uses_a_temp_type(just_give_me_a_type) RETURNS int LANGUAGE sql AS $$SELECT 1;$$; + +exec +---- + +(1 row) + +step s1_exit: + SELECT pg_terminate_backend(pg_backend_pid()); + +FATAL: terminating connection due to administrator command +server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. + +step s2_advisory: <... completed> +pg_advisory_lock +---------------- + +(1 row) + +step s2_check_schema: + SELECT oid::regclass FROM pg_class WHERE relnamespace = (SELECT oid FROM s1_temp_schema); + SELECT oid::regproc FROM pg_proc WHERE pronamespace = (SELECT oid FROM s1_temp_schema); + SELECT oid::regproc FROM pg_type WHERE typnamespace = (SELECT oid FROM s1_temp_schema); + +oid +--- +(0 rows) + +oid +--- +(0 rows) + +oid +--- +(0 rows) + diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 4a16d42366..6c6f7e554e 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -31,6 +31,7 @@ test: eval-plan-qual-trigger test: lock-update-delete test: lock-update-traversal test: inherit-temp +test: temp-schema-cleanup test: insert-conflict-do-nothing test: insert-conflict-do-nothing-2 test: insert-conflict-do-update diff --git a/src/test/isolation/specs/temp-schema-cleanup.spec b/src/test/isolation/specs/temp-schema-cleanup.spec new file mode 100644 index 0000000000..a9417b7e90 --- /dev/null +++ b/src/test/isolation/specs/temp-schema-cleanup.spec @@ -0,0 +1,85 @@ +# Test cleanup of objects in temporary schema. + +setup { + CREATE TABLE s1_temp_schema(oid oid); + -- to help create a long function + CREATE FUNCTION exec(p_foo text) RETURNS void LANGUAGE plpgsql AS $$BEGIN EXECUTE p_foo; END;$$; +} + +teardown { + DROP TABLE s1_temp_schema; + DROP FUNCTION exec(text); +} + +session "s1" +setup { + CREATE TEMPORARY TABLE just_to_create_temp_schema(); + DROP TABLE just_to_create_temp_schema; + INSERT INTO s1_temp_schema SELECT pg_my_temp_schema(); +} + +step s1_advisory { + SELECT pg_advisory_lock('pg_namespace'::regclass::int8); +} + +step s1_create_temp_objects { + + -- create function large enough to be toasted, to ensure we correctly clean those up, a prior bug + -- https://postgr.es/m/CAOFAq3BU5Mf2TTvu8D9n_ZOoFAeQswuzk7yziAb7xuw_qyw5gw%40mail.gmail.com + SELECT exec(format($outer$ + CREATE OR REPLACE FUNCTION pg_temp.long() RETURNS text LANGUAGE sql AS $body$ SELECT %L; $body$$outer$, + (SELECT string_agg(g.i::text||':'||random()::text, '|') FROM generate_series(1, 100) g(i)))); + + -- The above bug requirs function removal to happen after a catalog + -- invalidation. dependency.c sorts objects in descending oid order so + -- that newer objects are deleted before older objects, so create a + -- table after. + CREATE TEMPORARY TABLE invalidate_catalog_cache(); + + -- test non-temp function is dropped when depending on temp table + CREATE TEMPORARY TABLE just_give_me_a_type(id serial primary key); + + CREATE FUNCTION uses_a_temp_type(just_give_me_a_type) RETURNS int LANGUAGE sql AS $$SELECT 1;$$; +} + +step s1_discard_temp { + DISCARD TEMP; +} + +step s1_exit { + SELECT pg_terminate_backend(pg_backend_pid()); +} + + +session "s2" + +step s2_advisory { + SELECT pg_advisory_lock('pg_namespace'::regclass::int8); +} + +step s2_check_schema { + SELECT oid::regclass FROM pg_class WHERE relnamespace = (SELECT oid FROM s1_temp_schema); + SELECT oid::regproc FROM pg_proc WHERE pronamespace = (SELECT oid FROM s1_temp_schema); + SELECT oid::regproc FROM pg_type WHERE typnamespace = (SELECT oid FROM s1_temp_schema); +} + + +# Test temporary object cleanup during DISCARD. +permutation + s1_create_temp_objects + s1_discard_temp + s2_check_schema + +# Test temporary object cleanup during process exit. +# +# To check (in s2) if temporary objects (in s1) have properly been removed we +# need to wait for s1 to finish cleaning up. Luckily session level advisory +# locks are released only after temp table cleanup. +permutation + s1_advisory + s2_advisory + s1_create_temp_objects + s1_exit + s2_check_schema + +# Can't run further tests here, because s1's connection is dead