Fix temporary object cleanup failing due to toast access without snapshot.

When cleaning up temporary objects during process exit the cleanup could fail
with:
  FATAL: cannot fetch toast data without an active snapshot

The bug is caused by RemoveTempRelationsCallback() not setting up a
snapshot. If an object with toasted catalog data needs to be cleaned up,
init_toast_snapshot() could fail with the above error.

Most of the time however the the problem is masked due to cached catalog
snapshots being returned by GetOldestSnapshot(). But dropping an object can
cause catalog invalidations to be emitted. If no further catalog accesses are
necessary between the invalidation processing and the next toast datum
deletion, the bug becomes visible.

It's easy to miss this bug because it typically happens after clients
disconnect and the FATAL error just ends up in the log.

Luckily temporary table cleanup at the next use of the same temporary schema
or during DISCARD ALL does not have the same problem.

Fix the bug by pushing a snapshot in RemoveTempRelationsCallback(). Also add
isolation tests for temporary object cleanup, including objects with toasted
catalog data.

A future HEAD only commit will add an assertion trying to make this more
visible.

Reported-By: Miles Delahunty
Author: Andres Freund
Discussion: https://postgr.es/m/CAOFAq3BU5Mf2TTvu8D9n_ZOoFAeQswuzk7yziAb7xuw_qyw5gw@mail.gmail.com
Backpatch: 10-
This commit is contained in:
Andres Freund 2022-02-21 08:57:34 -08:00
parent 27b02e070f
commit 7c38ef2a5d
4 changed files with 204 additions and 0 deletions

View File

@ -55,6 +55,7 @@
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/varlena.h"
@ -4292,9 +4293,11 @@ RemoveTempRelationsCallback(int code, Datum arg)
/* Need to ensure we have a usable transaction. */
AbortOutOfAnyTransaction();
StartTransactionCommand();
PushActiveSnapshot(GetTransactionSnapshot());
RemoveTempRelations(myTempNamespace);
PopActiveSnapshot();
CommitTransactionCommand();
}
}

View File

@ -0,0 +1,115 @@
Parsed test spec with 2 sessions
starting permutation: s1_create_temp_objects s1_discard_temp s2_check_schema
step s1_create_temp_objects:
-- create function large enough to be toasted, to ensure we correctly clean those up, a prior bug
-- https://postgr.es/m/CAOFAq3BU5Mf2TTvu8D9n_ZOoFAeQswuzk7yziAb7xuw_qyw5gw%40mail.gmail.com
SELECT exec(format($outer$
CREATE OR REPLACE FUNCTION pg_temp.long() RETURNS text LANGUAGE sql AS $body$ SELECT %L; $body$$outer$,
(SELECT string_agg(g.i::text||':'||random()::text, '|') FROM generate_series(1, 100) g(i))));
-- The above bug requirs function removal to happen after a catalog
-- invalidation. dependency.c sorts objects in descending oid order so
-- that newer objects are deleted before older objects, so create a
-- table after.
CREATE TEMPORARY TABLE invalidate_catalog_cache();
-- test non-temp function is dropped when depending on temp table
CREATE TEMPORARY TABLE just_give_me_a_type(id serial primary key);
CREATE FUNCTION uses_a_temp_type(just_give_me_a_type) RETURNS int LANGUAGE sql AS $$SELECT 1;$$;
exec
----
(1 row)
step s1_discard_temp:
DISCARD TEMP;
step s2_check_schema:
SELECT oid::regclass FROM pg_class WHERE relnamespace = (SELECT oid FROM s1_temp_schema);
SELECT oid::regproc FROM pg_proc WHERE pronamespace = (SELECT oid FROM s1_temp_schema);
SELECT oid::regproc FROM pg_type WHERE typnamespace = (SELECT oid FROM s1_temp_schema);
oid
---
(0 rows)
oid
---
(0 rows)
oid
---
(0 rows)
starting permutation: s1_advisory s2_advisory s1_create_temp_objects s1_exit s2_check_schema
step s1_advisory:
SELECT pg_advisory_lock('pg_namespace'::regclass::int8);
pg_advisory_lock
----------------
(1 row)
step s2_advisory:
SELECT pg_advisory_lock('pg_namespace'::regclass::int8);
<waiting ...>
step s1_create_temp_objects:
-- create function large enough to be toasted, to ensure we correctly clean those up, a prior bug
-- https://postgr.es/m/CAOFAq3BU5Mf2TTvu8D9n_ZOoFAeQswuzk7yziAb7xuw_qyw5gw%40mail.gmail.com
SELECT exec(format($outer$
CREATE OR REPLACE FUNCTION pg_temp.long() RETURNS text LANGUAGE sql AS $body$ SELECT %L; $body$$outer$,
(SELECT string_agg(g.i::text||':'||random()::text, '|') FROM generate_series(1, 100) g(i))));
-- The above bug requirs function removal to happen after a catalog
-- invalidation. dependency.c sorts objects in descending oid order so
-- that newer objects are deleted before older objects, so create a
-- table after.
CREATE TEMPORARY TABLE invalidate_catalog_cache();
-- test non-temp function is dropped when depending on temp table
CREATE TEMPORARY TABLE just_give_me_a_type(id serial primary key);
CREATE FUNCTION uses_a_temp_type(just_give_me_a_type) RETURNS int LANGUAGE sql AS $$SELECT 1;$$;
exec
----
(1 row)
step s1_exit:
SELECT pg_terminate_backend(pg_backend_pid());
FATAL: terminating connection due to administrator command
server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
step s2_advisory: <... completed>
pg_advisory_lock
----------------
(1 row)
step s2_check_schema:
SELECT oid::regclass FROM pg_class WHERE relnamespace = (SELECT oid FROM s1_temp_schema);
SELECT oid::regproc FROM pg_proc WHERE pronamespace = (SELECT oid FROM s1_temp_schema);
SELECT oid::regproc FROM pg_type WHERE typnamespace = (SELECT oid FROM s1_temp_schema);
oid
---
(0 rows)
oid
---
(0 rows)
oid
---
(0 rows)

View File

@ -38,6 +38,7 @@ test: eval-plan-qual-trigger
test: lock-update-delete
test: lock-update-traversal
test: inherit-temp
test: temp-schema-cleanup
test: insert-conflict-do-nothing
test: insert-conflict-do-nothing-2
test: insert-conflict-do-update

View File

@ -0,0 +1,85 @@
# Test cleanup of objects in temporary schema.
setup {
CREATE TABLE s1_temp_schema(oid oid);
-- to help create a long function
CREATE FUNCTION exec(p_foo text) RETURNS void LANGUAGE plpgsql AS $$BEGIN EXECUTE p_foo; END;$$;
}
teardown {
DROP TABLE s1_temp_schema;
DROP FUNCTION exec(text);
}
session "s1"
setup {
CREATE TEMPORARY TABLE just_to_create_temp_schema();
DROP TABLE just_to_create_temp_schema;
INSERT INTO s1_temp_schema SELECT pg_my_temp_schema();
}
step s1_advisory {
SELECT pg_advisory_lock('pg_namespace'::regclass::int8);
}
step s1_create_temp_objects {
-- create function large enough to be toasted, to ensure we correctly clean those up, a prior bug
-- https://postgr.es/m/CAOFAq3BU5Mf2TTvu8D9n_ZOoFAeQswuzk7yziAb7xuw_qyw5gw%40mail.gmail.com
SELECT exec(format($outer$
CREATE OR REPLACE FUNCTION pg_temp.long() RETURNS text LANGUAGE sql AS $body$ SELECT %L; $body$$outer$,
(SELECT string_agg(g.i::text||':'||random()::text, '|') FROM generate_series(1, 100) g(i))));
-- The above bug requirs function removal to happen after a catalog
-- invalidation. dependency.c sorts objects in descending oid order so
-- that newer objects are deleted before older objects, so create a
-- table after.
CREATE TEMPORARY TABLE invalidate_catalog_cache();
-- test non-temp function is dropped when depending on temp table
CREATE TEMPORARY TABLE just_give_me_a_type(id serial primary key);
CREATE FUNCTION uses_a_temp_type(just_give_me_a_type) RETURNS int LANGUAGE sql AS $$SELECT 1;$$;
}
step s1_discard_temp {
DISCARD TEMP;
}
step s1_exit {
SELECT pg_terminate_backend(pg_backend_pid());
}
session "s2"
step s2_advisory {
SELECT pg_advisory_lock('pg_namespace'::regclass::int8);
}
step s2_check_schema {
SELECT oid::regclass FROM pg_class WHERE relnamespace = (SELECT oid FROM s1_temp_schema);
SELECT oid::regproc FROM pg_proc WHERE pronamespace = (SELECT oid FROM s1_temp_schema);
SELECT oid::regproc FROM pg_type WHERE typnamespace = (SELECT oid FROM s1_temp_schema);
}
# Test temporary object cleanup during DISCARD.
permutation
s1_create_temp_objects
s1_discard_temp
s2_check_schema
# Test temporary object cleanup during process exit.
#
# To check (in s2) if temporary objects (in s1) have properly been removed we
# need to wait for s1 to finish cleaning up. Luckily session level advisory
# locks are released only after temp table cleanup.
permutation
s1_advisory
s2_advisory
s1_create_temp_objects
s1_exit
s2_check_schema
# Can't run further tests here, because s1's connection is dead