From ba019b4dacf637459148f1327f9345bff031dfe2 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 27 Feb 2023 16:29:51 -0500 Subject: [PATCH] Harden postgres_fdw tests against unexpected cache flushes. postgres_fdw will close its remote session if an sinval cache reset occurs, since it's possible that that means some FDW parameters changed. We had two tests that were trying to ensure that the session remains alive by setting debug_discard_caches = 0; but that's not sufficient. Even though the tests seem stable enough in the buildfarm, they flap a lot under CI. In the first test, which is checking the ability to recover from a lost connection, we can stabilize the results by just not caring whether pg_terminate_backend() finds a victim backend. If a reset did happen, there won't be a session to terminate anymore, but the test can proceed anyway. (Arguably, we are then not testing the unintentional-disconnect case, but as long as that scenario is exercised in most runs I think it's fine; testing the reset-driven case is of value too.) In the second test, which is trying to verify the application_name displayed in pg_stat_activity by a remote session, we had a race condition in that the remote session might go away before we can fetch its pg_stat_activity entry. We can close that race and make the test more certainly test what it intends to by arranging things so that the remote session itself fetches its pg_stat_activity entry (based on PID rather than a somewhat-circular assumption about the application name). Both tests now demonstrably pass under debug_discard_caches = 1, so we can remove that hack. Back-patch into relevant back branches. Discussion: https://postgr.es/m/20230226194340.u44bkfgyz64c67i6@awork3.anarazel.de --- .../postgres_fdw/expected/postgres_fdw.out | 26 ++++++------------- contrib/postgres_fdw/sql/postgres_fdw.sql | 18 ++++++------- 2 files changed, 16 insertions(+), 28 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index ee231cedcf..0a02d63d1c 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -9614,11 +9614,6 @@ WARNING: there is no transaction in progress -- Change application_name of remote connection to special one -- so that we can easily terminate the connection later. ALTER SERVER loopback OPTIONS (application_name 'fdw_retry_check'); --- If debug_discard_caches is active, it results in --- dropping remote connections after every transaction, making it --- impossible to test termination meaningfully. So turn that off --- for this test. -SET debug_discard_caches = 0; -- Make sure we have a remote connection. SELECT 1 FROM ft1 LIMIT 1; ?column? @@ -9627,13 +9622,12 @@ SELECT 1 FROM ft1 LIMIT 1; (1 row) -- Terminate the remote connection and wait for the termination to complete. -SELECT pg_terminate_backend(pid, 180000) FROM pg_stat_activity +-- (If a cache flush happens, the remote connection might have already been +-- dropped; so code this step in a way that doesn't fail if no connection.) +DO $$ BEGIN +PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity WHERE application_name = 'fdw_retry_check'; - pg_terminate_backend ----------------------- - t -(1 row) - +END $$; -- This query should detect the broken connection when starting new remote -- transaction, reestablish new connection, and then succeed. BEGIN; @@ -9646,13 +9640,10 @@ SELECT 1 FROM ft1 LIMIT 1; -- If we detect the broken connection when starting a new remote -- subtransaction, we should fail instead of establishing a new connection. -- Terminate the remote connection and wait for the termination to complete. -SELECT pg_terminate_backend(pid, 180000) FROM pg_stat_activity +DO $$ BEGIN +PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity WHERE application_name = 'fdw_retry_check'; - pg_terminate_backend ----------------------- - t -(1 row) - +END $$; SAVEPOINT s; -- The text of the error might vary across platforms, so only show SQLSTATE. \set VERBOSITY sqlstate @@ -9660,7 +9651,6 @@ SELECT 1 FROM ft1 LIMIT 1; -- should fail ERROR: 08006 \set VERBOSITY default COMMIT; -RESET debug_discard_caches; -- ============================================================================= -- test connection invalidation cases and postgres_fdw_get_connections function -- ============================================================================= diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 258506b01a..a87542a195 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -2908,18 +2908,16 @@ ROLLBACK; -- so that we can easily terminate the connection later. ALTER SERVER loopback OPTIONS (application_name 'fdw_retry_check'); --- If debug_discard_caches is active, it results in --- dropping remote connections after every transaction, making it --- impossible to test termination meaningfully. So turn that off --- for this test. -SET debug_discard_caches = 0; - -- Make sure we have a remote connection. SELECT 1 FROM ft1 LIMIT 1; -- Terminate the remote connection and wait for the termination to complete. -SELECT pg_terminate_backend(pid, 180000) FROM pg_stat_activity +-- (If a cache flush happens, the remote connection might have already been +-- dropped; so code this step in a way that doesn't fail if no connection.) +DO $$ BEGIN +PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity WHERE application_name = 'fdw_retry_check'; +END $$; -- This query should detect the broken connection when starting new remote -- transaction, reestablish new connection, and then succeed. @@ -2929,8 +2927,10 @@ SELECT 1 FROM ft1 LIMIT 1; -- If we detect the broken connection when starting a new remote -- subtransaction, we should fail instead of establishing a new connection. -- Terminate the remote connection and wait for the termination to complete. -SELECT pg_terminate_backend(pid, 180000) FROM pg_stat_activity +DO $$ BEGIN +PERFORM pg_terminate_backend(pid, 180000) FROM pg_stat_activity WHERE application_name = 'fdw_retry_check'; +END $$; SAVEPOINT s; -- The text of the error might vary across platforms, so only show SQLSTATE. \set VERBOSITY sqlstate @@ -2938,8 +2938,6 @@ SELECT 1 FROM ft1 LIMIT 1; -- should fail \set VERBOSITY default COMMIT; -RESET debug_discard_caches; - -- ============================================================================= -- test connection invalidation cases and postgres_fdw_get_connections function -- =============================================================================