From 9b2a41db1cc007df304bf7874813e6ab78b9a830 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 24 Aug 2023 12:02:40 -0400
Subject: [PATCH] Avoid unnecessary plancache revalidation of utility
 statements.

Revalidation of a plancache entry (after a cache invalidation event)
requires acquiring a snapshot.  Normally that is harmless, but not
if the cached statement is one that needs to run without acquiring a
snapshot.  We were already aware of that for TransactionStmts,
but for some reason hadn't extrapolated to the other statements that
PlannedStmtRequiresSnapshot() knows mustn't set a snapshot.  This can
lead to unexpected failures of commands such as SET TRANSACTION
ISOLATION LEVEL.  We can fix it in the same way, by excluding those
command types from revalidation.

However, we can do even better than that: there is no need to
revalidate for any statement type for which parse analysis, rewrite,
and plan steps do nothing interesting, which is nearly all utility
commands.  To mechanize this, invent a parser function
stmt_requires_parse_analysis() that tells whether parse analysis does
anything beyond wrapping a CMD_UTILITY Query around the raw parse
tree.  If that's what it does, then rewrite and plan will just
skip the Query, so that it is not possible for the same raw parse
tree to produce a different plan tree after cache invalidation.

stmt_requires_parse_analysis() is basically equivalent to the
existing function analyze_requires_snapshot(), except that for
obscure reasons that function omits ReturnStmt and CallStmt.
It is unclear whether those were oversights or intentional.
I have not been able to demonstrate a bug from not acquiring a
snapshot while analyzing these commands, but at best it seems mighty
fragile.  It seems safer to acquire a snapshot for parse analysis of
these commands too, which allows making stmt_requires_parse_analysis
and analyze_requires_snapshot equivalent.

In passing this fixes a second bug, which is that ResetPlanCache
would exclude ReturnStmts and CallStmts from revalidation.
That's surely *not* safe, since they contain parsable expressions.

Per bug #18059 from Pavel Kulakov.  Back-patch to all supported
branches.

Discussion: https://postgr.es/m/18059-79c692f036b25346@postgresql.org
---
 src/backend/parser/analyze.c                 | 52 +++++++++++++--
 src/backend/utils/cache/plancache.c          | 69 ++++++++------------
 src/include/parser/analyze.h                 |  1 +
 src/pl/plpgsql/src/expected/plpgsql_call.out | 17 +++++
 src/pl/plpgsql/src/sql/plpgsql_call.sql      | 18 +++++
 5 files changed, 108 insertions(+), 49 deletions(-)

diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 333be34fc4..ff1342e500 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -296,6 +296,11 @@ transformStmt(ParseState *pstate, Node *parseTree)
 	}
 #endif							/* RAW_EXPRESSION_COVERAGE_TEST */
 
+	/*
+	 * Caution: when changing the set of statement types that have non-default
+	 * processing here, see also stmt_requires_parse_analysis() and
+	 * analyze_requires_snapshot().
+	 */
 	switch (nodeTag(parseTree))
 	{
 			/*
@@ -378,14 +383,22 @@ transformStmt(ParseState *pstate, Node *parseTree)
 }
 
 /*
- * analyze_requires_snapshot
- *		Returns true if a snapshot must be set before doing parse analysis
- *		on the given raw parse tree.
+ * stmt_requires_parse_analysis
+ *		Returns true if parse analysis will do anything non-trivial
+ *		with the given raw parse tree.
  *
- * Classification here should match transformStmt().
+ * Generally, this should return true for any statement type for which
+ * transformStmt() does more than wrap a CMD_UTILITY Query around it.
+ * When it returns false, the caller can assume that there is no situation
+ * in which parse analysis of the raw statement could need to be re-done.
+ *
+ * Currently, since the rewriter and planner do nothing for CMD_UTILITY
+ * Queries, a false result means that the entire parse analysis/rewrite/plan
+ * pipeline will never need to be re-done.  If that ever changes, callers
+ * will likely need adjustment.
  */
 bool
-analyze_requires_snapshot(RawStmt *parseTree)
+stmt_requires_parse_analysis(RawStmt *parseTree)
 {
 	bool		result;
 
@@ -398,6 +411,7 @@ analyze_requires_snapshot(RawStmt *parseTree)
 		case T_DeleteStmt:
 		case T_UpdateStmt:
 		case T_SelectStmt:
+		case T_ReturnStmt:
 		case T_PLAssignStmt:
 			result = true;
 			break;
@@ -408,12 +422,12 @@ analyze_requires_snapshot(RawStmt *parseTree)
 		case T_DeclareCursorStmt:
 		case T_ExplainStmt:
 		case T_CreateTableAsStmt:
-			/* yes, because we must analyze the contained statement */
+		case T_CallStmt:
 			result = true;
 			break;
 
 		default:
-			/* other utility statements don't have any real parse analysis */
+			/* all other statements just get wrapped in a CMD_UTILITY Query */
 			result = false;
 			break;
 	}
@@ -421,6 +435,30 @@ analyze_requires_snapshot(RawStmt *parseTree)
 	return result;
 }
 
+/*
+ * analyze_requires_snapshot
+ *		Returns true if a snapshot must be set before doing parse analysis
+ *		on the given raw parse tree.
+ */
+bool
+analyze_requires_snapshot(RawStmt *parseTree)
+{
+	/*
+	 * Currently, this should return true in exactly the same cases that
+	 * stmt_requires_parse_analysis() does, so we just invoke that function
+	 * rather than duplicating it.  We keep the two entry points separate for
+	 * clarity of callers, since from the callers' standpoint these are
+	 * different conditions.
+	 *
+	 * While there may someday be a statement type for which transformStmt()
+	 * does something nontrivial and yet no snapshot is needed for that
+	 * processing, it seems likely that making such a choice would be fragile.
+	 * If you want to install an exception, document the reasoning for it in a
+	 * comment.
+	 */
+	return stmt_requires_parse_analysis(parseTree);
+}
+
 /*
  * transformDeleteStmt -
  *	  transforms a Delete Statement
diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c
index 1355c3a6f1..8095808bb4 100644
--- a/src/backend/utils/cache/plancache.c
+++ b/src/backend/utils/cache/plancache.c
@@ -77,13 +77,15 @@
 
 /*
  * We must skip "overhead" operations that involve database access when the
- * cached plan's subject statement is a transaction control command.
- * For the convenience of postgres.c, treat empty statements as control
- * commands too.
+ * cached plan's subject statement is a transaction control command or one
+ * that requires a snapshot not to be set yet (such as SET or LOCK).  More
+ * generally, statements that do not require parse analysis/rewrite/plan
+ * activity never need to be revalidated, so we can treat them all like that.
+ * For the convenience of postgres.c, treat empty statements that way too.
  */
-#define IsTransactionStmtPlan(plansource)  \
-	((plansource)->raw_parse_tree == NULL || \
-	 IsA((plansource)->raw_parse_tree->stmt, TransactionStmt))
+#define StmtPlanRequiresRevalidation(plansource)  \
+	((plansource)->raw_parse_tree != NULL && \
+	 stmt_requires_parse_analysis((plansource)->raw_parse_tree))
 
 /*
  * This is the head of the backend's list of "saved" CachedPlanSources (i.e.,
@@ -383,13 +385,13 @@ CompleteCachedPlan(CachedPlanSource *plansource,
 	plansource->query_context = querytree_context;
 	plansource->query_list = querytree_list;
 
-	if (!plansource->is_oneshot && !IsTransactionStmtPlan(plansource))
+	if (!plansource->is_oneshot && StmtPlanRequiresRevalidation(plansource))
 	{
 		/*
 		 * Use the planner machinery to extract dependencies.  Data is saved
 		 * in query_context.  (We assume that not a lot of extra cruft is
 		 * created by this call.)  We can skip this for one-shot plans, and
-		 * transaction control commands have no such dependencies anyway.
+		 * plans not needing revalidation have no such dependencies anyway.
 		 */
 		extract_query_dependencies((Node *) querytree_list,
 								   &plansource->relationOids,
@@ -568,11 +570,11 @@ RevalidateCachedQuery(CachedPlanSource *plansource,
 	/*
 	 * For one-shot plans, we do not support revalidation checking; it's
 	 * assumed the query is parsed, planned, and executed in one transaction,
-	 * so that no lock re-acquisition is necessary.  Also, there is never any
-	 * need to revalidate plans for transaction control commands (and we
-	 * mustn't risk any catalog accesses when handling those).
+	 * so that no lock re-acquisition is necessary.  Also, if the statement
+	 * type can't require revalidation, we needn't do anything (and we mustn't
+	 * risk catalog accesses when handling, eg, transaction control commands).
 	 */
-	if (plansource->is_oneshot || IsTransactionStmtPlan(plansource))
+	if (plansource->is_oneshot || !StmtPlanRequiresRevalidation(plansource))
 	{
 		Assert(plansource->is_valid);
 		return NIL;
@@ -1029,8 +1031,8 @@ choose_custom_plan(CachedPlanSource *plansource, ParamListInfo boundParams)
 	/* Otherwise, never any point in a custom plan if there's no parameters */
 	if (boundParams == NULL)
 		return false;
-	/* ... nor for transaction control statements */
-	if (IsTransactionStmtPlan(plansource))
+	/* ... nor when planning would be a no-op */
+	if (!StmtPlanRequiresRevalidation(plansource))
 		return false;
 
 	/* Let settings force the decision */
@@ -1963,8 +1965,8 @@ PlanCacheRelCallback(Datum arg, Oid relid)
 		if (!plansource->is_valid)
 			continue;
 
-		/* Never invalidate transaction control commands */
-		if (IsTransactionStmtPlan(plansource))
+		/* Never invalidate if parse/plan would be a no-op anyway */
+		if (!StmtPlanRequiresRevalidation(plansource))
 			continue;
 
 		/*
@@ -2048,8 +2050,8 @@ PlanCacheObjectCallback(Datum arg, int cacheid, uint32 hashvalue)
 		if (!plansource->is_valid)
 			continue;
 
-		/* Never invalidate transaction control commands */
-		if (IsTransactionStmtPlan(plansource))
+		/* Never invalidate if parse/plan would be a no-op anyway */
+		if (!StmtPlanRequiresRevalidation(plansource))
 			continue;
 
 		/*
@@ -2158,7 +2160,6 @@ ResetPlanCache(void)
 	{
 		CachedPlanSource *plansource = dlist_container(CachedPlanSource,
 													   node, iter.cur);
-		ListCell   *lc;
 
 		Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC);
 
@@ -2170,32 +2171,16 @@ ResetPlanCache(void)
 		 * We *must not* mark transaction control statements as invalid,
 		 * particularly not ROLLBACK, because they may need to be executed in
 		 * aborted transactions when we can't revalidate them (cf bug #5269).
+		 * In general there's no point in invalidating statements for which a
+		 * new parse analysis/rewrite/plan cycle would certainly give the same
+		 * results.
 		 */
-		if (IsTransactionStmtPlan(plansource))
+		if (!StmtPlanRequiresRevalidation(plansource))
 			continue;
 
-		/*
-		 * In general there is no point in invalidating utility statements
-		 * since they have no plans anyway.  So invalidate it only if it
-		 * contains at least one non-utility statement, or contains a utility
-		 * statement that contains a pre-analyzed query (which could have
-		 * dependencies.)
-		 */
-		foreach(lc, plansource->query_list)
-		{
-			Query	   *query = lfirst_node(Query, lc);
-
-			if (query->commandType != CMD_UTILITY ||
-				UtilityContainsQuery(query->utilityStmt))
-			{
-				/* non-utility statement, so invalidate */
-				plansource->is_valid = false;
-				if (plansource->gplan)
-					plansource->gplan->is_valid = false;
-				/* no need to look further */
-				break;
-			}
-		}
+		plansource->is_valid = false;
+		if (plansource->gplan)
+			plansource->gplan->is_valid = false;
 	}
 
 	/* Likewise invalidate cached expressions */
diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h
index a0f0bd38d7..560e42cb6a 100644
--- a/src/include/parser/analyze.h
+++ b/src/include/parser/analyze.h
@@ -37,6 +37,7 @@ extern Query *parse_sub_analyze(Node *parseTree, ParseState *parentParseState,
 extern Query *transformTopLevelStmt(ParseState *pstate, RawStmt *parseTree);
 extern Query *transformStmt(ParseState *pstate, Node *parseTree);
 
+extern bool stmt_requires_parse_analysis(RawStmt *parseTree);
 extern bool analyze_requires_snapshot(RawStmt *parseTree);
 
 extern const char *LCS_asString(LockClauseStrength strength);
diff --git a/src/pl/plpgsql/src/expected/plpgsql_call.out b/src/pl/plpgsql/src/expected/plpgsql_call.out
index 7b156f3489..ad3d7276ac 100644
--- a/src/pl/plpgsql/src/expected/plpgsql_call.out
+++ b/src/pl/plpgsql/src/expected/plpgsql_call.out
@@ -35,6 +35,23 @@ SELECT * FROM test1;
  55
 (1 row)
 
+-- Check that plan revalidation doesn't prevent setting transaction properties
+-- (bug #18059).  This test must include the first temp-object creation in
+-- this script, or it won't exercise the bug scenario.  Hence we put it early.
+CREATE PROCEDURE test_proc3a()
+LANGUAGE plpgsql
+AS $$
+BEGIN
+   COMMIT;
+   SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+   RAISE NOTICE 'done';
+END;
+$$;
+CALL test_proc3a();
+NOTICE:  done
+CREATE TEMP TABLE tt1(f1 int);
+CALL test_proc3a();
+NOTICE:  done
 -- nested CALL
 TRUNCATE TABLE test1;
 CREATE PROCEDURE test_proc4(y int)
diff --git a/src/pl/plpgsql/src/sql/plpgsql_call.sql b/src/pl/plpgsql/src/sql/plpgsql_call.sql
index 8108d05060..f93f28435f 100644
--- a/src/pl/plpgsql/src/sql/plpgsql_call.sql
+++ b/src/pl/plpgsql/src/sql/plpgsql_call.sql
@@ -38,6 +38,24 @@ CALL test_proc3(55);
 SELECT * FROM test1;
 
 
+-- Check that plan revalidation doesn't prevent setting transaction properties
+-- (bug #18059).  This test must include the first temp-object creation in
+-- this script, or it won't exercise the bug scenario.  Hence we put it early.
+CREATE PROCEDURE test_proc3a()
+LANGUAGE plpgsql
+AS $$
+BEGIN
+   COMMIT;
+   SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+   RAISE NOTICE 'done';
+END;
+$$;
+
+CALL test_proc3a();
+CREATE TEMP TABLE tt1(f1 int);
+CALL test_proc3a();
+
+
 -- nested CALL
 TRUNCATE TABLE test1;