From c8ab9701791e22f7a8e1badf362654db179c9703 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 19 Oct 2020 14:33:01 -0400
Subject: [PATCH] Fix list-munging bug that broke SQL function result
 coercions.

Since commit 913bbd88d, check_sql_fn_retval() can either insert type
coercion steps in-line in the Query that produces the SQL function's
results, or generate a new top-level Query to perform the coercions,
if modifying the Query's output in-place wouldn't be safe.  However,
it appears that the latter case has never actually worked, because
the code tried to inject the new Query back into the query list it was
passed ... which is not the list that will be used for later processing
when we execute the SQL function "normally" (without inlining it).
So we ended up with no coercion happening at run-time, leading to
wrong results or crashes depending on the datatypes involved.

While the regression tests look like they cover this area well enough,
through a huge bit of bad luck all the test cases that exercise the
separate-Query path were checking either inline-able cases (which
accidentally didn't have the bug) or cases that are no-ops at runtime
(e.g., varchar to text), so that the failure to perform the coercion
wasn't obvious.  The fact that the cases that don't work weren't
allowed at all before v13 probably contributed to not noticing the
problem sooner, too.

To fix, get rid of the separate "flat" list of Query nodes and instead
pass the real two-level list that is going to be used later.  I chose
to make the same change in check_sql_fn_statements(), although that has
no actual bug, just so that we don't need that data structure at all.

This is an API change, as evidenced by the adjustments needed to
callers outside functions.c.  That's a bit scary to be doing in a
released branch, but so far as I can tell from a quick search,
there are no outside callers of these functions (and they are
sufficiently specific to our semantics for SQL-language functions that
it's not apparent why any extension would need to call them).  In any
case, v13 already changed the API of check_sql_fn_retval() compared to
prior branches.

Per report from pinker.  Back-patch to v13 where this code came in.

Discussion: https://postgr.es/m/1603050466566-0.post@n3.nabble.com
---
 src/backend/catalog/pg_proc.c            |   4 +-
 src/backend/executor/functions.c         | 120 +++++++++++++----------
 src/backend/optimizer/util/clauses.c     |   7 +-
 src/include/executor/functions.h         |   4 +-
 src/test/regress/expected/rangefuncs.out |  44 +++++++++
 src/test/regress/sql/rangefuncs.sql      |  11 +++
 6 files changed, 131 insertions(+), 59 deletions(-)

diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c
index f7dab9925b..1dd9ecc063 100644
--- a/src/backend/catalog/pg_proc.c
+++ b/src/backend/catalog/pg_proc.c
@@ -913,8 +913,8 @@ fmgr_sql_validator(PG_FUNCTION_ARGS)
 																  (ParserSetupHook) sql_fn_parser_setup,
 																  pinfo,
 																  NULL);
-				querytree_list = list_concat(querytree_list,
-											 querytree_sublist);
+				querytree_list = lappend(querytree_list,
+										 querytree_sublist);
 			}
 
 			check_sql_fn_statements(querytree_list);
diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index bf00a9c1e8..459a33375b 100644
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -609,7 +609,6 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
 	SQLFunctionCachePtr fcache;
 	List	   *raw_parsetree_list;
 	List	   *queryTree_list;
-	List	   *flat_query_list;
 	List	   *resulttlist;
 	ListCell   *lc;
 	Datum		tmp;
@@ -689,13 +688,7 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
 
 	/*
 	 * Parse and rewrite the queries in the function text.  Use sublists to
-	 * keep track of the original query boundaries.  But we also build a
-	 * "flat" list of the rewritten queries to pass to check_sql_fn_retval.
-	 * This is because the last canSetTag query determines the result type
-	 * independently of query boundaries --- and it might not be in the last
-	 * sublist, for example if the last query rewrites to DO INSTEAD NOTHING.
-	 * (It might not be unreasonable to throw an error in such a case, but
-	 * this is the historical behavior and it doesn't seem worth changing.)
+	 * keep track of the original query boundaries.
 	 *
 	 * Note: since parsing and planning is done in fcontext, we will generate
 	 * a lot of cruft that lives as long as the fcache does.  This is annoying
@@ -705,7 +698,6 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
 	raw_parsetree_list = pg_parse_query(fcache->src);
 
 	queryTree_list = NIL;
-	flat_query_list = NIL;
 	foreach(lc, raw_parsetree_list)
 	{
 		RawStmt    *parsetree = lfirst_node(RawStmt, lc);
@@ -717,10 +709,12 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
 														  fcache->pinfo,
 														  NULL);
 		queryTree_list = lappend(queryTree_list, queryTree_sublist);
-		flat_query_list = list_concat(flat_query_list, queryTree_sublist);
 	}
 
-	check_sql_fn_statements(flat_query_list);
+	/*
+	 * Check that there are no statements we don't want to allow.
+	 */
+	check_sql_fn_statements(queryTree_list);
 
 	/*
 	 * Check that the function returns the type it claims to.  Although in
@@ -740,7 +734,7 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
 	 * the rowtype column into multiple columns, since we have no way to
 	 * notify the caller that it should do that.)
 	 */
-	fcache->returnsTuple = check_sql_fn_retval(flat_query_list,
+	fcache->returnsTuple = check_sql_fn_retval(queryTree_list,
 											   rettype,
 											   rettupdesc,
 											   false,
@@ -1510,51 +1504,63 @@ ShutdownSQLFunction(Datum arg)
  * is not acceptable.
  */
 void
-check_sql_fn_statements(List *queryTreeList)
+check_sql_fn_statements(List *queryTreeLists)
 {
 	ListCell   *lc;
 
-	foreach(lc, queryTreeList)
+	/* We are given a list of sublists of Queries */
+	foreach(lc, queryTreeLists)
 	{
-		Query	   *query = lfirst_node(Query, lc);
+		List	   *sublist = lfirst_node(List, lc);
+		ListCell   *lc2;
 
-		/*
-		 * Disallow procedures with output arguments.  The current
-		 * implementation would just throw the output values away, unless the
-		 * statement is the last one.  Per SQL standard, we should assign the
-		 * output values by name.  By disallowing this here, we preserve an
-		 * opportunity for future improvement.
-		 */
-		if (query->commandType == CMD_UTILITY &&
-			IsA(query->utilityStmt, CallStmt))
+		foreach(lc2, sublist)
 		{
-			CallStmt   *stmt = castNode(CallStmt, query->utilityStmt);
-			HeapTuple	tuple;
-			int			numargs;
-			Oid		   *argtypes;
-			char	  **argnames;
-			char	   *argmodes;
-			int			i;
+			Query	   *query = lfirst_node(Query, lc2);
 
-			tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(stmt->funcexpr->funcid));
-			if (!HeapTupleIsValid(tuple))
-				elog(ERROR, "cache lookup failed for function %u", stmt->funcexpr->funcid);
-			numargs = get_func_arg_info(tuple, &argtypes, &argnames, &argmodes);
-			ReleaseSysCache(tuple);
-
-			for (i = 0; i < numargs; i++)
+			/*
+			 * Disallow procedures with output arguments.  The current
+			 * implementation would just throw the output values away, unless
+			 * the statement is the last one.  Per SQL standard, we should
+			 * assign the output values by name.  By disallowing this here, we
+			 * preserve an opportunity for future improvement.
+			 */
+			if (query->commandType == CMD_UTILITY &&
+				IsA(query->utilityStmt, CallStmt))
 			{
-				if (argmodes && (argmodes[i] == PROARGMODE_INOUT || argmodes[i] == PROARGMODE_OUT))
-					ereport(ERROR,
-							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							 errmsg("calling procedures with output arguments is not supported in SQL functions")));
+				CallStmt   *stmt = castNode(CallStmt, query->utilityStmt);
+				HeapTuple	tuple;
+				int			numargs;
+				Oid		   *argtypes;
+				char	  **argnames;
+				char	   *argmodes;
+				int			i;
+
+				tuple = SearchSysCache1(PROCOID,
+										ObjectIdGetDatum(stmt->funcexpr->funcid));
+				if (!HeapTupleIsValid(tuple))
+					elog(ERROR, "cache lookup failed for function %u",
+						 stmt->funcexpr->funcid);
+				numargs = get_func_arg_info(tuple,
+											&argtypes, &argnames, &argmodes);
+				ReleaseSysCache(tuple);
+
+				for (i = 0; i < numargs; i++)
+				{
+					if (argmodes && (argmodes[i] == PROARGMODE_INOUT ||
+									 argmodes[i] == PROARGMODE_OUT))
+						ereport(ERROR,
+								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+								 errmsg("calling procedures with output arguments is not supported in SQL functions")));
+				}
 			}
 		}
 	}
 }
 
 /*
- * check_sql_fn_retval() -- check return value of a list of sql parse trees.
+ * check_sql_fn_retval()
+ *		Check return value of a list of lists of sql parse trees.
  *
  * The return value of a sql function is the value returned by the last
  * canSetTag query in the function.  We do some ad-hoc type checking and
@@ -1592,7 +1598,7 @@ check_sql_fn_statements(List *queryTreeList)
  * function is defined to return VOID then *resultTargetList is set to NIL.
  */
 bool
-check_sql_fn_retval(List *queryTreeList,
+check_sql_fn_retval(List *queryTreeLists,
 					Oid rettype, TupleDesc rettupdesc,
 					bool insertDroppedCols,
 					List **resultTargetList)
@@ -1619,20 +1625,30 @@ check_sql_fn_retval(List *queryTreeList,
 		return false;
 
 	/*
-	 * Find the last canSetTag query in the list.  This isn't necessarily the
-	 * last parsetree, because rule rewriting can insert queries after what
-	 * the user wrote.
+	 * Find the last canSetTag query in the function body (which is presented
+	 * to us as a list of sublists of Query nodes).  This isn't necessarily
+	 * the last parsetree, because rule rewriting can insert queries after
+	 * what the user wrote.  Note that it might not even be in the last
+	 * sublist, for example if the last query rewrites to DO INSTEAD NOTHING.
+	 * (It might not be unreasonable to throw an error in such a case, but
+	 * this is the historical behavior and it doesn't seem worth changing.)
 	 */
 	parse = NULL;
 	parse_cell = NULL;
-	foreach(lc, queryTreeList)
+	foreach(lc, queryTreeLists)
 	{
-		Query	   *q = lfirst_node(Query, lc);
+		List	   *sublist = lfirst_node(List, lc);
+		ListCell   *lc2;
 
-		if (q->canSetTag)
+		foreach(lc2, sublist)
 		{
-			parse = q;
-			parse_cell = lc;
+			Query	   *q = lfirst_node(Query, lc2);
+
+			if (q->canSetTag)
+			{
+				parse = q;
+				parse_cell = lc2;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 750586fceb..e7d814651b 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -4522,7 +4522,8 @@ inline_function(Oid funcid, Oid result_type, Oid result_collid,
 	 * needed; that's probably not important, but let's be careful.
 	 */
 	querytree_list = list_make1(querytree);
-	if (check_sql_fn_retval(querytree_list, result_type, rettupdesc,
+	if (check_sql_fn_retval(list_make1(querytree_list),
+							result_type, rettupdesc,
 							false, NULL))
 		goto fail;				/* reject whole-tuple-result cases */
 
@@ -5040,7 +5041,7 @@ inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte)
 	 * shows it's returning a whole tuple result; otherwise what it's
 	 * returning is a single composite column which is not what we need.
 	 */
-	if (!check_sql_fn_retval(querytree_list,
+	if (!check_sql_fn_retval(list_make1(querytree_list),
 							 fexpr->funcresulttype, rettupdesc,
 							 true, NULL) &&
 		(functypclass == TYPEFUNC_COMPOSITE ||
@@ -5052,7 +5053,7 @@ inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte)
 	 * check_sql_fn_retval might've inserted a projection step, but that's
 	 * fine; just make sure we use the upper Query.
 	 */
-	querytree = linitial(querytree_list);
+	querytree = linitial_node(Query, querytree_list);
 
 	/*
 	 * Looks good --- substitute parameters into the query.
diff --git a/src/include/executor/functions.h b/src/include/executor/functions.h
index cb13428a5a..a0db24bde6 100644
--- a/src/include/executor/functions.h
+++ b/src/include/executor/functions.h
@@ -29,9 +29,9 @@ extern SQLFunctionParseInfoPtr prepare_sql_fn_parse_info(HeapTuple procedureTupl
 extern void sql_fn_parser_setup(struct ParseState *pstate,
 								SQLFunctionParseInfoPtr pinfo);
 
-extern void check_sql_fn_statements(List *queryTreeList);
+extern void check_sql_fn_statements(List *queryTreeLists);
 
-extern bool check_sql_fn_retval(List *queryTreeList,
+extern bool check_sql_fn_retval(List *queryTreeLists,
 								Oid rettype, TupleDesc rettupdesc,
 								bool insertDroppedCols,
 								List **resultTargetList);
diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out
index e618aec2eb..06bd129fd2 100644
--- a/src/test/regress/expected/rangefuncs.out
+++ b/src/test/regress/expected/rangefuncs.out
@@ -2109,6 +2109,50 @@ select * from testrngfunc();
  7.136178 | 7.14
 (1 row)
 
+create or replace function testrngfunc() returns setof rngfunc_type as $$
+  select 1, 2 union select 3, 4 order by 1;
+$$ language sql immutable;
+explain (verbose, costs off)
+select testrngfunc();
+       QUERY PLAN        
+-------------------------
+ ProjectSet
+   Output: testrngfunc()
+   ->  Result
+(3 rows)
+
+select testrngfunc();
+   testrngfunc   
+-----------------
+ (1.000000,2.00)
+ (3.000000,4.00)
+(2 rows)
+
+explain (verbose, costs off)
+select * from testrngfunc();
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Subquery Scan on "*SELECT*"
+   Output: "*SELECT*"."?column?", "*SELECT*"."?column?_1"
+   ->  Unique
+         Output: (1), (2)
+         ->  Sort
+               Output: (1), (2)
+               Sort Key: (1), (2)
+               ->  Append
+                     ->  Result
+                           Output: 1, 2
+                     ->  Result
+                           Output: 3, 4
+(12 rows)
+
+select * from testrngfunc();
+    f1    |  f2  
+----------+------
+ 1.000000 | 2.00
+ 3.000000 | 4.00
+(2 rows)
+
 -- Check a couple of error cases while we're here
 select * from testrngfunc() as t(f1 int8,f2 int8);  -- fail, composite result
 ERROR:  a column definition list is redundant for a function returning a named composite type
diff --git a/src/test/regress/sql/rangefuncs.sql b/src/test/regress/sql/rangefuncs.sql
index 5f41cb2d8d..3c436028da 100644
--- a/src/test/regress/sql/rangefuncs.sql
+++ b/src/test/regress/sql/rangefuncs.sql
@@ -629,6 +629,17 @@ explain (verbose, costs off)
 select * from testrngfunc();
 select * from testrngfunc();
 
+create or replace function testrngfunc() returns setof rngfunc_type as $$
+  select 1, 2 union select 3, 4 order by 1;
+$$ language sql immutable;
+
+explain (verbose, costs off)
+select testrngfunc();
+select testrngfunc();
+explain (verbose, costs off)
+select * from testrngfunc();
+select * from testrngfunc();
+
 -- Check a couple of error cases while we're here
 select * from testrngfunc() as t(f1 int8,f2 int8);  -- fail, composite result
 select * from pg_get_keywords() as t(f1 int8,f2 int8);  -- fail, OUT params