From 1e2f96f0a56c2d67a84ffb58383e6354546cf96f Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 28 Jul 2016 16:09:15 -0400
Subject: [PATCH] Fix assorted fallout from IS [NOT] NULL patch.

Commits 4452000f3 et al established semantics for NullTest.argisrow that
are a bit different from its initial conception: rather than being merely
a cache of whether we've determined the input to have composite type,
the flag now has the further meaning that we should apply field-by-field
testing as per the standard's definition of IS [NOT] NULL.  If argisrow
is false and yet the input has composite type, the construct instead has
the semantics of IS [NOT] DISTINCT FROM NULL.  Update the comments in
primnodes.h to clarify this, and fix ruleutils.c and deparse.c to print
such cases correctly.  In the case of ruleutils.c, this merely results in
cosmetic changes in EXPLAIN output, since the case can't currently arise
in stored rules.  However, it represents a live bug for deparse.c, which
would formerly have sent a remote query that had semantics different
from the local behavior.  (From the user's standpoint, this means that
testing a remote nested-composite column for null-ness could have had
unexpected recursive behavior much like that fixed in 4452000f3.)

In a related but somewhat independent fix, make plancat.c set argisrow
to false in all NullTest expressions constructed to represent "attnotnull"
constructs.  Since attnotnull is actually enforced as a simple null-value
check, this is a more accurate representation of the semantics; we were
previously overpromising what it meant for composite columns, which might
possibly lead to incorrect planner optimizations.  (It seems that what the
SQL spec expects a NOT NULL constraint to mean is an IS NOT NULL test, so
arguably we are violating the spec and should fix attnotnull to apply the
field-by-field IS NOT NULL test instead.  If we ever do, this part should
get reverted.)

Back-patch to the same branches as the previous commit.

Discussion: <10682.1469566308@sss.pgh.pa.us>
---
 contrib/postgres_fdw/deparse.c         | 23 +++++++++++--
 src/backend/optimizer/util/plancat.c   |  8 ++++-
 src/backend/utils/adt/ruleutils.c      | 46 ++++++++++++++++++++------
 src/include/nodes/primnodes.h          | 14 ++++++--
 src/test/regress/expected/rowtypes.out |  6 ++--
 5 files changed, 77 insertions(+), 20 deletions(-)

diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 8f09cd51ca..dd25699d83 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -1806,10 +1806,27 @@ deparseNullTest(NullTest *node, deparse_expr_cxt *context)
 
 	appendStringInfoChar(buf, '(');
 	deparseExpr(node->arg, context);
-	if (node->nulltesttype == IS_NULL)
-		appendStringInfoString(buf, " IS NULL)");
+
+	/*
+	 * For scalar inputs, we prefer to print as IS [NOT] NULL, which is
+	 * shorter and traditional.  If it's a rowtype input but we're applying a
+	 * scalar test, must print IS [NOT] DISTINCT FROM NULL to be semantically
+	 * correct.
+	 */
+	if (node->argisrow || !type_is_rowtype(exprType((Node *) node->arg)))
+	{
+		if (node->nulltesttype == IS_NULL)
+			appendStringInfoString(buf, " IS NULL)");
+		else
+			appendStringInfoString(buf, " IS NOT NULL)");
+	}
 	else
-		appendStringInfoString(buf, " IS NOT NULL)");
+	{
+		if (node->nulltesttype == IS_NULL)
+			appendStringInfoString(buf, " IS NOT DISTINCT FROM NULL)");
+		else
+			appendStringInfoString(buf, " IS DISTINCT FROM NULL)");
+	}
 }
 
 /*
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 2be0793cc0..f037f90d98 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -1090,7 +1090,13 @@ get_relation_constraints(PlannerInfo *root,
 												  att->attcollation,
 												  0);
 					ntest->nulltesttype = IS_NOT_NULL;
-					ntest->argisrow = type_is_rowtype(att->atttypid);
+
+					/*
+					 * argisrow=false is correct even for a composite column,
+					 * because attnotnull does not represent a SQL-spec IS NOT
+					 * NULL test in such a case, just IS DISTINCT FROM NULL.
+					 */
+					ntest->argisrow = false;
 					ntest->location = -1;
 					result = lappend(result, ntest);
 				}
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 97bdccf6ec..8a6d0ad966 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -7890,17 +7890,43 @@ get_rule_expr(Node *node, deparse_context *context,
 				if (!PRETTY_PAREN(context))
 					appendStringInfoChar(buf, '(');
 				get_rule_expr_paren((Node *) ntest->arg, context, true, node);
-				switch (ntest->nulltesttype)
+
+				/*
+				 * For scalar inputs, we prefer to print as IS [NOT] NULL,
+				 * which is shorter and traditional.  If it's a rowtype input
+				 * but we're applying a scalar test, must print IS [NOT]
+				 * DISTINCT FROM NULL to be semantically correct.
+				 */
+				if (ntest->argisrow ||
+					!type_is_rowtype(exprType((Node *) ntest->arg)))
 				{
-					case IS_NULL:
-						appendStringInfoString(buf, " IS NULL");
-						break;
-					case IS_NOT_NULL:
-						appendStringInfoString(buf, " IS NOT NULL");
-						break;
-					default:
-						elog(ERROR, "unrecognized nulltesttype: %d",
-							 (int) ntest->nulltesttype);
+					switch (ntest->nulltesttype)
+					{
+						case IS_NULL:
+							appendStringInfoString(buf, " IS NULL");
+							break;
+						case IS_NOT_NULL:
+							appendStringInfoString(buf, " IS NOT NULL");
+							break;
+						default:
+							elog(ERROR, "unrecognized nulltesttype: %d",
+								 (int) ntest->nulltesttype);
+					}
+				}
+				else
+				{
+					switch (ntest->nulltesttype)
+					{
+						case IS_NULL:
+							appendStringInfoString(buf, " IS NOT DISTINCT FROM NULL");
+							break;
+						case IS_NOT_NULL:
+							appendStringInfoString(buf, " IS DISTINCT FROM NULL");
+							break;
+						default:
+							elog(ERROR, "unrecognized nulltesttype: %d",
+								 (int) ntest->nulltesttype);
+					}
 				}
 				if (!PRETTY_PAREN(context))
 					appendStringInfoChar(buf, ')');
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index c8b1f907a8..a8a2c4e4e7 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1076,8 +1076,16 @@ typedef struct XmlExpr
  * NullTest represents the operation of testing a value for NULLness.
  * The appropriate test is performed and returned as a boolean Datum.
  *
- * NOTE: the semantics of this for rowtype inputs are noticeably different
- * from the scalar case.  We provide an "argisrow" flag to reflect that.
+ * When argisrow is false, this simply represents a test for the null value.
+ *
+ * When argisrow is true, the input expression must yield a rowtype, and
+ * the node implements "row IS [NOT] NULL" per the SQL standard.  This
+ * includes checking individual fields for NULLness when the row datum
+ * itself isn't NULL.
+ *
+ * NOTE: the combination of a rowtype input and argisrow==false does NOT
+ * correspond to the SQL notation "row IS [NOT] NULL"; instead, this case
+ * represents the SQL notation "row IS [NOT] DISTINCT FROM NULL".
  * ----------------
  */
 
@@ -1091,7 +1099,7 @@ typedef struct NullTest
 	Expr		xpr;
 	Expr	   *arg;			/* input expression */
 	NullTestType nulltesttype;	/* IS NULL, IS NOT NULL */
-	bool		argisrow;		/* T if input is of a composite type */
+	bool		argisrow;		/* T to perform field-by-field null checks */
 	int			location;		/* token location, or -1 if unknown */
 } NullTest;
 
diff --git a/src/test/regress/expected/rowtypes.out b/src/test/regress/expected/rowtypes.out
index 2971640b4b..25b08281c8 100644
--- a/src/test/regress/expected/rowtypes.out
+++ b/src/test/regress/expected/rowtypes.out
@@ -664,10 +664,10 @@ explain (verbose, costs off)
 select r, r is null as isnull, r is not null as isnotnull
 from (values (1,row(1,2)), (1,row(null,null)), (1,null),
              (null,row(1,2)), (null,row(null,null)), (null,null) ) r(a,b);
-                                                                                           QUERY PLAN                                                                                            
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+                                                                                                         QUERY PLAN                                                                                                          
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Values Scan on "*VALUES*"
-   Output: ROW("*VALUES*".column1, "*VALUES*".column2), (("*VALUES*".column1 IS NULL) AND ("*VALUES*".column2 IS NULL)), (("*VALUES*".column1 IS NOT NULL) AND ("*VALUES*".column2 IS NOT NULL))
+   Output: ROW("*VALUES*".column1, "*VALUES*".column2), (("*VALUES*".column1 IS NULL) AND ("*VALUES*".column2 IS NOT DISTINCT FROM NULL)), (("*VALUES*".column1 IS NOT NULL) AND ("*VALUES*".column2 IS DISTINCT FROM NULL))
 (2 rows)
 
 select r, r is null as isnull, r is not null as isnotnull