From e502150f7d0be41e3c8784be007fa871a32d8a7f Mon Sep 17 00:00:00 2001
From: David Rowley <drowley@postgresql.org>
Date: Wed, 24 Nov 2021 10:06:59 +1300
Subject: [PATCH] Allow Memoize to operate in binary comparison mode

Memoize would always use the hash equality operator for the cache key
types to determine if the current set of parameters were the same as some
previously cached set.  Certain types, such as floating points where -0.0
and +0.0 differ in their binary representation but are classed as equal by
the hash equality operator, may cause problems: unless the join uses the
same operator, it's possible that whichever join operator is being used
would be able to distinguish the two values.  In which case we may
accidentally return the incorrect rows out of the cache.

To fix this, here we add a binary mode to Memoize to allow it to compare
the current set of parameters to previously cached values bit-by-bit
rather than logically using the hash equality operator.  This binary mode
is always used for LATERAL joins and it's used for normal joins when any
of the join operators are not hashable.

Reported-by: Tom Lane
Author: David Rowley
Discussion: https://postgr.es/m/3004308.1632952496@sss.pgh.pa.us
Backpatch-through: 14, where Memoize was added
---
 .../postgres_fdw/expected/postgres_fdw.out    |  3 +-
 src/backend/commands/explain.c                |  3 +
 src/backend/executor/nodeMemoize.c            | 94 +++++++++++++++----
 src/backend/nodes/copyfuncs.c                 |  1 +
 src/backend/nodes/outfuncs.c                  |  2 +
 src/backend/nodes/readfuncs.c                 |  1 +
 src/backend/optimizer/path/joinpath.c         | 38 +++++++-
 src/backend/optimizer/plan/createplan.c       | 10 +-
 src/backend/optimizer/util/pathnode.c         |  4 +-
 src/backend/utils/adt/datum.c                 | 52 ++++++++++
 src/include/nodes/execnodes.h                 |  2 +
 src/include/nodes/pathnodes.h                 |  2 +
 src/include/nodes/plannodes.h                 |  2 +
 src/include/optimizer/pathnode.h              |  1 +
 src/include/utils/datum.h                     |  8 ++
 src/test/regress/expected/join.out            | 28 ++++--
 src/test/regress/expected/memoize.out         | 93 +++++++++++++++++-
 src/test/regress/expected/subselect.out       |  3 +-
 src/test/regress/sql/memoize.sql              | 39 ++++++++
 19 files changed, 346 insertions(+), 40 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 786781db4b..5196e4797a 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2247,6 +2247,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
                Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
          ->  Memoize
                Cache Key: t1.c2
+               Cache Mode: binary
                ->  Subquery Scan on q
                      ->  HashAggregate
                            Output: t2.c1, t3.c1
@@ -2255,7 +2256,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
                                  Output: t2.c1, t3.c1
                                  Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
                                  Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")) AND ((r1.c2 = $1::integer))))
-(16 rows)
+(17 rows)
 
 SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
  C 1 
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 10644dfac4..09f5253abb 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3127,11 +3127,14 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
 	if (es->format != EXPLAIN_FORMAT_TEXT)
 	{
 		ExplainPropertyText("Cache Key", keystr.data, es);
+		ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es);
 	}
 	else
 	{
 		ExplainIndentText(es);
 		appendStringInfo(es->str, "Cache Key: %s\n", keystr.data);
+		ExplainIndentText(es);
+		appendStringInfo(es->str, "Cache Mode: %s\n", mstate->binary_mode ? "binary" : "logical");
 	}
 
 	pfree(keystr.data);
diff --git a/src/backend/executor/nodeMemoize.c b/src/backend/executor/nodeMemoize.c
index bec588b3a0..683502dd90 100644
--- a/src/backend/executor/nodeMemoize.c
+++ b/src/backend/executor/nodeMemoize.c
@@ -71,6 +71,7 @@
 #include "executor/nodeMemoize.h"
 #include "lib/ilist.h"
 #include "miscadmin.h"
+#include "utils/datum.h"
 #include "utils/lsyscache.h"
 
 /* States of the ExecMemoize state machine */
@@ -131,7 +132,7 @@ typedef struct MemoizeEntry
 
 static uint32 MemoizeHash_hash(struct memoize_hash *tb,
 							   const MemoizeKey *key);
-static int	MemoizeHash_equal(struct memoize_hash *tb,
+static bool MemoizeHash_equal(struct memoize_hash *tb,
 							  const MemoizeKey *params1,
 							  const MemoizeKey *params2);
 
@@ -140,7 +141,7 @@ static int	MemoizeHash_equal(struct memoize_hash *tb,
 #define SH_KEY_TYPE MemoizeKey *
 #define SH_KEY key
 #define SH_HASH_KEY(tb, key) MemoizeHash_hash(tb, key)
-#define SH_EQUAL(tb, a, b) (MemoizeHash_equal(tb, a, b) == 0)
+#define SH_EQUAL(tb, a, b) MemoizeHash_equal(tb, a, b)
 #define SH_SCOPE static inline
 #define SH_STORE_HASH
 #define SH_GET_HASH(tb, a) a->hash
@@ -160,21 +161,45 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key)
 	TupleTableSlot *pslot = mstate->probeslot;
 	uint32		hashkey = 0;
 	int			numkeys = mstate->nkeys;
-	FmgrInfo   *hashfunctions = mstate->hashfunctions;
-	Oid		   *collations = mstate->collations;
 
-	for (int i = 0; i < numkeys; i++)
+	if (mstate->binary_mode)
 	{
-		/* rotate hashkey left 1 bit at each step */
-		hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
-
-		if (!pslot->tts_isnull[i])	/* treat nulls as having hash key 0 */
+		for (int i = 0; i < numkeys; i++)
 		{
-			uint32		hkey;
+			/* rotate hashkey left 1 bit at each step */
+			hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
 
-			hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
-													collations[i], pslot->tts_values[i]));
-			hashkey ^= hkey;
+			if (!pslot->tts_isnull[i])	/* treat nulls as having hash key 0 */
+			{
+				FormData_pg_attribute *attr;
+				uint32		hkey;
+
+				attr = &pslot->tts_tupleDescriptor->attrs[i];
+
+				hkey = datum_image_hash(pslot->tts_values[i], attr->attbyval, attr->attlen);
+
+				hashkey ^= hkey;
+			}
+		}
+	}
+	else
+	{
+		FmgrInfo   *hashfunctions = mstate->hashfunctions;
+		Oid		   *collations = mstate->collations;
+
+		for (int i = 0; i < numkeys; i++)
+		{
+			/* rotate hashkey left 1 bit at each step */
+			hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+
+			if (!pslot->tts_isnull[i])	/* treat nulls as having hash key 0 */
+			{
+				uint32		hkey;
+
+				hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
+														collations[i], pslot->tts_values[i]));
+				hashkey ^= hkey;
+			}
 		}
 	}
 
@@ -187,7 +212,7 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key)
  *		table lookup.  'key2' is never used.  Instead the MemoizeState's
  *		probeslot is always populated with details of what's being looked up.
  */
-static int
+static bool
 MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1,
 				  const MemoizeKey *key2)
 {
@@ -199,9 +224,38 @@ MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1,
 	/* probeslot should have already been prepared by prepare_probe_slot() */
 	ExecStoreMinimalTuple(key1->params, tslot, false);
 
-	econtext->ecxt_innertuple = tslot;
-	econtext->ecxt_outertuple = pslot;
-	return !ExecQualAndReset(mstate->cache_eq_expr, econtext);
+	if (mstate->binary_mode)
+	{
+		int			numkeys = mstate->nkeys;
+
+		slot_getallattrs(tslot);
+		slot_getallattrs(pslot);
+
+		for (int i = 0; i < numkeys; i++)
+		{
+			FormData_pg_attribute *attr;
+
+			if (tslot->tts_isnull[i] != pslot->tts_isnull[i])
+				return false;
+
+			/* both NULL? they're equal */
+			if (tslot->tts_isnull[i])
+				continue;
+
+			/* perform binary comparison on the two datums */
+			attr = &tslot->tts_tupleDescriptor->attrs[i];
+			if (!datum_image_eq(tslot->tts_values[i], pslot->tts_values[i],
+								attr->attbyval, attr->attlen))
+				return false;
+		}
+		return true;
+	}
+	else
+	{
+		econtext->ecxt_innertuple = tslot;
+		econtext->ecxt_outertuple = pslot;
+		return ExecQualAndReset(mstate->cache_eq_expr, econtext);
+	}
 }
 
 /*
@@ -926,6 +980,12 @@ ExecInitMemoize(Memoize *node, EState *estate, int eflags)
 	 */
 	mstate->singlerow = node->singlerow;
 
+	/*
+	 * Record if the cache keys should be compared bit by bit, or logically
+	 * using the type's hash equality operator
+	 */
+	mstate->binary_mode = node->binary_mode;
+
 	/* Zero the statistics counters */
 	memset(&mstate->stats, 0, sizeof(MemoizeInstrumentation));
 
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index ad1ea2ff2f..7d55fd69ab 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -971,6 +971,7 @@ _copyMemoize(const Memoize *from)
 	COPY_POINTER_FIELD(collations, sizeof(Oid) * from->numKeys);
 	COPY_NODE_FIELD(param_exprs);
 	COPY_SCALAR_FIELD(singlerow);
+	COPY_SCALAR_FIELD(binary_mode);
 	COPY_SCALAR_FIELD(est_entries);
 
 	return newnode;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 23f23f11dc..be374a0d70 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -866,6 +866,7 @@ _outMemoize(StringInfo str, const Memoize *node)
 	WRITE_OID_ARRAY(collations, node->numKeys);
 	WRITE_NODE_FIELD(param_exprs);
 	WRITE_BOOL_FIELD(singlerow);
+	WRITE_BOOL_FIELD(binary_mode);
 	WRITE_UINT_FIELD(est_entries);
 }
 
@@ -1966,6 +1967,7 @@ _outMemoizePath(StringInfo str, const MemoizePath *node)
 	WRITE_NODE_FIELD(hash_operators);
 	WRITE_NODE_FIELD(param_exprs);
 	WRITE_BOOL_FIELD(singlerow);
+	WRITE_BOOL_FIELD(binary_mode);
 	WRITE_FLOAT_FIELD(calls, "%.0f");
 	WRITE_UINT_FIELD(est_entries);
 }
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index abf08b7a2f..a82c53ec0d 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -2230,6 +2230,7 @@ _readMemoize(void)
 	READ_OID_ARRAY(collations, local_node->numKeys);
 	READ_NODE_FIELD(param_exprs);
 	READ_BOOL_FIELD(singlerow);
+	READ_BOOL_FIELD(binary_mode);
 	READ_UINT_FIELD(est_entries);
 
 	READ_DONE();
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 0f3ad8aa65..322460e968 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -371,19 +371,21 @@ allow_star_schema_join(PlannerInfo *root,
  *		Returns true the hashing is possible, otherwise return false.
  *
  * Additionally we also collect the outer exprs and the hash operators for
- * each parameter to innerrel.  These set in 'param_exprs' and 'operators'
- * when we return true.
+ * each parameter to innerrel.  These are set in 'param_exprs', 'operators'
+ * and 'binary_mode' when we return true.
  */
 static bool
 paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
 							RelOptInfo *outerrel, RelOptInfo *innerrel,
-							List **param_exprs, List **operators)
+							List **param_exprs, List **operators,
+							bool *binary_mode)
 
 {
 	ListCell   *lc;
 
 	*param_exprs = NIL;
 	*operators = NIL;
+	*binary_mode = false;
 
 	if (param_info != NULL)
 	{
@@ -431,6 +433,20 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
 
 			*operators = lappend_oid(*operators, hasheqoperator);
 			*param_exprs = lappend(*param_exprs, expr);
+
+			/*
+			 * When the join operator is not hashable then it's possible that
+			 * the operator will be able to distinguish something that the
+			 * hash equality operator could not. For example with floating
+			 * point types -0.0 and +0.0 are classed as equal by the hash
+			 * function and equality function, but some other operator may be
+			 * able to tell those values apart.  This means that we must put
+			 * memoize into binary comparison mode so that it does bit-by-bit
+			 * comparisons rather than a "logical" comparison as it would
+			 * using the hash equality operator.
+			 */
+			if (!OidIsValid(rinfo->hashjoinoperator))
+				*binary_mode = true;
 		}
 	}
 
@@ -461,6 +477,17 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
 
 		*operators = lappend_oid(*operators, typentry->eq_opr);
 		*param_exprs = lappend(*param_exprs, expr);
+
+		/*
+		 * We must go into binary mode as we don't have too much of an idea of
+		 * how these lateral Vars are being used.  See comment above when we
+		 * set *binary_mode for the non-lateral Var case. This could be
+		 * relaxed a bit if we had the RestrictInfos and knew the operators
+		 * being used, however for cases like Vars that are arguments to
+		 * functions we must operate in binary mode as we don't have
+		 * visibility into what the function is doing with the Vars.
+		 */
+		*binary_mode = true;
 	}
 
 	/* We're okay to use memoize */
@@ -481,6 +508,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
 	List	   *param_exprs;
 	List	   *hash_operators;
 	ListCell   *lc;
+	bool		binary_mode;
 
 	/* Obviously not if it's disabled */
 	if (!enable_memoize)
@@ -572,7 +600,8 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
 									outerrel,
 									innerrel,
 									&param_exprs,
-									&hash_operators))
+									&hash_operators,
+									&binary_mode))
 	{
 		return (Path *) create_memoize_path(root,
 											innerrel,
@@ -580,6 +609,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
 											param_exprs,
 											hash_operators,
 											extra->inner_unique,
+											binary_mode,
 											outer_path->parent->rows);
 	}
 
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 3dc0176a51..866f19f64c 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -279,7 +279,8 @@ static Sort *make_sort_from_groupcols(List *groupcls,
 static Material *make_material(Plan *lefttree);
 static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
 							 Oid *collations, List *param_exprs,
-							 bool singlerow, uint32 est_entries);
+							 bool singlerow, bool binary_mode,
+							 uint32 est_entries);
 static WindowAgg *make_windowagg(List *tlist, Index winref,
 								 int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
 								 int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
@@ -1617,7 +1618,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
 	}
 
 	plan = make_memoize(subplan, operators, collations, param_exprs,
-						best_path->singlerow, best_path->est_entries);
+						best_path->singlerow, best_path->binary_mode,
+						best_path->est_entries);
 
 	copy_generic_path_info(&plan->plan, (Path *) best_path);
 
@@ -6417,7 +6419,8 @@ materialize_finished_plan(Plan *subplan)
 
 static Memoize *
 make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
-			 List *param_exprs, bool singlerow, uint32 est_entries)
+			 List *param_exprs, bool singlerow, bool binary_mode,
+			 uint32 est_entries)
 {
 	Memoize    *node = makeNode(Memoize);
 	Plan	   *plan = &node->plan;
@@ -6432,6 +6435,7 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
 	node->collations = collations;
 	node->param_exprs = param_exprs;
 	node->singlerow = singlerow;
+	node->binary_mode = binary_mode;
 	node->est_entries = est_entries;
 
 	return node;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e53d381e19..af5e8df26b 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1583,7 +1583,7 @@ create_material_path(RelOptInfo *rel, Path *subpath)
 MemoizePath *
 create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 					List *param_exprs, List *hash_operators,
-					bool singlerow, double calls)
+					bool singlerow, bool binary_mode, double calls)
 {
 	MemoizePath *pathnode = makeNode(MemoizePath);
 
@@ -1603,6 +1603,7 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	pathnode->hash_operators = hash_operators;
 	pathnode->param_exprs = param_exprs;
 	pathnode->singlerow = singlerow;
+	pathnode->binary_mode = binary_mode;
 	pathnode->calls = calls;
 
 	/*
@@ -3942,6 +3943,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
 													mpath->param_exprs,
 													mpath->hash_operators,
 													mpath->singlerow,
+													mpath->binary_mode,
 													mpath->calls);
 			}
 		default:
diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c
index 6a317fc0a6..2f22939574 100644
--- a/src/backend/utils/adt/datum.c
+++ b/src/backend/utils/adt/datum.c
@@ -43,6 +43,7 @@
 #include "postgres.h"
 
 #include "access/detoast.h"
+#include "common/hashfn.h"
 #include "fmgr.h"
 #include "utils/builtins.h"
 #include "utils/datum.h"
@@ -324,6 +325,57 @@ datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
 	return result;
 }
 
+/*-------------------------------------------------------------------------
+ * datum_image_hash
+ *
+ * Generate a hash value based on the binary representation of 'value'.  Most
+ * use cases will want to use the hash function specific to the Datum's type,
+ * however, some corner cases require generating a hash value based on the
+ * actual bits rather than the logical value.
+ *-------------------------------------------------------------------------
+ */
+uint32
+datum_image_hash(Datum value, bool typByVal, int typLen)
+{
+	Size		len;
+	uint32		result;
+
+	if (typByVal)
+		result = hash_bytes((unsigned char *) &value, sizeof(Datum));
+	else if (typLen > 0)
+		result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen);
+	else if (typLen == -1)
+	{
+		struct varlena *val;
+
+		len = toast_raw_datum_size(value);
+		/* yields either 'value' itself or a freshly made copy */
+		val = PG_DETOAST_DATUM_PACKED(value);
+		/* hash the data area only (len presumably counts the header) */
+		result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ);
+
+		/* Only free memory if it's a copy made here. */
+		if ((Pointer) val != (Pointer) value)
+			pfree(val);
+	}
+	else if (typLen == -2)
+	{
+		char	   *s;
+
+		s = DatumGetCString(value);
+		len = strlen(s) + 1;
+		/* len counts the terminating NUL, so it is hashed too */
+		result = hash_bytes((unsigned char *) s, len);
+	}
+	else
+	{
+		elog(ERROR, "unexpected typLen: %d", typLen);
+		result = 0;				/* keep compiler quiet */
+	}
+
+	return result;
+}
+
 /*-------------------------------------------------------------------------
  * btequalimage
  *
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 2e8cbee69f..d96ace32e4 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2109,6 +2109,8 @@ typedef struct MemoizeState
 								 * NULL if 'last_tuple' is NULL. */
 	bool		singlerow;		/* true if the cache entry is to be marked as
 								 * complete after caching the first tuple. */
+	bool		binary_mode;	/* true when cache key should be compared bit
+								 * by bit, false when using hash equality ops */
 	MemoizeInstrumentation stats;	/* execution statistics */
 	SharedMemoizeInfo *shared_info; /* statistics for parallel workers */
 } MemoizeState;
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 186e89905b..324d92880b 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1515,6 +1515,8 @@ typedef struct MemoizePath
 	List	   *param_exprs;	/* cache keys */
 	bool		singlerow;		/* true if the cache entry is to be marked as
 								 * complete after caching the first record. */
+	bool		binary_mode;	/* true when cache key should be compared bit
+								 * by bit, false when using hash equality ops */
 	Cardinality	calls;			/* expected number of rescans */
 	uint32		est_entries;	/* The maximum number of entries that the
 								 * planner expects will fit in the cache, or 0
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 01a246d50e..f1328be354 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -799,6 +799,8 @@ typedef struct Memoize
 	bool		singlerow;		/* true if the cache entry should be marked as
 								 * complete after we store the first tuple in
 								 * it. */
+	bool		binary_mode;	/* true when cache key should be compared bit
+								 * by bit, false when using hash equality ops */
 	uint32		est_entries;	/* The maximum number of entries that the
 								 * planner expects will fit in the cache, or 0
 								 * if unknown */
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index f704d39980..2922c0cdc1 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -88,6 +88,7 @@ extern MemoizePath *create_memoize_path(PlannerInfo *root,
 										List *param_exprs,
 										List *hash_operators,
 										bool singlerow,
+										bool binary_mode,
 										double calls);
 extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
 									  Path *subpath, SpecialJoinInfo *sjinfo);
diff --git a/src/include/utils/datum.h b/src/include/utils/datum.h
index d4cf62bed7..8a59f11006 100644
--- a/src/include/utils/datum.h
+++ b/src/include/utils/datum.h
@@ -55,6 +55,14 @@ extern bool datumIsEqual(Datum value1, Datum value2,
 extern bool datum_image_eq(Datum value1, Datum value2,
 						   bool typByVal, int typLen);
 
+/*
+ * datum_image_hash
+ *
+ * Generates hash value for 'value' based on its bits rather than logical
+ * value.
+ */
+extern uint32 datum_image_hash(Datum value, bool typByVal, int typLen);
+
 /*
  * Serialize and restore datums so that we can transfer them to parallel
  * workers.
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 84331659e7..d5b5b775fd 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3686,9 +3686,10 @@ where t1.unique1 = 1;
                      Index Cond: (hundred = t1.hundred)
          ->  Memoize
                Cache Key: t2.thousand
+               Cache Mode: logical
                ->  Index Scan using tenk1_unique2 on tenk1 t3
                      Index Cond: (unique2 = t2.thousand)
-(13 rows)
+(14 rows)
 
 explain (costs off)
 select * from tenk1 t1 left join
@@ -3708,9 +3709,10 @@ where t1.unique1 = 1;
                      Index Cond: (hundred = t1.hundred)
          ->  Memoize
                Cache Key: t2.thousand
+               Cache Mode: logical
                ->  Index Scan using tenk1_unique2 on tenk1 t3
                      Index Cond: (unique2 = t2.thousand)
-(13 rows)
+(14 rows)
 
 explain (costs off)
 select count(*) from
@@ -4238,11 +4240,12 @@ where t1.f1 = ss.f1;
    ->  Memoize
          Output: (i8.q1), t2.f1
          Cache Key: i8.q1
+         Cache Mode: binary
          ->  Limit
                Output: (i8.q1), t2.f1
                ->  Seq Scan on public.text_tbl t2
                      Output: i8.q1, t2.f1
-(19 rows)
+(20 rows)
 
 select * from
   text_tbl t1
@@ -4282,6 +4285,7 @@ where t1.f1 = ss2.f1;
          ->  Memoize
                Output: (i8.q1), t2.f1
                Cache Key: i8.q1
+               Cache Mode: binary
                ->  Limit
                      Output: (i8.q1), t2.f1
                      ->  Seq Scan on public.text_tbl t2
@@ -4289,11 +4293,12 @@ where t1.f1 = ss2.f1;
    ->  Memoize
          Output: ((i8.q1)), (t2.f1)
          Cache Key: (i8.q1), t2.f1
+         Cache Mode: binary
          ->  Limit
                Output: ((i8.q1)), (t2.f1)
                ->  Seq Scan on public.text_tbl t3
                      Output: (i8.q1), t2.f1
-(28 rows)
+(30 rows)
 
 select * from
   text_tbl t1
@@ -4342,6 +4347,7 @@ where tt1.f1 = ss1.c0;
    ->  Memoize
          Output: ss1.c0
          Cache Key: tt4.f1
+         Cache Mode: binary
          ->  Subquery Scan on ss1
                Output: ss1.c0
                Filter: (ss1.c0 = 'foo'::text)
@@ -4349,7 +4355,7 @@ where tt1.f1 = ss1.c0;
                      Output: (tt4.f1)
                      ->  Seq Scan on public.text_tbl tt5
                            Output: tt4.f1
-(32 rows)
+(33 rows)
 
 select 1 from
   text_tbl as tt1
@@ -5058,8 +5064,9 @@ explain (costs off)
          ->  Seq Scan on tenk1 a
          ->  Memoize
                Cache Key: a.two
+               Cache Mode: binary
                ->  Function Scan on generate_series g
-(6 rows)
+(7 rows)
 
 explain (costs off)
   select count(*) from tenk1 a cross join lateral generate_series(1,two) g;
@@ -5070,8 +5077,9 @@ explain (costs off)
          ->  Seq Scan on tenk1 a
          ->  Memoize
                Cache Key: a.two
+               Cache Mode: binary
                ->  Function Scan on generate_series g
-(6 rows)
+(7 rows)
 
 -- don't need the explicit LATERAL keyword for functions
 explain (costs off)
@@ -5083,8 +5091,9 @@ explain (costs off)
          ->  Seq Scan on tenk1 a
          ->  Memoize
                Cache Key: a.two
+               Cache Mode: binary
                ->  Function Scan on generate_series g
-(6 rows)
+(7 rows)
 
 -- lateral with UNION ALL subselect
 explain (costs off)
@@ -5145,9 +5154,10 @@ explain (costs off)
                ->  Values Scan on "*VALUES*"
          ->  Memoize
                Cache Key: "*VALUES*".column1
+               Cache Mode: logical
                ->  Index Only Scan using tenk1_unique2 on tenk1 b
                      Index Cond: (unique2 = "*VALUES*".column1)
-(9 rows)
+(10 rows)
 
 select count(*) from tenk1 a,
   tenk1 b join lateral (values(a.unique1),(-1)) ss(x) on b.unique2 = ss.x;
diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out
index 9a025c4a7a..0ed5d8474a 100644
--- a/src/test/regress/expected/memoize.out
+++ b/src/test/regress/expected/memoize.out
@@ -44,11 +44,12 @@ WHERE t2.unique1 < 1000;', false);
                Rows Removed by Filter: 9000
          ->  Memoize (actual rows=1 loops=N)
                Cache Key: t2.twenty
+               Cache Mode: logical
                Hits: 980  Misses: 20  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1 loops=N)
                      Index Cond: (unique1 = t2.twenty)
                      Heap Fetches: N
-(11 rows)
+(12 rows)
 
 -- And check we get the expected results.
 SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1
@@ -73,11 +74,12 @@ WHERE t1.unique1 < 1000;', false);
                Rows Removed by Filter: 9000
          ->  Memoize (actual rows=1 loops=N)
                Cache Key: t1.twenty
+               Cache Mode: logical
                Hits: 980  Misses: 20  Evictions: Zero  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1 loops=N)
                      Index Cond: (unique1 = t1.twenty)
                      Heap Fetches: N
-(11 rows)
+(12 rows)
 
 -- And check we get the expected results.
 SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
@@ -107,12 +109,94 @@ WHERE t2.unique1 < 1200;', true);
                Rows Removed by Filter: 8800
          ->  Memoize (actual rows=1 loops=N)
                Cache Key: t2.thousand
+               Cache Mode: logical
                Hits: N  Misses: N  Evictions: N  Overflows: 0  Memory Usage: NkB
                ->  Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1 loops=N)
                      Index Cond: (unique1 = t2.thousand)
                      Heap Fetches: N
-(11 rows)
+(12 rows)
 
+CREATE TABLE flt (f float);
+CREATE INDEX flt_f_idx ON flt (f);
+INSERT INTO flt VALUES('-0.0'::float),('+0.0'::float);
+ANALYZE flt;
+SET enable_seqscan TO off;
+-- Ensure memoize operates in logical mode
+SELECT explain_memoize('
+SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false);
+                                explain_memoize                                
+-------------------------------------------------------------------------------
+ Nested Loop (actual rows=4 loops=N)
+   ->  Index Only Scan using flt_f_idx on flt f1 (actual rows=2 loops=N)
+         Heap Fetches: N
+   ->  Memoize (actual rows=2 loops=N)
+         Cache Key: f1.f
+         Cache Mode: logical
+         Hits: 1  Misses: 1  Evictions: Zero  Overflows: 0  Memory Usage: NkB
+         ->  Index Only Scan using flt_f_idx on flt f2 (actual rows=2 loops=N)
+               Index Cond: (f = f1.f)
+               Heap Fetches: N
+(10 rows)
+
+-- Ensure memoize operates in binary mode
+SELECT explain_memoize('
+SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f >= f2.f;', false);
+                                explain_memoize                                
+-------------------------------------------------------------------------------
+ Nested Loop (actual rows=4 loops=N)
+   ->  Index Only Scan using flt_f_idx on flt f1 (actual rows=2 loops=N)
+         Heap Fetches: N
+   ->  Memoize (actual rows=2 loops=N)
+         Cache Key: f1.f
+         Cache Mode: binary
+         Hits: 0  Misses: 2  Evictions: Zero  Overflows: 0  Memory Usage: NkB
+         ->  Index Only Scan using flt_f_idx on flt f2 (actual rows=2 loops=N)
+               Index Cond: (f <= f1.f)
+               Heap Fetches: N
+(10 rows)
+
+DROP TABLE flt;
+-- Exercise Memoize in binary mode with a large fixed width type and a
+-- varlena type.
+CREATE TABLE strtest (n name, t text);
+CREATE INDEX strtest_n_idx ON strtest (n);
+CREATE INDEX strtest_t_idx ON strtest (t);
+INSERT INTO strtest VALUES('one','one'),('two','two'),('three',repeat(md5('three'),100));
+-- duplicate rows so we get some cache hits
+INSERT INTO strtest SELECT * FROM strtest;
+ANALYZE strtest;
+-- Ensure we get 3 hits and 3 misses
+SELECT explain_memoize('
+SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.n >= s2.n;', false);
+                                 explain_memoize                                  
+----------------------------------------------------------------------------------
+ Nested Loop (actual rows=24 loops=N)
+   ->  Seq Scan on strtest s1 (actual rows=6 loops=N)
+   ->  Memoize (actual rows=4 loops=N)
+         Cache Key: s1.n
+         Cache Mode: binary
+         Hits: 3  Misses: 3  Evictions: Zero  Overflows: 0  Memory Usage: NkB
+         ->  Index Scan using strtest_n_idx on strtest s2 (actual rows=4 loops=N)
+               Index Cond: (n <= s1.n)
+(8 rows)
+
+-- Ensure we get 3 hits and 3 misses
+SELECT explain_memoize('
+SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false);
+                                 explain_memoize                                  
+----------------------------------------------------------------------------------
+ Nested Loop (actual rows=24 loops=N)
+   ->  Seq Scan on strtest s1 (actual rows=6 loops=N)
+   ->  Memoize (actual rows=4 loops=N)
+         Cache Key: s1.t
+         Cache Mode: binary
+         Hits: 3  Misses: 3  Evictions: Zero  Overflows: 0  Memory Usage: NkB
+         ->  Index Scan using strtest_t_idx on strtest s2 (actual rows=4 loops=N)
+               Index Cond: (t <= s1.t)
+(8 rows)
+
+DROP TABLE strtest;
+RESET enable_seqscan;
 RESET enable_mergejoin;
 RESET work_mem;
 RESET enable_bitmapscan;
@@ -140,9 +224,10 @@ WHERE t1.unique1 < 1000;
                                  Index Cond: (unique1 < 1000)
                      ->  Memoize
                            Cache Key: t1.twenty
+                           Cache Mode: logical
                            ->  Index Only Scan using tenk1_unique1 on tenk1 t2
                                  Index Cond: (unique1 = t1.twenty)
-(13 rows)
+(14 rows)
 
 -- And ensure the parallel plan gives us the correct results.
 SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1,
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 0742626033..4e8ddc7061 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1139,13 +1139,14 @@ where o.ten = 1;
                Filter: (ten = 1)
          ->  Memoize
                Cache Key: o.four
+               Cache Mode: binary
                ->  CTE Scan on x
                      CTE x
                        ->  Recursive Union
                              ->  Result
                              ->  WorkTable Scan on x x_1
                                    Filter: (a < 10)
-(12 rows)
+(13 rows)
 
 select sum(o.four), sum(ss.a) from
   onek o cross join lateral (
diff --git a/src/test/regress/sql/memoize.sql b/src/test/regress/sql/memoize.sql
index 548cc3eee3..3c7360adf9 100644
--- a/src/test/regress/sql/memoize.sql
+++ b/src/test/regress/sql/memoize.sql
@@ -65,6 +65,45 @@ SELECT explain_memoize('
 SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1
 INNER JOIN tenk1 t2 ON t1.unique1 = t2.thousand
 WHERE t2.unique1 < 1200;', true);
+
+CREATE TABLE flt (f float);
+CREATE INDEX flt_f_idx ON flt (f);
+INSERT INTO flt VALUES('-0.0'::float),('+0.0'::float);
+ANALYZE flt;
+
+SET enable_seqscan TO off;
+
+-- Ensure memoize operates in logical mode
+SELECT explain_memoize('
+SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false);
+
+-- Ensure memoize operates in binary mode
+SELECT explain_memoize('
+SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f >= f2.f;', false);
+
+DROP TABLE flt;
+
+-- Exercise Memoize in binary mode with a large fixed width type and a
+-- varlena type.
+CREATE TABLE strtest (n name, t text);
+CREATE INDEX strtest_n_idx ON strtest (n);
+CREATE INDEX strtest_t_idx ON strtest (t);
+INSERT INTO strtest VALUES('one','one'),('two','two'),('three',repeat(md5('three'),100));
+-- duplicate rows so we get some cache hits
+INSERT INTO strtest SELECT * FROM strtest;
+ANALYZE strtest;
+
+-- Ensure we get 3 hits and 3 misses
+SELECT explain_memoize('
+SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.n >= s2.n;', false);
+
+-- Ensure we get 3 hits and 3 misses
+SELECT explain_memoize('
+SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false);
+
+DROP TABLE strtest;
+
+RESET enable_seqscan;
 RESET enable_mergejoin;
 RESET work_mem;
 RESET enable_bitmapscan;