diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 786781db4b..5196e4797a 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2247,6 +2247,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8 -> Memoize Cache Key: t1.c2 + Cache Mode: binary -> Subquery Scan on q -> HashAggregate Output: t2.c1, t3.c1 @@ -2255,7 +2256,7 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM Output: t2.c1, t3.c1 Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3) Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")) AND ((r1.c2 = $1::integer)))) -(16 rows) +(17 rows) SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10; C 1 diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 10644dfac4..09f5253abb 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -3127,11 +3127,14 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es) if (es->format != EXPLAIN_FORMAT_TEXT) { ExplainPropertyText("Cache Key", keystr.data, es); + ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es); } else { ExplainIndentText(es); appendStringInfo(es->str, "Cache Key: %s\n", keystr.data); + ExplainIndentText(es); + appendStringInfo(es->str, "Cache Mode: %s\n", mstate->binary_mode ? 
"binary" : "logical"); } pfree(keystr.data); diff --git a/src/backend/executor/nodeMemoize.c b/src/backend/executor/nodeMemoize.c index bec588b3a0..683502dd90 100644 --- a/src/backend/executor/nodeMemoize.c +++ b/src/backend/executor/nodeMemoize.c @@ -71,6 +71,7 @@ #include "executor/nodeMemoize.h" #include "lib/ilist.h" #include "miscadmin.h" +#include "utils/datum.h" #include "utils/lsyscache.h" /* States of the ExecMemoize state machine */ @@ -131,7 +132,7 @@ typedef struct MemoizeEntry static uint32 MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key); -static int MemoizeHash_equal(struct memoize_hash *tb, +static bool MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *params1, const MemoizeKey *params2); @@ -140,7 +141,7 @@ static int MemoizeHash_equal(struct memoize_hash *tb, #define SH_KEY_TYPE MemoizeKey * #define SH_KEY key #define SH_HASH_KEY(tb, key) MemoizeHash_hash(tb, key) -#define SH_EQUAL(tb, a, b) (MemoizeHash_equal(tb, a, b) == 0) +#define SH_EQUAL(tb, a, b) MemoizeHash_equal(tb, a, b) #define SH_SCOPE static inline #define SH_STORE_HASH #define SH_GET_HASH(tb, a) a->hash @@ -160,21 +161,45 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key) TupleTableSlot *pslot = mstate->probeslot; uint32 hashkey = 0; int numkeys = mstate->nkeys; - FmgrInfo *hashfunctions = mstate->hashfunctions; - Oid *collations = mstate->collations; - for (int i = 0; i < numkeys; i++) + if (mstate->binary_mode) { - /* rotate hashkey left 1 bit at each step */ - hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); - - if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */ + for (int i = 0; i < numkeys; i++) { - uint32 hkey; + /* rotate hashkey left 1 bit at each step */ + hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 
1 : 0); - hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], - collations[i], pslot->tts_values[i])); - hashkey ^= hkey; + if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */ + { + FormData_pg_attribute *attr; + uint32 hkey; + + attr = &pslot->tts_tupleDescriptor->attrs[i]; + + hkey = datum_image_hash(pslot->tts_values[i], attr->attbyval, attr->attlen); + + hashkey ^= hkey; + } + } + } + else + { + FmgrInfo *hashfunctions = mstate->hashfunctions; + Oid *collations = mstate->collations; + + for (int i = 0; i < numkeys; i++) + { + /* rotate hashkey left 1 bit at each step */ + hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); + + if (!pslot->tts_isnull[i]) /* treat nulls as having hash key 0 */ + { + uint32 hkey; + + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], + collations[i], pslot->tts_values[i])); + hashkey ^= hkey; + } } } @@ -187,7 +212,7 @@ MemoizeHash_hash(struct memoize_hash *tb, const MemoizeKey *key) * table lookup. 'key2' is never used. Instead the MemoizeState's * probeslot is always populated with details of what's being looked up. */ -static int +static bool MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1, const MemoizeKey *key2) { @@ -199,9 +224,38 @@ MemoizeHash_equal(struct memoize_hash *tb, const MemoizeKey *key1, /* probeslot should have already been prepared by prepare_probe_slot() */ ExecStoreMinimalTuple(key1->params, tslot, false); - econtext->ecxt_innertuple = tslot; - econtext->ecxt_outertuple = pslot; - return !ExecQualAndReset(mstate->cache_eq_expr, econtext); + if (mstate->binary_mode) + { + int numkeys = mstate->nkeys; + + slot_getallattrs(tslot); + slot_getallattrs(pslot); + + for (int i = 0; i < numkeys; i++) + { + FormData_pg_attribute *attr; + + if (tslot->tts_isnull[i] != pslot->tts_isnull[i]) + return false; + + /* both NULL? 
they're equal */ + if (tslot->tts_isnull[i]) + continue; + + /* perform binary comparison on the two datums */ + attr = &tslot->tts_tupleDescriptor->attrs[i]; + if (!datum_image_eq(tslot->tts_values[i], pslot->tts_values[i], + attr->attbyval, attr->attlen)) + return false; + } + return true; + } + else + { + econtext->ecxt_innertuple = tslot; + econtext->ecxt_outertuple = pslot; + return ExecQualAndReset(mstate->cache_eq_expr, econtext); + } } /* @@ -926,6 +980,12 @@ ExecInitMemoize(Memoize *node, EState *estate, int eflags) */ mstate->singlerow = node->singlerow; + /* + * Record if the cache keys should be compared bit by bit, or logically + * using the type's hash equality operator + */ + mstate->binary_mode = node->binary_mode; + /* Zero the statistics counters */ memset(&mstate->stats, 0, sizeof(MemoizeInstrumentation)); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index ad1ea2ff2f..7d55fd69ab 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -971,6 +971,7 @@ _copyMemoize(const Memoize *from) COPY_POINTER_FIELD(collations, sizeof(Oid) * from->numKeys); COPY_NODE_FIELD(param_exprs); COPY_SCALAR_FIELD(singlerow); + COPY_SCALAR_FIELD(binary_mode); COPY_SCALAR_FIELD(est_entries); return newnode; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 23f23f11dc..be374a0d70 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -866,6 +866,7 @@ _outMemoize(StringInfo str, const Memoize *node) WRITE_OID_ARRAY(collations, node->numKeys); WRITE_NODE_FIELD(param_exprs); WRITE_BOOL_FIELD(singlerow); + WRITE_BOOL_FIELD(binary_mode); WRITE_UINT_FIELD(est_entries); } @@ -1966,6 +1967,7 @@ _outMemoizePath(StringInfo str, const MemoizePath *node) WRITE_NODE_FIELD(hash_operators); WRITE_NODE_FIELD(param_exprs); WRITE_BOOL_FIELD(singlerow); + WRITE_BOOL_FIELD(binary_mode); WRITE_FLOAT_FIELD(calls, "%.0f"); WRITE_UINT_FIELD(est_entries); } diff --git 
a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index abf08b7a2f..a82c53ec0d 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -2230,6 +2230,7 @@ _readMemoize(void) READ_OID_ARRAY(collations, local_node->numKeys); READ_NODE_FIELD(param_exprs); READ_BOOL_FIELD(singlerow); + READ_BOOL_FIELD(binary_mode); READ_UINT_FIELD(est_entries); READ_DONE(); diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 0f3ad8aa65..322460e968 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -371,19 +371,21 @@ allow_star_schema_join(PlannerInfo *root, * Returns true the hashing is possible, otherwise return false. * * Additionally we also collect the outer exprs and the hash operators for - * each parameter to innerrel. These set in 'param_exprs' and 'operators' - * when we return true. + * each parameter to innerrel. These are set in 'param_exprs', 'operators' and + * 'binary_mode' when we return true. */ static bool paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info, RelOptInfo *outerrel, RelOptInfo *innerrel, - List **param_exprs, List **operators) + List **param_exprs, List **operators, + bool *binary_mode) { ListCell *lc; *param_exprs = NIL; *operators = NIL; + *binary_mode = false; if (param_info != NULL) { @@ -431,6 +433,20 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info, *operators = lappend_oid(*operators, hasheqoperator); *param_exprs = lappend(*param_exprs, expr); + + /* + * When the join operator is not hashable then it's possible that + * the operator will be able to distinguish something that the + * hash equality operator could not. For example with floating + * point types -0.0 and +0.0 are classed as equal by the hash + * function and equality function, but some other operator may be + * able to tell those values apart. 
This means that we must put + * memoize into binary comparison mode so that it does bit-by-bit + * comparisons rather than a "logical" comparison as it would + * using the hash equality operator. + */ + if (!OidIsValid(rinfo->hashjoinoperator)) + *binary_mode = true; } } @@ -461,6 +477,17 @@ paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info, *operators = lappend_oid(*operators, typentry->eq_opr); *param_exprs = lappend(*param_exprs, expr); + + /* + * We must go into binary mode as we don't have too much of an idea of + * how these lateral Vars are being used. See comment above when we + * set *binary_mode for the non-lateral Var case. This could be + * relaxed a bit if we had the RestrictInfos and knew the operators + * being used, however for cases like Vars that are arguments to + * functions we must operate in binary mode as we don't have + * visibility into what the function is doing with the Vars. + */ + *binary_mode = true; } /* We're okay to use memoize */ @@ -481,6 +508,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, List *param_exprs; List *hash_operators; ListCell *lc; + bool binary_mode; /* Obviously not if it's disabled */ if (!enable_memoize) @@ -572,7 +600,8 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, outerrel, innerrel, &param_exprs, - &hash_operators)) + &hash_operators, + &binary_mode)) { return (Path *) create_memoize_path(root, innerrel, @@ -580,6 +609,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, param_exprs, hash_operators, extra->inner_unique, + binary_mode, outer_path->parent->rows); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 3dc0176a51..866f19f64c 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -279,7 +279,8 @@ static Sort *make_sort_from_groupcols(List *groupcls, static Material *make_material(Plan *lefttree); static Memoize *make_memoize(Plan *lefttree, Oid 
*hashoperators, Oid *collations, List *param_exprs, - bool singlerow, uint32 est_entries); + bool singlerow, bool binary_mode, + uint32 est_entries); static WindowAgg *make_windowagg(List *tlist, Index winref, int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, @@ -1617,7 +1618,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags) } plan = make_memoize(subplan, operators, collations, param_exprs, - best_path->singlerow, best_path->est_entries); + best_path->singlerow, best_path->binary_mode, + best_path->est_entries); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -6417,7 +6419,8 @@ materialize_finished_plan(Plan *subplan) static Memoize * make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, - List *param_exprs, bool singlerow, uint32 est_entries) + List *param_exprs, bool singlerow, bool binary_mode, + uint32 est_entries) { Memoize *node = makeNode(Memoize); Plan *plan = &node->plan; @@ -6432,6 +6435,7 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations, node->collations = collations; node->param_exprs = param_exprs; node->singlerow = singlerow; + node->binary_mode = binary_mode; node->est_entries = est_entries; return node; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e53d381e19..af5e8df26b 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1583,7 +1583,7 @@ create_material_path(RelOptInfo *rel, Path *subpath) MemoizePath * create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *param_exprs, List *hash_operators, - bool singlerow, double calls) + bool singlerow, bool binary_mode, double calls) { MemoizePath *pathnode = makeNode(MemoizePath); @@ -1603,6 +1603,7 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, pathnode->hash_operators = 
hash_operators; pathnode->param_exprs = param_exprs; pathnode->singlerow = singlerow; + pathnode->binary_mode = binary_mode; pathnode->calls = calls; /* @@ -3942,6 +3943,7 @@ reparameterize_path(PlannerInfo *root, Path *path, mpath->param_exprs, mpath->hash_operators, mpath->singlerow, + mpath->binary_mode, mpath->calls); } default: diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c index 6a317fc0a6..2f22939574 100644 --- a/src/backend/utils/adt/datum.c +++ b/src/backend/utils/adt/datum.c @@ -43,6 +43,7 @@ #include "postgres.h" #include "access/detoast.h" +#include "common/hashfn.h" #include "fmgr.h" #include "utils/builtins.h" #include "utils/datum.h" @@ -324,6 +325,57 @@ datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen) return result; } +/*------------------------------------------------------------------------- + * datum_image_hash + * + * Generate a hash value based on the binary representation of 'value'. Most + * use cases will want to use the hash function specific to the Datum's type, + * however, some corner cases require generating a hash value based on the + * actual bits rather than the logical value. + *------------------------------------------------------------------------- + */ +uint32 +datum_image_hash(Datum value, bool typByVal, int typLen) +{ + Size len; + uint32 result; + + if (typByVal) + result = hash_bytes((unsigned char *) &value, sizeof(Datum)); + else if (typLen > 0) + result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen); + else if (typLen == -1) + { + struct varlena *val; + + len = toast_raw_datum_size(value); + + val = PG_DETOAST_DATUM_PACKED(value); + + result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ); + + /* Only free memory if it's a copy made here. 
*/ + if ((Pointer) val != (Pointer) value) + pfree(val); + } + else if (typLen == -2) + { + char *s; + + s = DatumGetCString(value); + len = strlen(s) + 1; + + result = hash_bytes((unsigned char *) s, len); + } + else + { + elog(ERROR, "unexpected typLen: %d", typLen); + result = 0; /* keep compiler quiet */ + } + + return result; +} + /*------------------------------------------------------------------------- * btequalimage * diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 2e8cbee69f..d96ace32e4 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2109,6 +2109,8 @@ typedef struct MemoizeState * NULL if 'last_tuple' is NULL. */ bool singlerow; /* true if the cache entry is to be marked as * complete after caching the first tuple. */ + bool binary_mode; /* true when cache key should be compared bit + * by bit, false when using hash equality ops */ MemoizeInstrumentation stats; /* execution statistics */ SharedMemoizeInfo *shared_info; /* statistics for parallel workers */ } MemoizeState; diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 186e89905b..324d92880b 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1515,6 +1515,8 @@ typedef struct MemoizePath List *param_exprs; /* cache keys */ bool singlerow; /* true if the cache entry is to be marked as * complete after caching the first record. 
*/ + bool binary_mode; /* true when cache key should be compared bit + * by bit, false when using hash equality ops */ Cardinality calls; /* expected number of rescans */ uint32 est_entries; /* The maximum number of entries that the * planner expects will fit in the cache, or 0 diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 01a246d50e..f1328be354 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -799,6 +799,8 @@ typedef struct Memoize bool singlerow; /* true if the cache entry should be marked as * complete after we store the first tuple in * it. */ + bool binary_mode; /* true when cache key should be compared bit + * by bit, false when using hash equality ops */ uint32 est_entries; /* The maximum number of entries that the * planner expects will fit in the cache, or 0 * if unknown */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index f704d39980..2922c0cdc1 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -88,6 +88,7 @@ extern MemoizePath *create_memoize_path(PlannerInfo *root, List *param_exprs, List *hash_operators, bool singlerow, + bool binary_mode, double calls); extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SpecialJoinInfo *sjinfo); diff --git a/src/include/utils/datum.h b/src/include/utils/datum.h index d4cf62bed7..8a59f11006 100644 --- a/src/include/utils/datum.h +++ b/src/include/utils/datum.h @@ -55,6 +55,14 @@ extern bool datumIsEqual(Datum value1, Datum value2, extern bool datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen); +/* + * datum_image_hash + * + * Generates hash value for 'value' based on its bits rather than logical + * value. + */ +extern uint32 datum_image_hash(Datum value, bool typByVal, int typLen); + /* * Serialize and restore datums so that we can transfer them to parallel * workers. 
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 84331659e7..d5b5b775fd 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3686,9 +3686,10 @@ where t1.unique1 = 1; Index Cond: (hundred = t1.hundred) -> Memoize Cache Key: t2.thousand + Cache Mode: logical -> Index Scan using tenk1_unique2 on tenk1 t3 Index Cond: (unique2 = t2.thousand) -(13 rows) +(14 rows) explain (costs off) select * from tenk1 t1 left join @@ -3708,9 +3709,10 @@ where t1.unique1 = 1; Index Cond: (hundred = t1.hundred) -> Memoize Cache Key: t2.thousand + Cache Mode: logical -> Index Scan using tenk1_unique2 on tenk1 t3 Index Cond: (unique2 = t2.thousand) -(13 rows) +(14 rows) explain (costs off) select count(*) from @@ -4238,11 +4240,12 @@ where t1.f1 = ss.f1; -> Memoize Output: (i8.q1), t2.f1 Cache Key: i8.q1 + Cache Mode: binary -> Limit Output: (i8.q1), t2.f1 -> Seq Scan on public.text_tbl t2 Output: i8.q1, t2.f1 -(19 rows) +(20 rows) select * from text_tbl t1 @@ -4282,6 +4285,7 @@ where t1.f1 = ss2.f1; -> Memoize Output: (i8.q1), t2.f1 Cache Key: i8.q1 + Cache Mode: binary -> Limit Output: (i8.q1), t2.f1 -> Seq Scan on public.text_tbl t2 @@ -4289,11 +4293,12 @@ where t1.f1 = ss2.f1; -> Memoize Output: ((i8.q1)), (t2.f1) Cache Key: (i8.q1), t2.f1 + Cache Mode: binary -> Limit Output: ((i8.q1)), (t2.f1) -> Seq Scan on public.text_tbl t3 Output: (i8.q1), t2.f1 -(28 rows) +(30 rows) select * from text_tbl t1 @@ -4342,6 +4347,7 @@ where tt1.f1 = ss1.c0; -> Memoize Output: ss1.c0 Cache Key: tt4.f1 + Cache Mode: binary -> Subquery Scan on ss1 Output: ss1.c0 Filter: (ss1.c0 = 'foo'::text) @@ -4349,7 +4355,7 @@ where tt1.f1 = ss1.c0; Output: (tt4.f1) -> Seq Scan on public.text_tbl tt5 Output: tt4.f1 -(32 rows) +(33 rows) select 1 from text_tbl as tt1 @@ -5058,8 +5064,9 @@ explain (costs off) -> Seq Scan on tenk1 a -> Memoize Cache Key: a.two + Cache Mode: binary -> Function Scan on generate_series g -(6 rows) +(7 rows) 
explain (costs off) select count(*) from tenk1 a cross join lateral generate_series(1,two) g; @@ -5070,8 +5077,9 @@ explain (costs off) -> Seq Scan on tenk1 a -> Memoize Cache Key: a.two + Cache Mode: binary -> Function Scan on generate_series g -(6 rows) +(7 rows) -- don't need the explicit LATERAL keyword for functions explain (costs off) @@ -5083,8 +5091,9 @@ explain (costs off) -> Seq Scan on tenk1 a -> Memoize Cache Key: a.two + Cache Mode: binary -> Function Scan on generate_series g -(6 rows) +(7 rows) -- lateral with UNION ALL subselect explain (costs off) @@ -5145,9 +5154,10 @@ explain (costs off) -> Values Scan on "*VALUES*" -> Memoize Cache Key: "*VALUES*".column1 + Cache Mode: logical -> Index Only Scan using tenk1_unique2 on tenk1 b Index Cond: (unique2 = "*VALUES*".column1) -(9 rows) +(10 rows) select count(*) from tenk1 a, tenk1 b join lateral (values(a.unique1),(-1)) ss(x) on b.unique2 = ss.x; diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out index 9a025c4a7a..0ed5d8474a 100644 --- a/src/test/regress/expected/memoize.out +++ b/src/test/regress/expected/memoize.out @@ -44,11 +44,12 @@ WHERE t2.unique1 < 1000;', false); Rows Removed by Filter: 9000 -> Memoize (actual rows=1 loops=N) Cache Key: t2.twenty + Cache Mode: logical Hits: 980 Misses: 20 Evictions: Zero Overflows: 0 Memory Usage: NkB -> Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1 loops=N) Index Cond: (unique1 = t2.twenty) Heap Fetches: N -(11 rows) +(12 rows) -- And check we get the expected results. 
SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1 @@ -73,11 +74,12 @@ WHERE t1.unique1 < 1000;', false); Rows Removed by Filter: 9000 -> Memoize (actual rows=1 loops=N) Cache Key: t1.twenty + Cache Mode: logical Hits: 980 Misses: 20 Evictions: Zero Overflows: 0 Memory Usage: NkB -> Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1 loops=N) Index Cond: (unique1 = t1.twenty) Heap Fetches: N -(11 rows) +(12 rows) -- And check we get the expected results. SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1, @@ -107,12 +109,94 @@ WHERE t2.unique1 < 1200;', true); Rows Removed by Filter: 8800 -> Memoize (actual rows=1 loops=N) Cache Key: t2.thousand + Cache Mode: logical Hits: N Misses: N Evictions: N Overflows: 0 Memory Usage: NkB -> Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1 loops=N) Index Cond: (unique1 = t2.thousand) Heap Fetches: N -(11 rows) +(12 rows) +CREATE TABLE flt (f float); +CREATE INDEX flt_f_idx ON flt (f); +INSERT INTO flt VALUES('-0.0'::float),('+0.0'::float); +ANALYZE flt; +SET enable_seqscan TO off; +-- Ensure memoize operates in logical mode +SELECT explain_memoize(' +SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false); + explain_memoize +------------------------------------------------------------------------------- + Nested Loop (actual rows=4 loops=N) + -> Index Only Scan using flt_f_idx on flt f1 (actual rows=2 loops=N) + Heap Fetches: N + -> Memoize (actual rows=2 loops=N) + Cache Key: f1.f + Cache Mode: logical + Hits: 1 Misses: 1 Evictions: Zero Overflows: 0 Memory Usage: NkB + -> Index Only Scan using flt_f_idx on flt f2 (actual rows=2 loops=N) + Index Cond: (f = f1.f) + Heap Fetches: N +(10 rows) + +-- Ensure memoize operates in binary mode +SELECT explain_memoize(' +SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f >= f2.f;', false); + explain_memoize +------------------------------------------------------------------------------- + Nested Loop (actual rows=4 loops=N) + -> Index Only Scan using flt_f_idx on 
flt f1 (actual rows=2 loops=N) + Heap Fetches: N + -> Memoize (actual rows=2 loops=N) + Cache Key: f1.f + Cache Mode: binary + Hits: 0 Misses: 2 Evictions: Zero Overflows: 0 Memory Usage: NkB + -> Index Only Scan using flt_f_idx on flt f2 (actual rows=2 loops=N) + Index Cond: (f <= f1.f) + Heap Fetches: N +(10 rows) + +DROP TABLE flt; +-- Exercise Memoize in binary mode with a large fixed width type and a +-- varlena type. +CREATE TABLE strtest (n name, t text); +CREATE INDEX strtest_n_idx ON strtest (n); +CREATE INDEX strtest_t_idx ON strtest (t); +INSERT INTO strtest VALUES('one','one'),('two','two'),('three',repeat(md5('three'),100)); +-- duplicate rows so we get some cache hits +INSERT INTO strtest SELECT * FROM strtest; +ANALYZE strtest; +-- Ensure we get 3 hits and 3 misses +SELECT explain_memoize(' +SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.n >= s2.n;', false); + explain_memoize +---------------------------------------------------------------------------------- + Nested Loop (actual rows=24 loops=N) + -> Seq Scan on strtest s1 (actual rows=6 loops=N) + -> Memoize (actual rows=4 loops=N) + Cache Key: s1.n + Cache Mode: binary + Hits: 3 Misses: 3 Evictions: Zero Overflows: 0 Memory Usage: NkB + -> Index Scan using strtest_n_idx on strtest s2 (actual rows=4 loops=N) + Index Cond: (n <= s1.n) +(8 rows) + +-- Ensure we get 3 hits and 3 misses +SELECT explain_memoize(' +SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false); + explain_memoize +---------------------------------------------------------------------------------- + Nested Loop (actual rows=24 loops=N) + -> Seq Scan on strtest s1 (actual rows=6 loops=N) + -> Memoize (actual rows=4 loops=N) + Cache Key: s1.t + Cache Mode: binary + Hits: 3 Misses: 3 Evictions: Zero Overflows: 0 Memory Usage: NkB + -> Index Scan using strtest_t_idx on strtest s2 (actual rows=4 loops=N) + Index Cond: (t <= s1.t) +(8 rows) + +DROP TABLE strtest; +RESET enable_seqscan; RESET enable_mergejoin; 
RESET work_mem; RESET enable_bitmapscan; @@ -140,9 +224,10 @@ WHERE t1.unique1 < 1000; Index Cond: (unique1 < 1000) -> Memoize Cache Key: t1.twenty + Cache Mode: logical -> Index Only Scan using tenk1_unique1 on tenk1 t2 Index Cond: (unique1 = t1.twenty) -(13 rows) +(14 rows) -- And ensure the parallel plan gives us the correct results. SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1, diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 0742626033..4e8ddc7061 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1139,13 +1139,14 @@ where o.ten = 1; Filter: (ten = 1) -> Memoize Cache Key: o.four + Cache Mode: binary -> CTE Scan on x CTE x -> Recursive Union -> Result -> WorkTable Scan on x x_1 Filter: (a < 10) -(12 rows) +(13 rows) select sum(o.four), sum(ss.a) from onek o cross join lateral ( diff --git a/src/test/regress/sql/memoize.sql b/src/test/regress/sql/memoize.sql index 548cc3eee3..3c7360adf9 100644 --- a/src/test/regress/sql/memoize.sql +++ b/src/test/regress/sql/memoize.sql @@ -65,6 +65,45 @@ SELECT explain_memoize(' SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.thousand WHERE t2.unique1 < 1200;', true); + +CREATE TABLE flt (f float); +CREATE INDEX flt_f_idx ON flt (f); +INSERT INTO flt VALUES('-0.0'::float),('+0.0'::float); +ANALYZE flt; + +SET enable_seqscan TO off; + +-- Ensure memoize operates in logical mode +SELECT explain_memoize(' +SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false); + +-- Ensure memoize operates in binary mode +SELECT explain_memoize(' +SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f >= f2.f;', false); + +DROP TABLE flt; + +-- Exercise Memoize in binary mode with a large fixed width type and a +-- varlena type. 
+CREATE TABLE strtest (n name, t text); +CREATE INDEX strtest_n_idx ON strtest (n); +CREATE INDEX strtest_t_idx ON strtest (t); +INSERT INTO strtest VALUES('one','one'),('two','two'),('three',repeat(md5('three'),100)); +-- duplicate rows so we get some cache hits +INSERT INTO strtest SELECT * FROM strtest; +ANALYZE strtest; + +-- Ensure we get 3 hits and 3 misses +SELECT explain_memoize(' +SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.n >= s2.n;', false); + +-- Ensure we get 3 hits and 3 misses +SELECT explain_memoize(' +SELECT * FROM strtest s1 INNER JOIN strtest s2 ON s1.t >= s2.t;', false); + +DROP TABLE strtest; + +RESET enable_seqscan; RESET enable_mergejoin; RESET work_mem; RESET enable_bitmapscan;