Fix misuse of RelOptInfo.unique_for_rels cache by SJE

When SJE uses RelOptInfo.unique_for_rels cache, it passes filtered quals to
innerrel_is_unique_ext().  That might lead to an invalid match to cache entries
made by previous non self-join checking calls.  Add UniqueRelInfo.self_join
flag to prevent such cases.  Also, fix that SJE should require a strict match
of outerrelids to make sure UniqueRelInfo.extra_clauses are valid.

Reported-by: Alexander Lakhin
Discussion: https://postgr.es/m/4788f781-31bd-9796-d7d6-588a751c8787%40gmail.com
This commit is contained in:
Alexander Korotkov 2024-01-09 00:08:35 +02:00
parent d3c5f37dd5
commit 30b4955a46
4 changed files with 59 additions and 9 deletions

View File

@ -1247,8 +1247,10 @@ innerrel_is_unique(PlannerInfo *root,
/*
* innerrel_is_unique_ext
* Do the same as innerrel_is_unique(), but also return additional clauses
* from a baserestrictinfo list that were used to prove uniqueness.
* Do the same as innerrel_is_unique(), but also set to '*extra_clauses'
* additional clauses from a baserestrictinfo list that were used to prove
* uniqueness. A non NULL 'extra_clauses' indicates that we're checking
* for self-join and correspondingly dealing with filtered clauses.
*/
bool
innerrel_is_unique_ext(PlannerInfo *root,
@ -1264,6 +1266,7 @@ innerrel_is_unique_ext(PlannerInfo *root,
ListCell *lc;
UniqueRelInfo *uniqueRelInfo;
List *outer_exprs = NIL;
bool self_join = (extra_clauses != NULL);
/* Certainly can't prove uniqueness when there are no joinclauses */
if (restrictlist == NIL)
@ -1278,16 +1281,23 @@ innerrel_is_unique_ext(PlannerInfo *root,
/*
* Query the cache to see if we've managed to prove that innerrel is
* unique for any subset of this outerrel. We don't need an exact match,
* as extra outerrels can't make the innerrel any less unique (or more
* formally, the restrictlist for a join to a superset outerrel must be a
* superset of the conditions we successfully used before).
* unique for any subset of this outerrel. For non self-join search, we
* don't need an exact match, as extra outerrels can't make the innerrel
* any less unique (or more formally, the restrictlist for a join to a
* superset outerrel must be a superset of the conditions we successfully
* used before). For self-join search, we require an exact match of
* outerrels, because we need extra clauses to be valid for our case.
* Also, for self-join checking we've filtered the clauses list. Thus,
* for a self-join search, we can match only the result cached for another
* self-join check.
*/
foreach(lc, innerrel->unique_for_rels)
{
uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
if (bms_is_subset(uniqueRelInfo->outerrelids, outerrelids))
if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
(self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
uniqueRelInfo->self_join))
{
if (extra_clauses)
*extra_clauses = uniqueRelInfo->extra_clauses;
@ -1309,7 +1319,8 @@ innerrel_is_unique_ext(PlannerInfo *root,
/* No cached information, so try to make the proof. */
if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
jointype, restrictlist, &outer_exprs))
jointype, restrictlist,
self_join ? &outer_exprs : NULL))
{
/*
* Cache the positive result for future probes, being sure to keep it
@ -1323,8 +1334,9 @@ innerrel_is_unique_ext(PlannerInfo *root,
*/
old_context = MemoryContextSwitchTo(root->planner_cxt);
uniqueRelInfo = makeNode(UniqueRelInfo);
uniqueRelInfo->extra_clauses = outer_exprs;
uniqueRelInfo->outerrelids = bms_copy(outerrelids);
uniqueRelInfo->self_join = self_join;
uniqueRelInfo->extra_clauses = outer_exprs;
innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
uniqueRelInfo);
MemoryContextSwitchTo(old_context);

View File

@ -3407,6 +3407,12 @@ typedef struct UniqueRelInfo
*/
Relids outerrelids;
/*
* The relation in consideration is unique when considering only clauses
* suitable for self-join (passed split_selfjoin_quals()).
*/
bool self_join;
/*
* Additional clauses from a baserestrictinfo list that were used to prove
* the uniqueness. We cache it for the self-join checking procedure: a

View File

@ -6905,6 +6905,29 @@ where false;
----------
(0 rows)
-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness
-- made with different set of quals
insert into emp1 values (2, 1);
explain (costs off)
select * from emp1 t1 where exists (select * from emp1 t2
where t2.id = t1.code and t2.code > 0);
QUERY PLAN
---------------------------------------------
Nested Loop
-> Seq Scan on emp1 t1
-> Index Scan using emp1_pkey on emp1 t2
Index Cond: (id = t1.code)
Filter: (code > 0)
(5 rows)
select * from emp1 t1 where exists (select * from emp1 t2
where t2.id = t1.code and t2.code > 0);
id | code
----+------
1 | 1
2 | 1
(2 rows)
-- We can remove the join even if we find the join can't duplicate rows and
-- the base quals of each side are different. In the following case we end up
-- moving quals over to s1 to make it so it can't match any rows.

View File

@ -2638,6 +2638,15 @@ select 1 from emp1 full join
where false) s on true
where false;
-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness
-- made with different set of quals
insert into emp1 values (2, 1);
explain (costs off)
select * from emp1 t1 where exists (select * from emp1 t2
where t2.id = t1.code and t2.code > 0);
select * from emp1 t1 where exists (select * from emp1 t2
where t2.id = t1.code and t2.code > 0);
-- We can remove the join even if we find the join can't duplicate rows and
-- the base quals of each side are different. In the following case we end up
-- moving quals over to s1 to make it so it can't match any rows.