Allow simplification of EXISTS() subqueries containing LIMIT.

The locution "EXISTS(SELECT ... LIMIT 1)" seems to be rather common among
people who don't realize that the database already performs optimizations
equivalent to putting LIMIT 1 in the sub-select.  Unfortunately, this was
actually making things worse, because it prevented us from optimizing such
EXISTS clauses into semi or anti joins.  Teach simplify_EXISTS_query() to
suppress constant-positive LIMIT clauses.  That fixes the semi/anti-join
case, and may help marginally even for cases that have to be left as
sub-SELECTs.

Marti Raudsepp, reviewed by David Rowley
This commit is contained in:
Tom Lane 2014-11-22 19:12:38 -05:00
parent 9c58101117
commit b62f94c603
3 changed files with 96 additions and 9 deletions

View File

@ -70,7 +70,7 @@ static Node *convert_testexpr_mutator(Node *node,
static bool subplan_is_hashable(Plan *plan);
static bool testexpr_is_hashable(Node *testexpr);
static bool hash_ok_operator(OpExpr *expr);
static bool simplify_EXISTS_query(Query *query);
static bool simplify_EXISTS_query(PlannerInfo *root, Query *query);
static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
Node **testexpr, List **paramIds);
static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
@ -452,7 +452,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
* If it's an EXISTS subplan, we might be able to simplify it.
*/
if (subLinkType == EXISTS_SUBLINK)
simple_exists = simplify_EXISTS_query(subquery);
simple_exists = simplify_EXISTS_query(root, subquery);
/*
* For an EXISTS subplan, tell lower-level planner to expect that only the
@ -518,7 +518,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
/* Make a second copy of the original subquery */
subquery = (Query *) copyObject(orig_subquery);
/* and re-simplify */
simple_exists = simplify_EXISTS_query(subquery);
simple_exists = simplify_EXISTS_query(root, subquery);
Assert(simple_exists);
/* See if it can be converted to an ANY query */
subquery = convert_EXISTS_to_ANY(root, subquery,
@ -1359,7 +1359,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
* targetlist, we have to fail, because the pullup operation leaves us
* with noplace to evaluate the targetlist.
*/
if (!simplify_EXISTS_query(subselect))
if (!simplify_EXISTS_query(root, subselect))
return NULL;
/*
@ -1486,13 +1486,14 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
* Returns TRUE if was able to discard the targetlist, else FALSE.
*/
static bool
simplify_EXISTS_query(Query *query)
simplify_EXISTS_query(PlannerInfo *root, Query *query)
{
/*
* We don't try to simplify at all if the query uses set operations,
* aggregates, modifying CTEs, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE;
* none of these seem likely in normal usage and their possible effects
* are complex.
* aggregates, modifying CTEs, HAVING, OFFSET, or FOR UPDATE/SHARE; none
* of these seem likely in normal usage and their possible effects are
* complex. (Note: we could ignore an "OFFSET 0" clause, but that
* traditionally is used as an optimization fence, so we don't.)
*/
if (query->commandType != CMD_SELECT ||
query->setOperations ||
@ -1501,10 +1502,43 @@ simplify_EXISTS_query(Query *query)
query->hasModifyingCTE ||
query->havingQual ||
query->limitOffset ||
query->limitCount ||
query->rowMarks)
return false;
/*
* LIMIT with a constant positive (or NULL) value doesn't affect the
* semantics of EXISTS, so let's ignore such clauses. This is worth doing
* because people accustomed to certain other DBMSes may be in the habit
* of writing EXISTS(SELECT ... LIMIT 1) as an optimization. If there's a
* LIMIT with anything else as argument, though, we can't simplify.
*/
if (query->limitCount)
{
/*
* The LIMIT clause has not yet been through eval_const_expressions,
* so we have to apply that here. It might seem like this is a waste
* of cycles, since the only case plausibly worth worrying about is
* "LIMIT 1" ... but what we'll actually see is "LIMIT int8(1::int4)",
* so we have to fold constants or we're not going to recognize it.
*/
Node *node = eval_const_expressions(root, query->limitCount);
Const *limit;
/* Might as well update the query if we simplified the clause. */
query->limitCount = node;
if (!IsA(node, Const))
return false;
limit = (Const *) node;
Assert(limit->consttype == INT8OID);
if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0)
return false;
/* Whether or not the targetlist is safe, we can drop the LIMIT. */
query->limitCount = NULL;
}
/*
* Mustn't throw away the targetlist if it contains set-returning
* functions; those could affect whether zero rows are returned!

View File

@ -221,6 +221,46 @@ from int8_tbl group by q1 order by q1;
4567890123456789 | 0.6
(2 rows)
--
-- Check EXISTS simplification with LIMIT
--
explain (costs off)
select * from int4_tbl o where exists
(select 1 from int4_tbl i where i.f1=o.f1 limit null);
QUERY PLAN
------------------------------------
Hash Semi Join
Hash Cond: (o.f1 = i.f1)
-> Seq Scan on int4_tbl o
-> Hash
-> Seq Scan on int4_tbl i
(5 rows)
explain (costs off)
select * from int4_tbl o where not exists
(select 1 from int4_tbl i where i.f1=o.f1 limit 1);
QUERY PLAN
------------------------------------
Hash Anti Join
Hash Cond: (o.f1 = i.f1)
-> Seq Scan on int4_tbl o
-> Hash
-> Seq Scan on int4_tbl i
(5 rows)
explain (costs off)
select * from int4_tbl o where exists
(select 1 from int4_tbl i where i.f1=o.f1 limit 0);
QUERY PLAN
--------------------------------------
Seq Scan on int4_tbl o
Filter: (SubPlan 1)
SubPlan 1
-> Limit
-> Seq Scan on int4_tbl i
Filter: (f1 = o.f1)
(6 rows)
--
-- Test cases to catch unpleasant interactions between IN-join processing
-- and subquery pullup.

View File

@ -92,6 +92,19 @@ SELECT '' AS eight, ss.f1 AS "Correlated Field", ss.f3 AS "Second Field"
select q1, float8(count(*)) / (select count(*) from int8_tbl)
from int8_tbl group by q1 order by q1;
--
-- Check EXISTS simplification with LIMIT
--
explain (costs off)
select * from int4_tbl o where exists
(select 1 from int4_tbl i where i.f1=o.f1 limit null);
explain (costs off)
select * from int4_tbl o where not exists
(select 1 from int4_tbl i where i.f1=o.f1 limit 1);
explain (costs off)
select * from int4_tbl o where exists
(select 1 from int4_tbl i where i.f1=o.f1 limit 0);
--
-- Test cases to catch unpleasant interactions between IN-join processing
-- and subquery pullup.