1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* initsplan.c
|
1997-09-07 07:04:48 +02:00
|
|
|
* Target list, qualification, joininfo initialization routines
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2019-01-02 18:44:25 +01:00
|
|
|
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/optimizer/plan/initsplan.c
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
2001-05-07 02:43:27 +02:00
|
|
|
#include "postgres.h"
|
|
|
|
|
1998-08-10 04:26:40 +02:00
|
|
|
#include "catalog/pg_type.h"
|
2017-09-14 21:41:08 +02:00
|
|
|
#include "catalog/pg_class.h"
|
2010-10-31 02:55:20 +01:00
|
|
|
#include "nodes/nodeFuncs.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "optimizer/clauses.h"
|
Improve planner's cost estimation in the presence of semijoins.
If we have a semijoin, say
SELECT * FROM x WHERE x1 IN (SELECT y1 FROM y)
and we're estimating the cost of a parameterized indexscan on x, the number
of repetitions of the indexscan should not be taken as the size of y; it'll
really only be the number of distinct values of y1, because the only valid
plan with y on the outside of a nestloop would require y to be unique-ified
before joining it to x. Most of the time this doesn't make that much
difference, but sometimes it can lead to drastically underestimating the
cost of the indexscan and hence choosing a bad plan, as pointed out by
David Kubečka.
Fixing this is a bit difficult because parameterized indexscans are costed
out quite early in the planning process, before we have the information
that would be needed to call estimate_num_groups() and thereby estimate the
number of distinct values of the join column(s). However we can move the
code that extracts a semijoin RHS's unique-ification columns, so that it's
done in initsplan.c rather than on-the-fly in create_unique_path(). That
shouldn't make any difference speed-wise and it's really a bit cleaner too.
The other bit of information we need is the size of the semijoin RHS,
which is easy if it's a single relation (we make those estimates before
considering indexscan costs) but problematic if it's a join relation.
The solution adopted here is just to use the product of the sizes of the
join component rels. That will generally be an overestimate, but since
estimate_num_groups() only uses this input as a clamp, an overestimate
shouldn't hurt us too badly. In any case we don't allow this new logic
to produce a value larger than we would have chosen before, so that at
worst an overestimate leaves us no wiser than we were before.
2015-03-12 02:21:00 +01:00
|
|
|
#include "optimizer/cost.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
#include "optimizer/joininfo.h"
|
|
|
|
#include "optimizer/pathnode.h"
|
2000-02-15 21:49:31 +01:00
|
|
|
#include "optimizer/paths.h"
|
2008-10-21 22:42:53 +02:00
|
|
|
#include "optimizer/placeholder.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "optimizer/planmain.h"
|
2012-08-27 04:48:55 +02:00
|
|
|
#include "optimizer/planner.h"
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
#include "optimizer/prep.h"
|
2004-01-04 01:07:32 +01:00
|
|
|
#include "optimizer/restrictinfo.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
#include "optimizer/var.h"
|
2013-07-23 20:03:09 +02:00
|
|
|
#include "parser/analyze.h"
|
2012-08-27 04:48:55 +02:00
|
|
|
#include "rewrite/rewriteManip.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "utils/lsyscache.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
|
2005-12-20 03:30:36 +01:00
|
|
|
/*
 * Planner parameters whose values are assigned by the GUC machinery.
 */
int			from_collapse_limit;
int			join_collapse_limit;
|
|
|
|
|
|
|
|
|
2013-08-19 19:19:25 +02:00
|
|
|
/* Elements of the postponed_qual_list used during deconstruct_recurse */
|
|
|
|
typedef struct PostponedQual
|
|
|
|
{
|
|
|
|
Node *qual; /* a qual clause waiting to be processed */
|
|
|
|
Relids relids; /* the set of baserels it references */
|
|
|
|
} PostponedQual;
|
|
|
|
|
|
|
|
|
2012-08-27 04:48:55 +02:00
|
|
|
static void extract_lateral_references(PlannerInfo *root, RelOptInfo *brel,
|
|
|
|
Index rtindex);
|
2005-12-20 03:30:36 +01:00
|
|
|
static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode,
|
2007-08-31 03:44:06 +02:00
|
|
|
bool below_outer_join,
|
2013-08-19 19:19:25 +02:00
|
|
|
Relids *qualscope, Relids *inner_join_rels,
|
|
|
|
List **postponed_qual_list);
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
static void process_security_barrier_quals(PlannerInfo *root,
|
|
|
|
int rti, Relids qualscope,
|
|
|
|
bool below_outer_join);
|
2008-08-14 20:48:00 +02:00
|
|
|
static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root,
|
2006-10-04 02:30:14 +02:00
|
|
|
Relids left_rels, Relids right_rels,
|
2007-08-31 03:44:06 +02:00
|
|
|
Relids inner_join_rels,
|
2008-08-14 20:48:00 +02:00
|
|
|
JoinType jointype, List *clause);
|
Improve planner's cost estimation in the presence of semijoins.
If we have a semijoin, say
SELECT * FROM x WHERE x1 IN (SELECT y1 FROM y)
and we're estimating the cost of a parameterized indexscan on x, the number
of repetitions of the indexscan should not be taken as the size of y; it'll
really only be the number of distinct values of y1, because the only valid
plan with y on the outside of a nestloop would require y to be unique-ified
before joining it to x. Most of the time this doesn't make that much
difference, but sometimes it can lead to drastically underestimating the
cost of the indexscan and hence choosing a bad plan, as pointed out by
David Kubečka.
Fixing this is a bit difficult because parameterized indexscans are costed
out quite early in the planning process, before we have the information
that would be needed to call estimate_num_groups() and thereby estimate the
number of distinct values of the join column(s). However we can move the
code that extracts a semijoin RHS's unique-ification columns, so that it's
done in initsplan.c rather than on-the-fly in create_unique_path(). That
shouldn't make any difference speed-wise and it's really a bit cleaner too.
The other bit of information we need is the size of the semijoin RHS,
which is easy if it's a single relation (we make those estimates before
considering indexscan costs) but problematic if it's a join relation.
The solution adopted here is just to use the product of the sizes of the
join component rels. That will generally be an overestimate, but since
estimate_num_groups() only uses this input as a clamp, an overestimate
shouldn't hurt us too badly. In any case we don't allow this new logic
to produce a value larger than we would have chosen before, so that at
worst an overestimate leaves us no wiser than we were before.
2015-03-12 02:21:00 +01:00
|
|
|
static void compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause);
|
2005-06-06 00:32:58 +02:00
|
|
|
static void distribute_qual_to_rels(PlannerInfo *root, Node *clause,
|
2005-09-28 23:17:02 +02:00
|
|
|
bool is_deduced,
|
|
|
|
bool below_outer_join,
|
2008-10-25 21:51:32 +02:00
|
|
|
JoinType jointype,
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
Index security_level,
|
2005-12-20 03:30:36 +01:00
|
|
|
Relids qualscope,
|
|
|
|
Relids ojscope,
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
Relids outerjoin_nonnullable,
|
2013-08-19 19:19:25 +02:00
|
|
|
Relids deduced_nullable_relids,
|
|
|
|
List **postponed_qual_list);
|
2007-05-23 01:23:58 +02:00
|
|
|
static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p,
|
2009-04-16 22:42:16 +02:00
|
|
|
Relids *nullable_relids_p, bool is_pushed_down);
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
static bool check_equivalence_delay(PlannerInfo *root,
|
|
|
|
RestrictInfo *restrictinfo);
|
2008-08-14 20:48:00 +02:00
|
|
|
static bool check_redundant_nullability_qual(PlannerInfo *root, Node *clause);
|
1999-08-16 04:17:58 +02:00
|
|
|
static void check_mergejoinable(RestrictInfo *restrictinfo);
|
|
|
|
static void check_hashjoinable(RestrictInfo *restrictinfo);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
|
|
*
|
2002-03-12 01:52:10 +01:00
|
|
|
* JOIN TREES
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*****************************************************************************/
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
2002-03-12 01:52:10 +01:00
|
|
|
* add_base_rels_to_query
|
|
|
|
*
|
|
|
|
* Scan the query's jointree and create baserel RelOptInfos for all
|
2003-01-15 20:35:48 +01:00
|
|
|
* the base relations (ie, table, subquery, and function RTEs)
|
|
|
|
* appearing in the jointree.
|
1999-10-07 06:23:24 +02:00
|
|
|
*
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
* The initial invocation must pass root->parse->jointree as the value of
|
2014-05-06 18:12:18 +02:00
|
|
|
* jtnode. Internally, the function recurses through the jointree.
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
*
|
2002-03-12 01:52:10 +01:00
|
|
|
* At the end of this process, there should be one baserel RelOptInfo for
|
|
|
|
* every non-join RTE that is used in the query. Therefore, this routine
|
2006-01-31 22:39:25 +01:00
|
|
|
* is the only place that should call build_simple_rel with reloptkind
|
2014-05-06 18:12:18 +02:00
|
|
|
* RELOPT_BASEREL. (Note: build_simple_rel recurses internally to build
|
2006-09-20 00:49:53 +02:00
|
|
|
* "other rel" RelOptInfos for the members of any appendrels we find here.)
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2003-01-15 20:35:48 +01:00
|
|
|
void
|
2005-06-06 00:32:58 +02:00
|
|
|
add_base_rels_to_query(PlannerInfo *root, Node *jtnode)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-09-12 23:07:18 +02:00
|
|
|
if (jtnode == NULL)
|
2003-01-15 20:35:48 +01:00
|
|
|
return;
|
2000-09-29 20:21:41 +02:00
|
|
|
if (IsA(jtnode, RangeTblRef))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2000-09-29 20:21:41 +02:00
|
|
|
int varno = ((RangeTblRef *) jtnode)->rtindex;
|
2001-03-22 05:01:46 +01:00
|
|
|
|
Abstract logic to allow for multiple kinds of child rels.
Currently, the only type of child relation is an "other member rel",
which is the child of a baserel, but in the future joins and even
upper relations may have child rels. To facilitate that, introduce
macros that test to test for particular RelOptKind values, and use
them in various places where they help to clarify the sense of a test.
(For example, a test may allow RELOPT_OTHER_MEMBER_REL either because
it intends to allow child rels, or because it intends to allow simple
rels.)
Also, remove find_childrel_top_parent, which will not work for a
child rel that is not a baserel. Instead, add a new RelOptInfo
member top_parent_relids to track the same kind of information in a
more generic manner.
Ashutosh Bapat, slightly tweaked by me. Review and testing of the
patch set from which this was taken by Rajkumar Raghuwanshi and Rafia
Sabih.
Discussion: http://postgr.es/m/CA+TgmoagTnF2yqR3PT2rv=om=wJiZ4-A+ATwdnriTGku1CLYxA@mail.gmail.com
2017-04-04 04:41:31 +02:00
|
|
|
(void) build_simple_rel(root, varno, NULL);
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
2000-09-29 20:21:41 +02:00
|
|
|
else if (IsA(jtnode, FromExpr))
|
2000-09-12 23:07:18 +02:00
|
|
|
{
|
2000-09-29 20:21:41 +02:00
|
|
|
FromExpr *f = (FromExpr *) jtnode;
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *l;
|
1999-08-10 05:00:15 +02:00
|
|
|
|
2000-09-29 20:21:41 +02:00
|
|
|
foreach(l, f->fromlist)
|
2003-01-15 20:35:48 +01:00
|
|
|
add_base_rels_to_query(root, lfirst(l));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
2000-09-12 23:07:18 +02:00
|
|
|
else if (IsA(jtnode, JoinExpr))
|
|
|
|
{
|
|
|
|
JoinExpr *j = (JoinExpr *) jtnode;
|
2002-09-04 22:31:48 +02:00
|
|
|
|
2003-01-15 20:35:48 +01:00
|
|
|
add_base_rels_to_query(root, j->larg);
|
|
|
|
add_base_rels_to_query(root, j->rarg);
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
|
|
|
else
|
2003-07-25 02:01:09 +02:00
|
|
|
elog(ERROR, "unrecognized node type: %d",
|
|
|
|
(int) nodeTag(jtnode));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2000-07-24 05:11:01 +02:00
|
|
|
|
2002-03-12 01:52:10 +01:00
|
|
|
/*****************************************************************************
|
|
|
|
*
|
|
|
|
* TARGET LISTS
|
|
|
|
*
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* build_base_rel_tlists
|
2003-06-30 01:05:05 +02:00
|
|
|
* Add targetlist entries for each var needed in the query's final tlist
|
2016-01-08 02:23:57 +01:00
|
|
|
* (and HAVING clause, if any) to the appropriate base relations.
|
2003-06-30 01:05:05 +02:00
|
|
|
*
|
|
|
|
* We mark such vars as needed by "relation 0" to ensure that they will
|
|
|
|
* propagate up through all join plan steps.
|
2002-03-12 01:52:10 +01:00
|
|
|
*/
|
|
|
|
void
|
2005-06-06 00:32:58 +02:00
|
|
|
build_base_rel_tlists(PlannerInfo *root, List *final_tlist)
|
2002-03-12 01:52:10 +01:00
|
|
|
{
|
2009-04-19 21:46:33 +02:00
|
|
|
List *tlist_vars = pull_var_clause((Node *) final_tlist,
|
2016-03-10 21:52:58 +01:00
|
|
|
PVC_RECURSE_AGGREGATES |
|
2016-03-10 22:23:40 +01:00
|
|
|
PVC_RECURSE_WINDOWFUNCS |
|
2009-04-19 21:46:33 +02:00
|
|
|
PVC_INCLUDE_PLACEHOLDERS);
|
2002-03-12 01:52:10 +01:00
|
|
|
|
2003-06-30 01:05:05 +02:00
|
|
|
if (tlist_vars != NIL)
|
|
|
|
{
|
2011-08-09 06:48:51 +02:00
|
|
|
add_vars_to_targetlist(root, tlist_vars, bms_make_singleton(0), true);
|
2004-05-31 01:40:41 +02:00
|
|
|
list_free(tlist_vars);
|
2003-06-30 01:05:05 +02:00
|
|
|
}
|
2016-01-08 02:23:57 +01:00
|
|
|
|
|
|
|
/*
|
2016-03-10 22:23:40 +01:00
|
|
|
* If there's a HAVING clause, we'll need the Vars it uses, too. Note
|
|
|
|
* that HAVING can contain Aggrefs but not WindowFuncs.
|
2016-01-08 02:23:57 +01:00
|
|
|
*/
|
|
|
|
if (root->parse->havingQual)
|
|
|
|
{
|
|
|
|
List *having_vars = pull_var_clause(root->parse->havingQual,
|
2016-03-10 21:52:58 +01:00
|
|
|
PVC_RECURSE_AGGREGATES |
|
2016-01-08 02:23:57 +01:00
|
|
|
PVC_INCLUDE_PLACEHOLDERS);
|
|
|
|
|
|
|
|
if (having_vars != NIL)
|
|
|
|
{
|
|
|
|
add_vars_to_targetlist(root, having_vars,
|
|
|
|
bms_make_singleton(0), true);
|
|
|
|
list_free(having_vars);
|
|
|
|
}
|
|
|
|
}
|
2002-03-12 01:52:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* add_vars_to_targetlist
|
|
|
|
* For each variable appearing in the list, add it to the owning
|
2003-06-30 01:05:05 +02:00
|
|
|
* relation's targetlist if not already present, and mark the variable
|
|
|
|
* as being needed for the indicated join (or for final output if
|
|
|
|
* where_needed includes "relation 0").
|
2008-10-21 22:42:53 +02:00
|
|
|
*
|
|
|
|
* The list may also contain PlaceHolderVars. These don't necessarily
|
|
|
|
* have a single owning relation; we keep their attr_needed info in
|
2011-08-09 06:48:51 +02:00
|
|
|
* root->placeholder_list instead. If create_new_ph is true, it's OK
|
2013-08-15 00:38:32 +02:00
|
|
|
* to create new PlaceHolderInfos; otherwise, the PlaceHolderInfos must
|
|
|
|
* already exist, and we should only update their ph_needed. (This should
|
|
|
|
* be true before deconstruct_jointree begins, and false after that.)
|
2002-03-12 01:52:10 +01:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
void
|
2011-08-09 06:48:51 +02:00
|
|
|
add_vars_to_targetlist(PlannerInfo *root, List *vars,
|
|
|
|
Relids where_needed, bool create_new_ph)
|
2002-03-12 01:52:10 +01:00
|
|
|
{
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *temp;
|
2002-03-12 01:52:10 +01:00
|
|
|
|
2003-06-30 01:05:05 +02:00
|
|
|
Assert(!bms_is_empty(where_needed));
|
|
|
|
|
2002-03-12 01:52:10 +01:00
|
|
|
foreach(temp, vars)
|
|
|
|
{
|
2008-10-21 22:42:53 +02:00
|
|
|
Node *node = (Node *) lfirst(temp);
|
|
|
|
|
|
|
|
if (IsA(node, Var))
|
|
|
|
{
|
|
|
|
Var *var = (Var *) node;
|
|
|
|
RelOptInfo *rel = find_base_rel(root, var->varno);
|
|
|
|
int attno = var->varattno;
|
2002-03-12 01:52:10 +01:00
|
|
|
|
2013-08-18 02:22:37 +02:00
|
|
|
if (bms_is_subset(where_needed, rel->relids))
|
|
|
|
continue;
|
2008-10-21 22:42:53 +02:00
|
|
|
Assert(attno >= rel->min_attr && attno <= rel->max_attr);
|
|
|
|
attno -= rel->min_attr;
|
|
|
|
if (rel->attr_needed[attno] == NULL)
|
|
|
|
{
|
Add an explicit representation of the output targetlist to Paths.
Up to now, there's been an assumption that all Paths for a given relation
compute the same output column set (targetlist). However, there are good
reasons to remove that assumption. For example, an indexscan on an
expression index might be able to return the value of an expensive function
"for free". While we have the ability to generate such a plan today in
simple cases, we don't have a way to model that it's cheaper than a plan
that computes the function from scratch, nor a way to create such a plan
in join cases (where the function computation would normally happen at
the topmost join node). Also, we need this so that we can have Paths
representing post-scan/join steps, where the targetlist may well change
from one step to the next. Therefore, invent a "struct PathTarget"
representing the columns we expect a plan step to emit. It's convenient
to include the output tuple width and tlist evaluation cost in this struct,
and there will likely be additional fields in future.
While Path nodes that actually do have custom outputs will need their own
PathTargets, it will still be true that most Paths for a given relation
will compute the same tlist. To reduce the overhead added by this patch,
keep a "default PathTarget" in RelOptInfo, and allow Paths that compute
that column set to just point to their parent RelOptInfo's reltarget.
(In the patch as committed, actually every Path is like that, since we
do not yet have any cases of custom PathTargets.)
I took this opportunity to provide some more-honest costing of
PlaceHolderVar evaluation. Up to now, the assumption that "scan/join
reltargetlists have cost zero" was applied not only to Vars, where it's
reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval
cost of a PlaceHolderVar's expression to the first plan level where it can
be computed, by including it in the PathTarget cost field and adding that
to the cost estimates for Paths. This isn't perfect yet but it's much
better than before, and there is a way forward to improve it more. This
costing change affects the join order chosen for a couple of the regression
tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
|
|
|
/* Variable not yet requested, so add to rel's targetlist */
|
2008-10-21 22:42:53 +02:00
|
|
|
/* XXX is copyObject necessary here? */
|
2016-03-14 21:59:59 +01:00
|
|
|
rel->reltarget->exprs = lappend(rel->reltarget->exprs,
|
|
|
|
copyObject(var));
|
Add an explicit representation of the output targetlist to Paths.
Up to now, there's been an assumption that all Paths for a given relation
compute the same output column set (targetlist). However, there are good
reasons to remove that assumption. For example, an indexscan on an
expression index might be able to return the value of an expensive function
"for free". While we have the ability to generate such a plan today in
simple cases, we don't have a way to model that it's cheaper than a plan
that computes the function from scratch, nor a way to create such a plan
in join cases (where the function computation would normally happen at
the topmost join node). Also, we need this so that we can have Paths
representing post-scan/join steps, where the targetlist may well change
from one step to the next. Therefore, invent a "struct PathTarget"
representing the columns we expect a plan step to emit. It's convenient
to include the output tuple width and tlist evaluation cost in this struct,
and there will likely be additional fields in future.
While Path nodes that actually do have custom outputs will need their own
PathTargets, it will still be true that most Paths for a given relation
will compute the same tlist. To reduce the overhead added by this patch,
keep a "default PathTarget" in RelOptInfo, and allow Paths that compute
that column set to just point to their parent RelOptInfo's reltarget.
(In the patch as committed, actually every Path is like that, since we
do not yet have any cases of custom PathTargets.)
I took this opportunity to provide some more-honest costing of
PlaceHolderVar evaluation. Up to now, the assumption that "scan/join
reltargetlists have cost zero" was applied not only to Vars, where it's
reasonable, but also PlaceHolderVars where it isn't. Now, we add the eval
cost of a PlaceHolderVar's expression to the first plan level where it can
be computed, by including it in the PathTarget cost field and adding that
to the cost estimates for Paths. This isn't perfect yet but it's much
better than before, and there is a way forward to improve it more. This
costing change affects the join order chosen for a couple of the regression
tests, changing expected row ordering.
2016-02-19 02:01:49 +01:00
|
|
|
/* reltarget cost and width will be computed later */
|
2008-10-21 22:42:53 +02:00
|
|
|
}
|
|
|
|
rel->attr_needed[attno] = bms_add_members(rel->attr_needed[attno],
|
|
|
|
where_needed);
|
|
|
|
}
|
|
|
|
else if (IsA(node, PlaceHolderVar))
|
2003-06-30 01:05:05 +02:00
|
|
|
{
|
2008-10-21 22:42:53 +02:00
|
|
|
PlaceHolderVar *phv = (PlaceHolderVar *) node;
|
2011-08-09 06:48:51 +02:00
|
|
|
PlaceHolderInfo *phinfo = find_placeholder_info(root, phv,
|
|
|
|
create_new_ph);
|
2008-10-21 22:42:53 +02:00
|
|
|
|
|
|
|
phinfo->ph_needed = bms_add_members(phinfo->ph_needed,
|
|
|
|
where_needed);
|
2003-06-30 01:05:05 +02:00
|
|
|
}
|
2008-10-21 22:42:53 +02:00
|
|
|
else
|
|
|
|
elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
|
2002-03-12 01:52:10 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-08-27 04:48:55 +02:00
|
|
|
|
|
|
|
/*****************************************************************************
|
|
|
|
*
|
|
|
|
* LATERAL REFERENCES
|
|
|
|
*
|
|
|
|
*****************************************************************************/
|
|
|
|
|
2012-08-08 01:02:54 +02:00
|
|
|
/*
|
2012-08-27 04:48:55 +02:00
|
|
|
* find_lateral_references
|
|
|
|
* For each LATERAL subquery, extract all its references to Vars and
|
|
|
|
* PlaceHolderVars of the current query level, and make sure those values
|
|
|
|
* will be available for evaluation of the subquery.
|
2012-08-08 01:02:54 +02:00
|
|
|
*
|
2012-08-27 04:48:55 +02:00
|
|
|
* While later planning steps ensure that the Var/PHV source rels are on the
|
|
|
|
* outside of nestloops relative to the LATERAL subquery, we also need to
|
|
|
|
* ensure that the Vars/PHVs propagate up to the nestloop join level; this
|
|
|
|
* means setting suitable where_needed values for them.
|
|
|
|
*
|
2013-08-18 02:22:37 +02:00
|
|
|
* Note that this only deals with lateral references in unflattened LATERAL
|
2014-05-06 18:12:18 +02:00
|
|
|
* subqueries. When we flatten a LATERAL subquery, its lateral references
|
2013-08-18 02:22:37 +02:00
|
|
|
* become plain Vars in the parent query, but they may have to be wrapped in
|
|
|
|
* PlaceHolderVars if they need to be forced NULL by outer joins that don't
|
2014-05-06 18:12:18 +02:00
|
|
|
* also null the LATERAL subquery. That's all handled elsewhere.
|
2013-08-18 02:22:37 +02:00
|
|
|
*
|
2012-08-27 04:48:55 +02:00
|
|
|
* This has to run before deconstruct_jointree, since it might result in
|
2013-08-15 00:38:32 +02:00
|
|
|
* creation of PlaceHolderInfos.
|
2012-08-08 01:02:54 +02:00
|
|
|
*/
|
2012-08-27 04:48:55 +02:00
|
|
|
void
|
|
|
|
find_lateral_references(PlannerInfo *root)
|
|
|
|
{
|
|
|
|
Index rti;
|
|
|
|
|
|
|
|
/* We need do nothing if the query contains no LATERAL RTEs */
|
|
|
|
if (!root->hasLateralRTEs)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Examine all baserels (the rel array has been set up by now).
|
|
|
|
*/
|
|
|
|
for (rti = 1; rti < root->simple_rel_array_size; rti++)
|
|
|
|
{
|
|
|
|
RelOptInfo *brel = root->simple_rel_array[rti];
|
|
|
|
|
|
|
|
/* there may be empty slots corresponding to non-baserel RTEs */
|
|
|
|
if (brel == NULL)
|
|
|
|
continue;
|
|
|
|
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
Assert(brel->relid == rti); /* sanity check on array */
|
2012-08-27 04:48:55 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This bit is less obvious than it might look. We ignore appendrel
|
|
|
|
* otherrels and consider only their parent baserels. In a case where
|
|
|
|
* a LATERAL-containing UNION ALL subquery was pulled up, it is the
|
2013-08-18 02:22:37 +02:00
|
|
|
* otherrel that is actually going to be in the plan. However, we
|
|
|
|
* want to mark all its lateral references as needed by the parent,
|
2012-08-27 04:48:55 +02:00
|
|
|
* because it is the parent's relid that will be used for join
|
|
|
|
* planning purposes. And the parent's RTE will contain all the
|
2013-08-18 02:22:37 +02:00
|
|
|
* lateral references we need to know, since the pulled-up member is
|
|
|
|
* nothing but a copy of parts of the original RTE's subquery. We
|
|
|
|
* could visit the parent's children instead and transform their
|
|
|
|
* references back to the parent's relid, but it would be much more
|
|
|
|
* complicated for no real gain. (Important here is that the child
|
|
|
|
* members have not yet received any processing beyond being pulled
|
|
|
|
* up.) Similarly, in appendrels created by inheritance expansion,
|
|
|
|
* it's sufficient to look at the parent relation.
|
2012-08-27 04:48:55 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
/* ignore RTEs that are "other rels" */
|
|
|
|
if (brel->reloptkind != RELOPT_BASEREL)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
extract_lateral_references(root, brel, rti);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-08-08 01:02:54 +02:00
|
|
|
static void
|
2012-08-27 04:48:55 +02:00
|
|
|
extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex)
|
2012-08-08 01:02:54 +02:00
|
|
|
{
|
|
|
|
RangeTblEntry *rte = root->simple_rte_array[rtindex];
|
|
|
|
List *vars;
|
|
|
|
List *newvars;
|
|
|
|
Relids where_needed;
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
/* No cross-references are possible if it's not LATERAL */
|
|
|
|
if (!rte->lateral)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Fetch the appropriate variables */
|
Redesign tablesample method API, and do extensive code review.
The original implementation of TABLESAMPLE modeled the tablesample method
API on index access methods, which wasn't a good choice because, without
specialized DDL commands, there's no way to build an extension that can
implement a TSM. (Raw inserts into system catalogs are not an acceptable
thing to do, because we can't undo them during DROP EXTENSION, nor will
pg_upgrade behave sanely.) Instead adopt an API more like procedural
language handlers or foreign data wrappers, wherein the only SQL-level
support object needed is a single handler function identified by having
a special return type. This lets us get rid of the supporting catalog
altogether, so that no custom DDL support is needed for the feature.
Adjust the API so that it can support non-constant tablesample arguments
(the original coding assumed we could evaluate the argument expressions at
ExecInitSampleScan time, which is undesirable even if it weren't outright
unsafe), and discourage sampling methods from looking at invisible tuples.
Make sure that the BERNOULLI and SYSTEM methods are genuinely repeatable
within and across queries, as required by the SQL standard, and deal more
honestly with methods that can't support that requirement.
Make a full code-review pass over the tablesample additions, and fix
assorted bugs, omissions, infelicities, and cosmetic issues (such as
failure to put the added code stanzas in a consistent ordering).
Improve EXPLAIN's output of tablesample plans, too.
Back-patch to 9.5 so that we don't have to support the original API
in production.
2015-07-25 20:39:00 +02:00
|
|
|
if (rte->rtekind == RTE_RELATION)
|
|
|
|
vars = pull_vars_of_level((Node *) rte->tablesample, 0);
|
|
|
|
else if (rte->rtekind == RTE_SUBQUERY)
|
2012-08-08 01:02:54 +02:00
|
|
|
vars = pull_vars_of_level((Node *) rte->subquery, 1);
|
|
|
|
else if (rte->rtekind == RTE_FUNCTION)
|
Support multi-argument UNNEST(), and TABLE() syntax for multiple functions.
This patch adds the ability to write TABLE( function1(), function2(), ...)
as a single FROM-clause entry. The result is the concatenation of the
first row from each function, followed by the second row from each
function, etc; with NULLs inserted if any function produces fewer rows than
others. This is believed to be a much more useful behavior than what
Postgres currently does with multiple SRFs in a SELECT list.
This syntax also provides a reasonable way to combine use of column
definition lists with WITH ORDINALITY: put the column definition list
inside TABLE(), where it's clear that it doesn't control the ordinality
column as well.
Also implement SQL-compliant multiple-argument UNNEST(), by turning
UNNEST(a,b,c) into TABLE(unnest(a), unnest(b), unnest(c)).
The SQL standard specifies TABLE() with only a single function, not
multiple functions, and it seems to require an implicit UNNEST() which is
not what this patch does. There may be something wrong with that reading
of the spec, though, because if it's right then the spec's TABLE() is just
a pointless alternative spelling of UNNEST(). After further review of
that, we might choose to adopt a different syntax for what this patch does,
but in any case this functionality seems clearly worthwhile.
Andrew Gierth, reviewed by Zoltán Böszörményi and Heikki Linnakangas, and
significantly revised by me
2013-11-22 01:37:02 +01:00
|
|
|
vars = pull_vars_of_level((Node *) rte->functions, 0);
|
2017-03-08 16:39:37 +01:00
|
|
|
else if (rte->rtekind == RTE_TABLEFUNC)
|
|
|
|
vars = pull_vars_of_level((Node *) rte->tablefunc, 0);
|
2012-08-12 22:01:26 +02:00
|
|
|
else if (rte->rtekind == RTE_VALUES)
|
|
|
|
vars = pull_vars_of_level((Node *) rte->values_lists, 0);
|
2012-08-08 01:02:54 +02:00
|
|
|
else
|
2012-08-27 04:48:55 +02:00
|
|
|
{
|
|
|
|
Assert(false);
|
|
|
|
return; /* keep compiler quiet */
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vars == NIL)
|
|
|
|
return; /* nothing to do */
|
2012-08-08 01:02:54 +02:00
|
|
|
|
2012-08-18 20:10:17 +02:00
|
|
|
/* Copy each Var (or PlaceHolderVar) and adjust it to match our level */
|
2012-08-08 01:02:54 +02:00
|
|
|
newvars = NIL;
|
|
|
|
foreach(lc, vars)
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
Node *node = (Node *) lfirst(lc);
|
2012-08-08 01:02:54 +02:00
|
|
|
|
2012-08-27 04:48:55 +02:00
|
|
|
node = copyObject(node);
|
|
|
|
if (IsA(node, Var))
|
2012-08-18 20:10:17 +02:00
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
Var *var = (Var *) node;
|
2012-08-27 04:48:55 +02:00
|
|
|
|
|
|
|
/* Adjustment is easy since it's just one node */
|
|
|
|
var->varlevelsup = 0;
|
2012-08-18 20:10:17 +02:00
|
|
|
}
|
2012-08-27 04:48:55 +02:00
|
|
|
else if (IsA(node, PlaceHolderVar))
|
2012-08-18 20:10:17 +02:00
|
|
|
{
|
2012-08-27 04:48:55 +02:00
|
|
|
PlaceHolderVar *phv = (PlaceHolderVar *) node;
|
2013-05-29 22:58:43 +02:00
|
|
|
int levelsup = phv->phlevelsup;
|
2012-08-27 04:48:55 +02:00
|
|
|
|
|
|
|
/* Have to work harder to adjust the contained expression too */
|
|
|
|
if (levelsup != 0)
|
|
|
|
IncrementVarSublevelsUp(node, -levelsup, 0);
|
|
|
|
|
2012-08-18 20:10:17 +02:00
|
|
|
/*
|
2012-08-27 04:48:55 +02:00
|
|
|
* If we pulled the PHV out of a subquery RTE, its expression
|
|
|
|
* needs to be preprocessed. subquery_planner() already did this
|
|
|
|
* for level-zero PHVs in function and values RTEs, though.
|
2012-08-18 20:10:17 +02:00
|
|
|
*/
|
2012-08-27 04:48:55 +02:00
|
|
|
if (levelsup > 0)
|
|
|
|
phv->phexpr = preprocess_phv_expression(root, phv->phexpr);
|
2012-08-18 20:10:17 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
Assert(false);
|
2012-08-27 04:48:55 +02:00
|
|
|
newvars = lappend(newvars, node);
|
2012-08-08 01:02:54 +02:00
|
|
|
}
|
|
|
|
|
2012-08-27 04:48:55 +02:00
|
|
|
list_free(vars);
|
|
|
|
|
2012-08-08 01:02:54 +02:00
|
|
|
/*
|
|
|
|
* We mark the Vars as being "needed" at the LATERAL RTE. This is a bit
|
|
|
|
* of a cheat: a more formal approach would be to mark each one as needed
|
2014-05-06 18:12:18 +02:00
|
|
|
* at the join of the LATERAL RTE with its source RTE. But it will work,
|
2012-08-08 01:02:54 +02:00
|
|
|
* and it's much less tedious than computing a separate where_needed for
|
|
|
|
* each Var.
|
|
|
|
*/
|
|
|
|
where_needed = bms_make_singleton(rtindex);
|
|
|
|
|
2013-08-18 02:22:37 +02:00
|
|
|
/*
|
|
|
|
* Push Vars into their source relations' targetlists, and PHVs into
|
|
|
|
* root->placeholder_list.
|
|
|
|
*/
|
2012-08-27 04:48:55 +02:00
|
|
|
add_vars_to_targetlist(root, newvars, where_needed, true);
|
2012-08-08 01:02:54 +02:00
|
|
|
|
2012-08-27 04:48:55 +02:00
|
|
|
/* Remember the lateral references for create_lateral_join_info */
|
|
|
|
brel->lateral_vars = newvars;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* create_lateral_join_info
|
2015-12-11 21:52:16 +01:00
|
|
|
* Fill in the per-base-relation direct_lateral_relids, lateral_relids
|
|
|
|
* and lateral_referencers sets.
|
2012-08-27 04:48:55 +02:00
|
|
|
*
|
|
|
|
* This has to run after deconstruct_jointree, because we need to know the
|
2013-08-18 02:22:37 +02:00
|
|
|
* final ph_eval_at values for PlaceHolderVars.
|
2012-08-27 04:48:55 +02:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
create_lateral_join_info(PlannerInfo *root)
|
|
|
|
{
|
2015-12-11 21:52:16 +01:00
|
|
|
bool found_laterals = false;
|
2012-08-27 04:48:55 +02:00
|
|
|
Index rti;
|
2013-08-18 02:22:37 +02:00
|
|
|
ListCell *lc;
|
2012-08-27 04:48:55 +02:00
|
|
|
|
|
|
|
/* We need do nothing if the query contains no LATERAL RTEs */
|
|
|
|
if (!root->hasLateralRTEs)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Examine all baserels (the rel array has been set up by now).
|
|
|
|
*/
|
|
|
|
for (rti = 1; rti < root->simple_rel_array_size; rti++)
|
|
|
|
{
|
|
|
|
RelOptInfo *brel = root->simple_rel_array[rti];
|
|
|
|
Relids lateral_relids;
|
|
|
|
|
|
|
|
/* there may be empty slots corresponding to non-baserel RTEs */
|
|
|
|
if (brel == NULL)
|
|
|
|
continue;
|
|
|
|
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
Assert(brel->relid == rti); /* sanity check on array */
|
2012-08-27 04:48:55 +02:00
|
|
|
|
|
|
|
/* ignore RTEs that are "other rels" */
|
|
|
|
if (brel->reloptkind != RELOPT_BASEREL)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
lateral_relids = NULL;
|
|
|
|
|
|
|
|
/* consider each laterally-referenced Var or PHV */
|
|
|
|
foreach(lc, brel->lateral_vars)
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
Node *node = (Node *) lfirst(lc);
|
2012-08-27 04:48:55 +02:00
|
|
|
|
|
|
|
if (IsA(node, Var))
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
Var *var = (Var *) node;
|
2012-08-27 04:48:55 +02:00
|
|
|
|
2015-12-11 21:52:16 +01:00
|
|
|
found_laterals = true;
|
2012-08-27 04:48:55 +02:00
|
|
|
lateral_relids = bms_add_member(lateral_relids,
|
|
|
|
var->varno);
|
|
|
|
}
|
|
|
|
else if (IsA(node, PlaceHolderVar))
|
|
|
|
{
|
|
|
|
PlaceHolderVar *phv = (PlaceHolderVar *) node;
|
|
|
|
PlaceHolderInfo *phinfo = find_placeholder_info(root, phv,
|
|
|
|
false);
|
|
|
|
|
2015-12-11 21:52:16 +01:00
|
|
|
found_laterals = true;
|
2012-08-27 04:48:55 +02:00
|
|
|
lateral_relids = bms_add_members(lateral_relids,
|
|
|
|
phinfo->ph_eval_at);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
Assert(false);
|
|
|
|
}
|
|
|
|
|
2015-12-11 21:52:16 +01:00
|
|
|
/* We now have all the simple lateral refs from this rel */
|
|
|
|
brel->direct_lateral_relids = lateral_relids;
|
|
|
|
brel->lateral_relids = bms_copy(lateral_relids);
|
2013-08-18 02:22:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2015-12-11 21:52:16 +01:00
|
|
|
* Now check for lateral references within PlaceHolderVars, and mark their
|
|
|
|
* eval_at rels as having lateral references to the source rels.
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
*
|
2015-12-11 21:52:16 +01:00
|
|
|
* For a PHV that is due to be evaluated at a baserel, mark its source(s)
|
|
|
|
* as direct lateral dependencies of the baserel (adding onto the ones
|
|
|
|
* recorded above). If it's due to be evaluated at a join, mark its
|
|
|
|
* source(s) as indirect lateral dependencies of each baserel in the join,
|
|
|
|
* ie put them into lateral_relids but not direct_lateral_relids. This is
|
|
|
|
* appropriate because we can't put any such baserel on the outside of a
|
|
|
|
* join to one of the PHV's lateral dependencies, but on the other hand we
|
|
|
|
* also can't yet join it directly to the dependency.
|
2013-08-18 02:22:37 +02:00
|
|
|
*/
|
|
|
|
foreach(lc, root->placeholder_list)
|
|
|
|
{
|
|
|
|
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
|
|
|
|
Relids eval_at = phinfo->ph_eval_at;
|
2015-12-11 21:52:16 +01:00
|
|
|
int varno;
|
2013-08-18 02:22:37 +02:00
|
|
|
|
2015-12-11 21:52:16 +01:00
|
|
|
if (phinfo->ph_lateral == NULL)
|
|
|
|
continue; /* PHV is uninteresting if no lateral refs */
|
2013-08-18 02:22:37 +02:00
|
|
|
|
2015-12-11 21:52:16 +01:00
|
|
|
found_laterals = true;
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
|
2015-12-11 21:52:16 +01:00
|
|
|
if (bms_get_singleton_member(eval_at, &varno))
|
|
|
|
{
|
|
|
|
/* Evaluation site is a baserel */
|
|
|
|
RelOptInfo *brel = find_base_rel(root, varno);
|
|
|
|
|
|
|
|
brel->direct_lateral_relids =
|
|
|
|
bms_add_members(brel->direct_lateral_relids,
|
|
|
|
phinfo->ph_lateral);
|
|
|
|
brel->lateral_relids =
|
|
|
|
bms_add_members(brel->lateral_relids,
|
|
|
|
phinfo->ph_lateral);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Evaluation site is a join */
|
|
|
|
varno = -1;
|
|
|
|
while ((varno = bms_next_member(eval_at, varno)) >= 0)
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
{
|
2015-12-11 21:52:16 +01:00
|
|
|
RelOptInfo *brel = find_base_rel(root, varno);
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
|
|
|
|
brel->lateral_relids = bms_add_members(brel->lateral_relids,
|
|
|
|
phinfo->ph_lateral);
|
|
|
|
}
|
2013-08-18 02:22:37 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-11 21:52:16 +01:00
|
|
|
/*
|
|
|
|
* If we found no actual lateral references, we're done; but reset the
|
|
|
|
* hasLateralRTEs flag to avoid useless work later.
|
|
|
|
*/
|
|
|
|
if (!found_laterals)
|
|
|
|
{
|
|
|
|
root->hasLateralRTEs = false;
|
2013-08-18 02:22:37 +02:00
|
|
|
return;
|
2015-12-11 21:52:16 +01:00
|
|
|
}
|
2013-08-18 02:22:37 +02:00
|
|
|
|
|
|
|
/*
|
2015-12-11 21:52:16 +01:00
|
|
|
* Calculate the transitive closure of the lateral_relids sets, so that
|
|
|
|
* they describe both direct and indirect lateral references. If relation
|
|
|
|
* X references Y laterally, and Y references Z laterally, then we will
|
|
|
|
* have to scan X on the inside of a nestloop with Z, so for all intents
|
|
|
|
* and purposes X is laterally dependent on Z too.
|
2013-08-18 02:22:37 +02:00
|
|
|
*
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
* This code is essentially Warshall's algorithm for transitive closure.
|
|
|
|
* The outer loop considers each baserel, and propagates its lateral
|
|
|
|
* dependencies to those baserels that have a lateral dependency on it.
|
2013-08-18 02:22:37 +02:00
|
|
|
*/
|
|
|
|
for (rti = 1; rti < root->simple_rel_array_size; rti++)
|
|
|
|
{
|
|
|
|
RelOptInfo *brel = root->simple_rel_array[rti];
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
Relids outer_lateral_relids;
|
|
|
|
Index rti2;
|
2013-08-18 02:22:37 +02:00
|
|
|
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
if (brel == NULL || brel->reloptkind != RELOPT_BASEREL)
|
2013-08-18 02:22:37 +02:00
|
|
|
continue;
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
|
|
|
|
/* need not consider baserel further if it has no lateral refs */
|
|
|
|
outer_lateral_relids = brel->lateral_relids;
|
|
|
|
if (outer_lateral_relids == NULL)
|
2013-08-18 02:22:37 +02:00
|
|
|
continue;
|
|
|
|
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
/* else scan all baserels */
|
|
|
|
for (rti2 = 1; rti2 < root->simple_rel_array_size; rti2++)
|
2013-08-18 02:22:37 +02:00
|
|
|
{
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
RelOptInfo *brel2 = root->simple_rel_array[rti2];
|
|
|
|
|
|
|
|
if (brel2 == NULL || brel2->reloptkind != RELOPT_BASEREL)
|
|
|
|
continue;
|
2013-08-18 02:22:37 +02:00
|
|
|
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
/* if brel2 has lateral ref to brel, propagate brel's refs */
|
|
|
|
if (bms_is_member(rti, brel2->lateral_relids))
|
|
|
|
brel2->lateral_relids = bms_add_members(brel2->lateral_relids,
|
|
|
|
outer_lateral_relids);
|
2013-08-18 02:22:37 +02:00
|
|
|
}
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now that we've identified all lateral references, mark each baserel
|
|
|
|
* with the set of relids of rels that reference it laterally (possibly
|
|
|
|
* indirectly) --- that is, the inverse mapping of lateral_relids.
|
|
|
|
*/
|
|
|
|
for (rti = 1; rti < root->simple_rel_array_size; rti++)
|
|
|
|
{
|
|
|
|
RelOptInfo *brel = root->simple_rel_array[rti];
|
|
|
|
Relids lateral_relids;
|
|
|
|
int rti2;
|
|
|
|
|
|
|
|
if (brel == NULL || brel->reloptkind != RELOPT_BASEREL)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Nothing to do at rels with no lateral refs */
|
|
|
|
lateral_relids = brel->lateral_relids;
|
|
|
|
if (lateral_relids == NULL)
|
|
|
|
continue;
|
2012-08-27 04:48:55 +02:00
|
|
|
|
|
|
|
/*
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
* We should not have broken the invariant that lateral_relids is
|
|
|
|
* exactly NULL if empty.
|
2012-08-27 04:48:55 +02:00
|
|
|
*/
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
Assert(!bms_is_empty(lateral_relids));
|
|
|
|
|
|
|
|
/* Also, no rel should have a lateral dependency on itself */
|
|
|
|
Assert(!bms_is_member(rti, lateral_relids));
|
|
|
|
|
|
|
|
/* Mark this rel's referencees */
|
|
|
|
rti2 = -1;
|
|
|
|
while ((rti2 = bms_next_member(lateral_relids, rti2)) >= 0)
|
|
|
|
{
|
|
|
|
RelOptInfo *brel2 = root->simple_rel_array[rti2];
|
|
|
|
|
|
|
|
Assert(brel2 != NULL && brel2->reloptkind == RELOPT_BASEREL);
|
|
|
|
brel2->lateral_referencers =
|
|
|
|
bms_add_member(brel2->lateral_referencers, rti);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Lastly, propagate lateral_relids and lateral_referencers from appendrel
|
|
|
|
* parent rels to their child rels. We intentionally give each child rel
|
|
|
|
* the same minimum parameterization, even though it's quite possible that
|
|
|
|
* some don't reference all the lateral rels. This is because any append
|
|
|
|
* path for the parent will have to have the same parameterization for
|
|
|
|
* every child anyway, and there's no value in forcing extra
|
|
|
|
* reparameterize_path() calls. Similarly, a lateral reference to the
|
|
|
|
* parent prevents use of otherwise-movable join rels for each child.
|
|
|
|
*/
|
|
|
|
for (rti = 1; rti < root->simple_rel_array_size; rti++)
|
|
|
|
{
|
|
|
|
RelOptInfo *brel = root->simple_rel_array[rti];
|
2017-09-14 21:41:08 +02:00
|
|
|
RangeTblEntry *brte = root->simple_rte_array[rti];
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
|
2017-09-20 16:20:10 +02:00
|
|
|
/*
|
|
|
|
* Skip empty slots. Also skip non-simple relations i.e. dead
|
|
|
|
* relations.
|
|
|
|
*/
|
|
|
|
if (brel == NULL || !IS_SIMPLE_REL(brel))
|
2017-09-14 21:41:08 +02:00
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In the case of table inheritance, the parent RTE is directly linked
|
|
|
|
* to every child table via an AppendRelInfo. In the case of table
|
|
|
|
* partitioning, the inheritance hierarchy is expanded one level at a
|
|
|
|
* time rather than flattened. Therefore, an other member rel that is
|
|
|
|
* a partitioned table may have children of its own, and must
|
|
|
|
* therefore be marked with the appropriate lateral info so that those
|
|
|
|
* children eventually get marked also.
|
|
|
|
*/
|
|
|
|
Assert(brte);
|
|
|
|
if (brel->reloptkind == RELOPT_OTHER_MEMBER_REL &&
|
|
|
|
(brte->rtekind != RTE_RELATION ||
|
|
|
|
brte->relkind != RELKIND_PARTITIONED_TABLE))
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
continue;
|
|
|
|
|
2017-09-14 21:41:08 +02:00
|
|
|
if (brte->inh)
|
2012-08-27 04:48:55 +02:00
|
|
|
{
|
|
|
|
foreach(lc, root->append_rel_list)
|
|
|
|
{
|
|
|
|
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
|
|
|
|
RelOptInfo *childrel;
|
|
|
|
|
|
|
|
if (appinfo->parent_relid != rti)
|
|
|
|
continue;
|
|
|
|
childrel = root->simple_rel_array[appinfo->child_relid];
|
|
|
|
Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
|
2015-12-11 21:52:16 +01:00
|
|
|
Assert(childrel->direct_lateral_relids == NULL);
|
|
|
|
childrel->direct_lateral_relids = brel->direct_lateral_relids;
|
2012-08-27 04:48:55 +02:00
|
|
|
Assert(childrel->lateral_relids == NULL);
|
2013-08-18 02:22:37 +02:00
|
|
|
childrel->lateral_relids = brel->lateral_relids;
|
|
|
|
Assert(childrel->lateral_referencers == NULL);
|
|
|
|
childrel->lateral_referencers = brel->lateral_referencers;
|
2012-08-27 04:48:55 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-03-12 01:52:10 +01:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
*
|
2005-12-20 03:30:36 +01:00
|
|
|
* JOIN TREE PROCESSING
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*****************************************************************************/
|
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
/*
|
2005-12-20 03:30:36 +01:00
|
|
|
* deconstruct_jointree
|
2000-09-29 20:21:41 +02:00
|
|
|
* Recursively scan the query's join tree for WHERE and JOIN/ON qual
|
2005-06-09 06:19:00 +02:00
|
|
|
* clauses, and add these to the appropriate restrictinfo and joininfo
|
2014-05-06 18:12:18 +02:00
|
|
|
* lists belonging to base RelOptInfos. Also, add SpecialJoinInfo nodes
|
2008-08-14 20:48:00 +02:00
|
|
|
* to root->join_info_list for any outer joins appearing in the query tree.
|
2005-12-20 03:30:36 +01:00
|
|
|
* Return a "joinlist" data structure showing the join order decisions
|
|
|
|
* that need to be made by make_one_rel().
|
2000-09-12 23:07:18 +02:00
|
|
|
*
|
2005-12-20 03:30:36 +01:00
|
|
|
* The "joinlist" result is a list of items that are either RangeTblRef
|
|
|
|
* jointree nodes or sub-joinlists. All the items at the same level of
|
|
|
|
* joinlist must be joined in an order to be determined by make_one_rel()
|
2008-08-14 20:48:00 +02:00
|
|
|
* (note that legal orders may be constrained by SpecialJoinInfo nodes).
|
2005-12-20 03:30:36 +01:00
|
|
|
* A sub-joinlist represents a subproblem to be planned separately. Currently
|
|
|
|
* sub-joinlists arise only from FULL OUTER JOIN or when collapsing of
|
|
|
|
* subproblems is stopped by join_collapse_limit or from_collapse_limit.
|
2005-09-28 23:17:02 +02:00
|
|
|
*
|
2000-09-12 23:07:18 +02:00
|
|
|
* NOTE: when dealing with inner joins, it is appropriate to let a qual clause
|
|
|
|
* be evaluated at the lowest level where all the variables it mentions are
|
2000-09-29 20:21:41 +02:00
|
|
|
* available. However, we cannot push a qual down into the nullable side(s)
|
|
|
|
* of an outer join since the qual might eliminate matching rows and cause a
|
2014-05-06 18:12:18 +02:00
|
|
|
* NULL row to be incorrectly emitted by the join. Therefore, we artificially
|
2005-12-20 03:30:36 +01:00
|
|
|
* OR the minimum-relids of such an outer join into the required_relids of
|
2014-05-06 18:12:18 +02:00
|
|
|
* clauses appearing above it. This forces those clauses to be delayed until
|
2005-12-20 03:30:36 +01:00
|
|
|
* application of the outer join (or maybe even higher in the join tree).
|
|
|
|
*/
|
|
|
|
List *
|
|
|
|
deconstruct_jointree(PlannerInfo *root)
|
|
|
|
{
|
2013-08-19 19:19:25 +02:00
|
|
|
List *result;
|
2005-12-20 03:30:36 +01:00
|
|
|
Relids qualscope;
|
2007-08-31 03:44:06 +02:00
|
|
|
Relids inner_join_rels;
|
2013-08-19 19:19:25 +02:00
|
|
|
List *postponed_qual_list = NIL;
|
2005-12-20 03:30:36 +01:00
|
|
|
|
|
|
|
/* Start recursion at top of jointree */
|
|
|
|
Assert(root->parse->jointree != NULL &&
|
|
|
|
IsA(root->parse->jointree, FromExpr));
|
|
|
|
|
Compute correct em_nullable_relids in get_eclass_for_sort_expr().
Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr
must be able to compute valid em_nullable_relids for any new equivalence
class members it creates. I'd worried about this in the commit message
for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a
problem because multi-member ECs should already exist when it runs. That
is transparently wrong, though, because this function is also called by
initialize_mergeclause_eclasses, which runs during deconstruct_jointree.
The example given in the bug report (which the new regression test item
is based upon) fails because the COALESCE() expression is first seen by
initialize_mergeclause_eclasses rather than process_equivalence.
Fixing this requires passing the appropriate nullable_relids set to
get_eclass_for_sort_expr, and it requires new code to compute that set
for top-level expressions such as ORDER BY, GROUP BY, etc. We store
the top-level nullable_relids in a new field in PlannerInfo to avoid
computing it many times. In the back branches, I've added the new
field at the end of the struct to minimize ABI breakage for planner
plugins. There doesn't seem to be a good alternative to changing
get_eclass_for_sort_expr's API signature, though. There probably aren't
any third-party extensions calling that function directly; moreover,
if there are, they probably need to think about what to pass for
nullable_relids anyway.
Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
|
|
|
/* this is filled as we scan the jointree */
|
|
|
|
root->nullable_baserels = NULL;
|
|
|
|
|
2013-08-19 19:19:25 +02:00
|
|
|
result = deconstruct_recurse(root, (Node *) root->parse->jointree, false,
|
|
|
|
&qualscope, &inner_join_rels,
|
|
|
|
&postponed_qual_list);
|
|
|
|
|
|
|
|
/* Shouldn't be any leftover quals */
|
|
|
|
Assert(postponed_qual_list == NIL);
|
|
|
|
|
|
|
|
return result;
|
2005-12-20 03:30:36 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* deconstruct_recurse
|
|
|
|
* One recursion level of deconstruct_jointree processing.
|
2000-09-12 23:07:18 +02:00
|
|
|
*
|
2005-12-20 03:30:36 +01:00
|
|
|
* Inputs:
|
|
|
|
* jtnode is the jointree node to examine
|
2017-08-16 06:22:32 +02:00
|
|
|
* below_outer_join is true if this node is within the nullable side of a
|
2005-12-20 03:30:36 +01:00
|
|
|
* higher-level outer join
|
|
|
|
* Outputs:
|
|
|
|
* *qualscope gets the set of base Relids syntactically included in this
|
|
|
|
* jointree node (do not modify or free this, as it may also be pointed
|
2008-08-14 20:48:00 +02:00
|
|
|
* to by RestrictInfo and SpecialJoinInfo nodes)
|
2007-08-31 03:44:06 +02:00
|
|
|
* *inner_join_rels gets the set of base Relids syntactically included in
|
|
|
|
* inner joins appearing at or below this jointree node (do not modify
|
|
|
|
* or free this, either)
|
2013-08-19 19:19:25 +02:00
|
|
|
* *postponed_qual_list is a list of PostponedQual structs, which we can
|
|
|
|
* add quals to if they turn out to belong to a higher join level
|
2005-12-20 03:30:36 +01:00
|
|
|
* Return value is the appropriate joinlist for this jointree node
|
|
|
|
*
|
2008-08-14 20:48:00 +02:00
|
|
|
* In addition, entries will be added to root->join_info_list for outer joins.
|
2000-09-12 23:07:18 +02:00
|
|
|
*/
|
2005-12-20 03:30:36 +01:00
|
|
|
static List *
|
|
|
|
deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
|
2013-08-19 19:19:25 +02:00
|
|
|
Relids *qualscope, Relids *inner_join_rels,
|
|
|
|
List **postponed_qual_list)
|
2000-09-12 23:07:18 +02:00
|
|
|
{
|
2005-12-20 03:30:36 +01:00
|
|
|
List *joinlist;
|
2000-09-12 23:07:18 +02:00
|
|
|
|
|
|
|
if (jtnode == NULL)
|
2005-12-20 03:30:36 +01:00
|
|
|
{
|
|
|
|
*qualscope = NULL;
|
2007-08-31 03:44:06 +02:00
|
|
|
*inner_join_rels = NULL;
|
2005-12-20 03:30:36 +01:00
|
|
|
return NIL;
|
|
|
|
}
|
2000-09-29 20:21:41 +02:00
|
|
|
if (IsA(jtnode, RangeTblRef))
|
2000-09-12 23:07:18 +02:00
|
|
|
{
|
2000-09-29 20:21:41 +02:00
|
|
|
int varno = ((RangeTblRef *) jtnode)->rtindex;
|
|
|
|
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
/* qualscope is just the one RTE */
|
2005-12-20 03:30:36 +01:00
|
|
|
*qualscope = bms_make_singleton(varno);
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
/* Deal with any securityQuals attached to the RTE */
|
|
|
|
if (root->qual_security_level > 0)
|
|
|
|
process_security_barrier_quals(root,
|
|
|
|
varno,
|
|
|
|
*qualscope,
|
|
|
|
below_outer_join);
|
2007-08-31 03:44:06 +02:00
|
|
|
/* A single baserel does not create an inner join */
|
|
|
|
*inner_join_rels = NULL;
|
2005-12-20 03:30:36 +01:00
|
|
|
joinlist = list_make1(jtnode);
|
2000-09-29 20:21:41 +02:00
|
|
|
}
|
|
|
|
else if (IsA(jtnode, FromExpr))
|
|
|
|
{
|
|
|
|
FromExpr *f = (FromExpr *) jtnode;
|
2013-08-19 19:19:25 +02:00
|
|
|
List *child_postponed_quals = NIL;
|
2005-12-20 03:30:36 +01:00
|
|
|
int remaining;
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *l;
|
2000-09-12 23:07:18 +02:00
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* First, recurse to handle child joins. We collapse subproblems into
|
|
|
|
* a single joinlist whenever the resulting joinlist wouldn't exceed
|
|
|
|
* from_collapse_limit members. Also, always collapse one-element
|
|
|
|
* subproblems, since that won't lengthen the joinlist anyway.
|
2000-09-12 23:07:18 +02:00
|
|
|
*/
|
2005-12-20 03:30:36 +01:00
|
|
|
*qualscope = NULL;
|
2007-08-31 03:44:06 +02:00
|
|
|
*inner_join_rels = NULL;
|
2005-12-20 03:30:36 +01:00
|
|
|
joinlist = NIL;
|
|
|
|
remaining = list_length(f->fromlist);
|
2000-09-29 20:21:41 +02:00
|
|
|
foreach(l, f->fromlist)
|
|
|
|
{
|
2006-10-04 02:30:14 +02:00
|
|
|
Relids sub_qualscope;
|
|
|
|
List *sub_joinlist;
|
|
|
|
int sub_members;
|
2005-12-20 03:30:36 +01:00
|
|
|
|
|
|
|
sub_joinlist = deconstruct_recurse(root, lfirst(l),
|
|
|
|
below_outer_join,
|
2007-08-31 03:44:06 +02:00
|
|
|
&sub_qualscope,
|
2013-08-19 19:19:25 +02:00
|
|
|
inner_join_rels,
|
|
|
|
&child_postponed_quals);
|
2005-12-20 03:30:36 +01:00
|
|
|
*qualscope = bms_add_members(*qualscope, sub_qualscope);
|
|
|
|
sub_members = list_length(sub_joinlist);
|
|
|
|
remaining--;
|
|
|
|
if (sub_members <= 1 ||
|
|
|
|
list_length(joinlist) + sub_members + remaining <= from_collapse_limit)
|
|
|
|
joinlist = list_concat(joinlist, sub_joinlist);
|
|
|
|
else
|
|
|
|
joinlist = lappend(joinlist, sub_joinlist);
|
2000-09-29 20:21:41 +02:00
|
|
|
}
|
2000-09-12 23:07:18 +02:00
|
|
|
|
2007-08-31 03:44:06 +02:00
|
|
|
/*
|
|
|
|
* A FROM with more than one list element is an inner join subsuming
|
2007-11-15 22:14:46 +01:00
|
|
|
* all below it, so we should report inner_join_rels = qualscope. If
|
|
|
|
* there was exactly one element, we should (and already did) report
|
|
|
|
* whatever its inner_join_rels were. If there were no elements (is
|
|
|
|
* that possible?) the initialization before the loop fixed it.
|
2007-08-31 03:44:06 +02:00
|
|
|
*/
|
|
|
|
if (list_length(f->fromlist) > 1)
|
|
|
|
*inner_join_rels = *qualscope;
|
|
|
|
|
2013-08-19 19:19:25 +02:00
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* Try to process any quals postponed by children. If they need
|
2013-08-19 19:19:25 +02:00
|
|
|
* further postponement, add them to my output postponed_qual_list.
|
|
|
|
*/
|
|
|
|
foreach(l, child_postponed_quals)
|
|
|
|
{
|
|
|
|
PostponedQual *pq = (PostponedQual *) lfirst(l);
|
|
|
|
|
|
|
|
if (bms_is_subset(pq->relids, *qualscope))
|
|
|
|
distribute_qual_to_rels(root, pq->qual,
|
|
|
|
false, below_outer_join, JOIN_INNER,
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
root->qual_security_level,
|
2013-08-19 19:19:25 +02:00
|
|
|
*qualscope, NULL, NULL, NULL,
|
|
|
|
NULL);
|
|
|
|
else
|
|
|
|
*postponed_qual_list = lappend(*postponed_qual_list, pq);
|
|
|
|
}
|
|
|
|
|
2000-09-29 20:21:41 +02:00
|
|
|
/*
|
2007-02-16 21:57:19 +01:00
|
|
|
* Now process the top-level quals.
|
2000-09-29 20:21:41 +02:00
|
|
|
*/
|
2004-05-26 06:41:50 +02:00
|
|
|
foreach(l, (List *) f->quals)
|
2008-08-14 20:48:00 +02:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
Node *qual = (Node *) lfirst(l);
|
2008-08-14 20:48:00 +02:00
|
|
|
|
2009-02-25 04:30:38 +01:00
|
|
|
distribute_qual_to_rels(root, qual,
|
|
|
|
false, below_outer_join, JOIN_INNER,
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
root->qual_security_level,
|
2013-08-19 19:19:25 +02:00
|
|
|
*qualscope, NULL, NULL, NULL,
|
|
|
|
postponed_qual_list);
|
2008-08-14 20:48:00 +02:00
|
|
|
}
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
|
|
|
else if (IsA(jtnode, JoinExpr))
|
|
|
|
{
|
|
|
|
JoinExpr *j = (JoinExpr *) jtnode;
|
2013-08-19 19:19:25 +02:00
|
|
|
List *child_postponed_quals = NIL;
|
2000-09-12 23:07:18 +02:00
|
|
|
Relids leftids,
|
2003-03-03 00:46:34 +01:00
|
|
|
rightids,
|
2007-08-31 03:44:06 +02:00
|
|
|
left_inners,
|
|
|
|
right_inners,
|
2003-03-03 00:46:34 +01:00
|
|
|
nonnullable_rels,
|
Compute correct em_nullable_relids in get_eclass_for_sort_expr().
Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr
must be able to compute valid em_nullable_relids for any new equivalence
class members it creates. I'd worried about this in the commit message
for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a
problem because multi-member ECs should already exist when it runs. That
is transparently wrong, though, because this function is also called by
initialize_mergeclause_eclasses, which runs during deconstruct_jointree.
The example given in the bug report (which the new regression test item
is based upon) fails because the COALESCE() expression is first seen by
initialize_mergeclause_eclasses rather than process_equivalence.
Fixing this requires passing the appropriate nullable_relids set to
get_eclass_for_sort_expr, and it requires new code to compute that set
for top-level expressions such as ORDER BY, GROUP BY, etc. We store
the top-level nullable_relids in a new field in PlannerInfo to avoid
computing it many times. In the back branches, I've added the new
field at the end of the struct to minimize ABI breakage for planner
plugins. There doesn't seem to be a good alternative to changing
get_eclass_for_sort_expr's API signature, though. There probably aren't
any third-party extensions calling that function directly; moreover,
if there are, they probably need to think about what to pass for
nullable_relids anyway.
Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
|
|
|
nullable_rels,
|
2005-12-20 03:30:36 +01:00
|
|
|
ojscope;
|
|
|
|
List *leftjoinlist,
|
|
|
|
*rightjoinlist;
|
2014-01-30 20:51:16 +01:00
|
|
|
List *my_quals;
|
2008-08-14 20:48:00 +02:00
|
|
|
SpecialJoinInfo *sjinfo;
|
|
|
|
ListCell *l;
|
2000-09-12 23:07:18 +02:00
|
|
|
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Order of operations here is subtle and critical. First we recurse
|
|
|
|
* to handle sub-JOINs. Their join quals will be placed without
|
|
|
|
* regard for whether this level is an outer join, which is correct.
|
|
|
|
* Then we place our own join quals, which are restricted by lower
|
|
|
|
* outer joins in any case, and are forced to this level if this is an
|
2014-05-06 18:12:18 +02:00
|
|
|
* outer join and they mention the outer side. Finally, if this is an
|
2008-08-14 20:48:00 +02:00
|
|
|
* outer join, we create a join_info_list entry for the join. This
|
2005-12-20 03:30:36 +01:00
|
|
|
* will prevent quals above us in the join tree that use those rels
|
|
|
|
* from being pushed down below this level. (It's okay for upper
|
|
|
|
* quals to be pushed down to the outer side, however.)
|
2000-09-12 23:07:18 +02:00
|
|
|
*/
|
|
|
|
switch (j->jointype)
|
|
|
|
{
|
|
|
|
case JOIN_INNER:
|
2005-12-20 03:30:36 +01:00
|
|
|
leftjoinlist = deconstruct_recurse(root, j->larg,
|
|
|
|
below_outer_join,
|
2013-08-19 19:19:25 +02:00
|
|
|
&leftids, &left_inners,
|
|
|
|
&child_postponed_quals);
|
2005-12-20 03:30:36 +01:00
|
|
|
rightjoinlist = deconstruct_recurse(root, j->rarg,
|
|
|
|
below_outer_join,
|
2013-08-19 19:19:25 +02:00
|
|
|
&rightids, &right_inners,
|
|
|
|
&child_postponed_quals);
|
2005-12-20 03:30:36 +01:00
|
|
|
*qualscope = bms_union(leftids, rightids);
|
2007-08-31 03:44:06 +02:00
|
|
|
*inner_join_rels = *qualscope;
|
2000-09-12 23:07:18 +02:00
|
|
|
/* Inner join adds no restrictions for quals */
|
2005-09-28 23:17:02 +02:00
|
|
|
nonnullable_rels = NULL;
|
Compute correct em_nullable_relids in get_eclass_for_sort_expr().
Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr
must be able to compute valid em_nullable_relids for any new equivalence
class members it creates. I'd worried about this in the commit message
for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a
problem because multi-member ECs should already exist when it runs. That
is transparently wrong, though, because this function is also called by
initialize_mergeclause_eclasses, which runs during deconstruct_jointree.
The example given in the bug report (which the new regression test item
is based upon) fails because the COALESCE() expression is first seen by
initialize_mergeclause_eclasses rather than process_equivalence.
Fixing this requires passing the appropriate nullable_relids set to
get_eclass_for_sort_expr, and it requires new code to compute that set
for top-level expressions such as ORDER BY, GROUP BY, etc. We store
the top-level nullable_relids in a new field in PlannerInfo to avoid
computing it many times. In the back branches, I've added the new
field at the end of the struct to minimize ABI breakage for planner
plugins. There doesn't seem to be a good alternative to changing
get_eclass_for_sort_expr's API signature, though. There probably aren't
any third-party extensions calling that function directly; moreover,
if there are, they probably need to think about what to pass for
nullable_relids anyway.
Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
|
|
|
/* and it doesn't force anything to null, either */
|
|
|
|
nullable_rels = NULL;
|
2000-09-12 23:07:18 +02:00
|
|
|
break;
|
|
|
|
case JOIN_LEFT:
|
2008-08-14 20:48:00 +02:00
|
|
|
case JOIN_ANTI:
|
2005-12-20 03:30:36 +01:00
|
|
|
leftjoinlist = deconstruct_recurse(root, j->larg,
|
|
|
|
below_outer_join,
|
2013-08-19 19:19:25 +02:00
|
|
|
&leftids, &left_inners,
|
|
|
|
&child_postponed_quals);
|
2005-12-20 03:30:36 +01:00
|
|
|
rightjoinlist = deconstruct_recurse(root, j->rarg,
|
|
|
|
true,
|
2013-08-19 19:19:25 +02:00
|
|
|
&rightids, &right_inners,
|
|
|
|
&child_postponed_quals);
|
2005-12-20 03:30:36 +01:00
|
|
|
*qualscope = bms_union(leftids, rightids);
|
2007-08-31 03:44:06 +02:00
|
|
|
*inner_join_rels = bms_union(left_inners, right_inners);
|
2003-03-03 00:46:34 +01:00
|
|
|
nonnullable_rels = leftids;
|
Compute correct em_nullable_relids in get_eclass_for_sort_expr().
Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr
must be able to compute valid em_nullable_relids for any new equivalence
class members it creates. I'd worried about this in the commit message
for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a
problem because multi-member ECs should already exist when it runs. That
is transparently wrong, though, because this function is also called by
initialize_mergeclause_eclasses, which runs during deconstruct_jointree.
The example given in the bug report (which the new regression test item
is based upon) fails because the COALESCE() expression is first seen by
initialize_mergeclause_eclasses rather than process_equivalence.
Fixing this requires passing the appropriate nullable_relids set to
get_eclass_for_sort_expr, and it requires new code to compute that set
for top-level expressions such as ORDER BY, GROUP BY, etc. We store
the top-level nullable_relids in a new field in PlannerInfo to avoid
computing it many times. In the back branches, I've added the new
field at the end of the struct to minimize ABI breakage for planner
plugins. There doesn't seem to be a good alternative to changing
get_eclass_for_sort_expr's API signature, though. There probably aren't
any third-party extensions calling that function directly; moreover,
if there are, they probably need to think about what to pass for
nullable_relids anyway.
Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
|
|
|
nullable_rels = rightids;
|
2000-09-12 23:07:18 +02:00
|
|
|
break;
|
2009-02-25 04:30:38 +01:00
|
|
|
case JOIN_SEMI:
|
|
|
|
leftjoinlist = deconstruct_recurse(root, j->larg,
|
|
|
|
below_outer_join,
|
2013-08-19 19:19:25 +02:00
|
|
|
&leftids, &left_inners,
|
|
|
|
&child_postponed_quals);
|
2009-02-25 04:30:38 +01:00
|
|
|
rightjoinlist = deconstruct_recurse(root, j->rarg,
|
|
|
|
below_outer_join,
|
2013-08-19 19:19:25 +02:00
|
|
|
&rightids, &right_inners,
|
|
|
|
&child_postponed_quals);
|
2009-02-25 04:30:38 +01:00
|
|
|
*qualscope = bms_union(leftids, rightids);
|
|
|
|
*inner_join_rels = bms_union(left_inners, right_inners);
|
|
|
|
/* Semi join adds no restrictions for quals */
|
|
|
|
nonnullable_rels = NULL;
|
Compute correct em_nullable_relids in get_eclass_for_sort_expr().
Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr
must be able to compute valid em_nullable_relids for any new equivalence
class members it creates. I'd worried about this in the commit message
for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a
problem because multi-member ECs should already exist when it runs. That
is transparently wrong, though, because this function is also called by
initialize_mergeclause_eclasses, which runs during deconstruct_jointree.
The example given in the bug report (which the new regression test item
is based upon) fails because the COALESCE() expression is first seen by
initialize_mergeclause_eclasses rather than process_equivalence.
Fixing this requires passing the appropriate nullable_relids set to
get_eclass_for_sort_expr, and it requires new code to compute that set
for top-level expressions such as ORDER BY, GROUP BY, etc. We store
the top-level nullable_relids in a new field in PlannerInfo to avoid
computing it many times. In the back branches, I've added the new
field at the end of the struct to minimize ABI breakage for planner
plugins. There doesn't seem to be a good alternative to changing
get_eclass_for_sort_expr's API signature, though. There probably aren't
any third-party extensions calling that function directly; moreover,
if there are, they probably need to think about what to pass for
nullable_relids anyway.
Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Theoretically, a semijoin would null the RHS; but since the
|
|
|
|
* RHS can't be accessed above the join, this is immaterial
|
|
|
|
* and we needn't account for it.
|
|
|
|
*/
|
|
|
|
nullable_rels = NULL;
|
2009-02-25 04:30:38 +01:00
|
|
|
break;
|
2000-09-12 23:07:18 +02:00
|
|
|
case JOIN_FULL:
|
2005-12-20 03:30:36 +01:00
|
|
|
leftjoinlist = deconstruct_recurse(root, j->larg,
|
|
|
|
true,
|
2013-08-19 19:19:25 +02:00
|
|
|
&leftids, &left_inners,
|
|
|
|
&child_postponed_quals);
|
2005-12-20 03:30:36 +01:00
|
|
|
rightjoinlist = deconstruct_recurse(root, j->rarg,
|
|
|
|
true,
|
2013-08-19 19:19:25 +02:00
|
|
|
&rightids, &right_inners,
|
|
|
|
&child_postponed_quals);
|
2005-12-20 03:30:36 +01:00
|
|
|
*qualscope = bms_union(leftids, rightids);
|
2007-08-31 03:44:06 +02:00
|
|
|
*inner_join_rels = bms_union(left_inners, right_inners);
|
2003-03-03 00:46:34 +01:00
|
|
|
/* each side is both outer and inner */
|
2005-12-20 03:30:36 +01:00
|
|
|
nonnullable_rels = *qualscope;
|
Compute correct em_nullable_relids in get_eclass_for_sort_expr().
Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr
must be able to compute valid em_nullable_relids for any new equivalence
class members it creates. I'd worried about this in the commit message
for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a
problem because multi-member ECs should already exist when it runs. That
is transparently wrong, though, because this function is also called by
initialize_mergeclause_eclasses, which runs during deconstruct_jointree.
The example given in the bug report (which the new regression test item
is based upon) fails because the COALESCE() expression is first seen by
initialize_mergeclause_eclasses rather than process_equivalence.
Fixing this requires passing the appropriate nullable_relids set to
get_eclass_for_sort_expr, and it requires new code to compute that set
for top-level expressions such as ORDER BY, GROUP BY, etc. We store
the top-level nullable_relids in a new field in PlannerInfo to avoid
computing it many times. In the back branches, I've added the new
field at the end of the struct to minimize ABI breakage for planner
plugins. There doesn't seem to be a good alternative to changing
get_eclass_for_sort_expr's API signature, though. There probably aren't
any third-party extensions calling that function directly; moreover,
if there are, they probably need to think about what to pass for
nullable_relids anyway.
Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
|
|
|
nullable_rels = *qualscope;
|
2000-09-12 23:07:18 +02:00
|
|
|
break;
|
|
|
|
default:
|
2008-08-14 20:48:00 +02:00
|
|
|
/* JOIN_RIGHT was eliminated during reduce_outer_joins() */
|
2003-07-25 02:01:09 +02:00
|
|
|
elog(ERROR, "unrecognized join type: %d",
|
2000-09-12 23:07:18 +02:00
|
|
|
(int) j->jointype);
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
nonnullable_rels = NULL; /* keep compiler quiet */
|
Compute correct em_nullable_relids in get_eclass_for_sort_expr().
Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr
must be able to compute valid em_nullable_relids for any new equivalence
class members it creates. I'd worried about this in the commit message
for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a
problem because multi-member ECs should already exist when it runs. That
is transparently wrong, though, because this function is also called by
initialize_mergeclause_eclasses, which runs during deconstruct_jointree.
The example given in the bug report (which the new regression test item
is based upon) fails because the COALESCE() expression is first seen by
initialize_mergeclause_eclasses rather than process_equivalence.
Fixing this requires passing the appropriate nullable_relids set to
get_eclass_for_sort_expr, and it requires new code to compute that set
for top-level expressions such as ORDER BY, GROUP BY, etc. We store
the top-level nullable_relids in a new field in PlannerInfo to avoid
computing it many times. In the back branches, I've added the new
field at the end of the struct to minimize ABI breakage for planner
plugins. There doesn't seem to be a good alternative to changing
get_eclass_for_sort_expr's API signature, though. There probably aren't
any third-party extensions calling that function directly; moreover,
if there are, they probably need to think about what to pass for
nullable_relids anyway.
Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
|
|
|
nullable_rels = NULL;
|
2005-12-20 03:30:36 +01:00
|
|
|
leftjoinlist = rightjoinlist = NIL;
|
2000-09-12 23:07:18 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
Compute correct em_nullable_relids in get_eclass_for_sort_expr().
Bug #8591 from Claudio Freire demonstrates that get_eclass_for_sort_expr
must be able to compute valid em_nullable_relids for any new equivalence
class members it creates. I'd worried about this in the commit message
for db9f0e1d9a4a0842c814a464cdc9758c3f20b96c, but claimed that it wasn't a
problem because multi-member ECs should already exist when it runs. That
is transparently wrong, though, because this function is also called by
initialize_mergeclause_eclasses, which runs during deconstruct_jointree.
The example given in the bug report (which the new regression test item
is based upon) fails because the COALESCE() expression is first seen by
initialize_mergeclause_eclasses rather than process_equivalence.
Fixing this requires passing the appropriate nullable_relids set to
get_eclass_for_sort_expr, and it requires new code to compute that set
for top-level expressions such as ORDER BY, GROUP BY, etc. We store
the top-level nullable_relids in a new field in PlannerInfo to avoid
computing it many times. In the back branches, I've added the new
field at the end of the struct to minimize ABI breakage for planner
plugins. There doesn't seem to be a good alternative to changing
get_eclass_for_sort_expr's API signature, though. There probably aren't
any third-party extensions calling that function directly; moreover,
if there are, they probably need to think about what to pass for
nullable_relids anyway.
Back-patch to 9.2, like the previous patch in this area.
2013-11-15 22:46:18 +01:00
|
|
|
/* Report all rels that will be nulled anywhere in the jointree */
|
|
|
|
root->nullable_baserels = bms_add_members(root->nullable_baserels,
|
|
|
|
nullable_rels);
|
|
|
|
|
2014-01-30 20:51:16 +01:00
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* Try to process any quals postponed by children. If they need
|
2014-01-30 20:51:16 +01:00
|
|
|
* further postponement, add them to my output postponed_qual_list.
|
|
|
|
* Quals that can be processed now must be included in my_quals, so
|
|
|
|
* that they'll be handled properly in make_outerjoininfo.
|
|
|
|
*/
|
|
|
|
my_quals = NIL;
|
|
|
|
foreach(l, child_postponed_quals)
|
|
|
|
{
|
|
|
|
PostponedQual *pq = (PostponedQual *) lfirst(l);
|
|
|
|
|
|
|
|
if (bms_is_subset(pq->relids, *qualscope))
|
|
|
|
my_quals = lappend(my_quals, pq->qual);
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We should not be postponing any quals past an outer join.
|
|
|
|
* If this Assert fires, pull_up_subqueries() messed up.
|
|
|
|
*/
|
|
|
|
Assert(j->jointype == JOIN_INNER);
|
|
|
|
*postponed_qual_list = lappend(*postponed_qual_list, pq);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* list_concat is nondestructive of its second argument */
|
|
|
|
my_quals = list_concat(my_quals, (List *) j->quals);
|
|
|
|
|
2005-12-20 03:30:36 +01:00
|
|
|
/*
|
2008-08-14 20:48:00 +02:00
|
|
|
* For an OJ, form the SpecialJoinInfo now, because we need the OJ's
|
2007-02-13 03:31:03 +01:00
|
|
|
* semantic scope (ojscope) to pass to distribute_qual_to_rels. But
|
2008-08-14 20:48:00 +02:00
|
|
|
* we mustn't add it to join_info_list just yet, because we don't want
|
2007-02-13 03:31:03 +01:00
|
|
|
* distribute_qual_to_rels to think it is an outer join below us.
|
2009-02-25 04:30:38 +01:00
|
|
|
*
|
2009-06-11 16:49:15 +02:00
|
|
|
* Semijoins are a bit of a hybrid: we build a SpecialJoinInfo, but we
|
|
|
|
* want ojscope = NULL for distribute_qual_to_rels.
|
2005-12-20 03:30:36 +01:00
|
|
|
*/
|
|
|
|
if (j->jointype != JOIN_INNER)
|
|
|
|
{
|
2008-08-14 20:48:00 +02:00
|
|
|
sjinfo = make_outerjoininfo(root,
|
2007-08-31 03:44:06 +02:00
|
|
|
leftids, rightids,
|
|
|
|
*inner_join_rels,
|
2008-08-14 20:48:00 +02:00
|
|
|
j->jointype,
|
2014-01-30 20:51:16 +01:00
|
|
|
my_quals);
|
2009-02-25 04:30:38 +01:00
|
|
|
if (j->jointype == JOIN_SEMI)
|
|
|
|
ojscope = NULL;
|
|
|
|
else
|
|
|
|
ojscope = bms_union(sjinfo->min_lefthand,
|
|
|
|
sjinfo->min_righthand);
|
2005-12-20 03:30:36 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2008-08-14 20:48:00 +02:00
|
|
|
sjinfo = NULL;
|
2005-12-20 03:30:36 +01:00
|
|
|
ojscope = NULL;
|
|
|
|
}
|
|
|
|
|
2013-08-19 19:19:25 +02:00
|
|
|
/* Process the JOIN's qual clauses */
|
2014-01-30 20:51:16 +01:00
|
|
|
foreach(l, my_quals)
|
2008-08-14 20:48:00 +02:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
Node *qual = (Node *) lfirst(l);
|
2008-08-14 20:48:00 +02:00
|
|
|
|
2009-02-25 04:30:38 +01:00
|
|
|
distribute_qual_to_rels(root, qual,
|
|
|
|
false, below_outer_join, j->jointype,
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
root->qual_security_level,
|
2009-02-25 04:30:38 +01:00
|
|
|
*qualscope,
|
2013-08-19 19:19:25 +02:00
|
|
|
ojscope, nonnullable_rels, NULL,
|
|
|
|
postponed_qual_list);
|
2008-08-14 20:48:00 +02:00
|
|
|
}
|
2005-12-20 03:30:36 +01:00
|
|
|
|
2008-08-14 20:48:00 +02:00
|
|
|
/* Now we can add the SpecialJoinInfo to join_info_list */
|
|
|
|
if (sjinfo)
|
2010-09-28 18:08:56 +02:00
|
|
|
{
|
2008-08-14 20:48:00 +02:00
|
|
|
root->join_info_list = lappend(root->join_info_list, sjinfo);
|
2010-09-28 18:08:56 +02:00
|
|
|
/* Each time we do that, recheck placeholder eval levels */
|
|
|
|
update_placeholder_eval_levels(root, sjinfo);
|
|
|
|
}
|
2003-03-03 00:46:34 +01:00
|
|
|
|
2005-12-20 03:30:36 +01:00
|
|
|
/*
|
|
|
|
* Finally, compute the output joinlist. We fold subproblems together
|
|
|
|
* except at a FULL JOIN or where join_collapse_limit would be
|
|
|
|
* exceeded.
|
|
|
|
*/
|
2007-01-08 17:47:30 +01:00
|
|
|
if (j->jointype == JOIN_FULL)
|
|
|
|
{
|
|
|
|
/* force the join order exactly at this node */
|
|
|
|
joinlist = list_make1(list_make2(leftjoinlist, rightjoinlist));
|
|
|
|
}
|
|
|
|
else if (list_length(leftjoinlist) + list_length(rightjoinlist) <=
|
|
|
|
join_collapse_limit)
|
|
|
|
{
|
|
|
|
/* OK to combine subproblems */
|
2005-12-20 03:30:36 +01:00
|
|
|
joinlist = list_concat(leftjoinlist, rightjoinlist);
|
2007-01-08 17:47:30 +01:00
|
|
|
}
|
2006-10-04 02:30:14 +02:00
|
|
|
else
|
2007-01-08 17:47:30 +01:00
|
|
|
{
|
|
|
|
/* can't combine, but needn't force join order above here */
|
2007-11-15 22:14:46 +01:00
|
|
|
Node *leftpart,
|
|
|
|
*rightpart;
|
2007-01-08 17:47:30 +01:00
|
|
|
|
|
|
|
/* avoid creating useless 1-element sublists */
|
|
|
|
if (list_length(leftjoinlist) == 1)
|
|
|
|
leftpart = (Node *) linitial(leftjoinlist);
|
|
|
|
else
|
|
|
|
leftpart = (Node *) leftjoinlist;
|
|
|
|
if (list_length(rightjoinlist) == 1)
|
|
|
|
rightpart = (Node *) linitial(rightjoinlist);
|
|
|
|
else
|
|
|
|
rightpart = (Node *) rightjoinlist;
|
|
|
|
joinlist = list_make2(leftpart, rightpart);
|
|
|
|
}
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
|
|
|
else
|
2005-12-20 03:30:36 +01:00
|
|
|
{
|
2003-07-25 02:01:09 +02:00
|
|
|
elog(ERROR, "unrecognized node type: %d",
|
|
|
|
(int) nodeTag(jtnode));
|
2005-12-20 03:30:36 +01:00
|
|
|
joinlist = NIL; /* keep compiler quiet */
|
|
|
|
}
|
|
|
|
return joinlist;
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
|
|
|
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
/*
|
|
|
|
* process_security_barrier_quals
|
|
|
|
* Transfer security-barrier quals into relation's baserestrictinfo list.
|
|
|
|
*
|
|
|
|
* The rewriter put any relevant security-barrier conditions into the RTE's
|
|
|
|
* securityQuals field, but it's now time to copy them into the rel's
|
|
|
|
* baserestrictinfo.
|
|
|
|
*
|
|
|
|
* In inheritance cases, we only consider quals attached to the parent rel
|
|
|
|
* here; they will be valid for all children too, so it's okay to consider
|
|
|
|
* them for purposes like equivalence class creation. Quals attached to
|
|
|
|
* individual child rels will be dealt with during path creation.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
process_security_barrier_quals(PlannerInfo *root,
|
|
|
|
int rti, Relids qualscope,
|
|
|
|
bool below_outer_join)
|
|
|
|
{
|
|
|
|
RangeTblEntry *rte = root->simple_rte_array[rti];
|
|
|
|
Index security_level = 0;
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Each element of the securityQuals list has been preprocessed into an
|
|
|
|
* implicitly-ANDed list of clauses. All the clauses in a given sublist
|
|
|
|
* should get the same security level, but successive sublists get higher
|
|
|
|
* levels.
|
|
|
|
*/
|
|
|
|
foreach(lc, rte->securityQuals)
|
|
|
|
{
|
|
|
|
List *qualset = (List *) lfirst(lc);
|
|
|
|
ListCell *lc2;
|
|
|
|
|
|
|
|
foreach(lc2, qualset)
|
|
|
|
{
|
|
|
|
Node *qual = (Node *) lfirst(lc2);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We cheat to the extent of passing ojscope = qualscope rather
|
|
|
|
* than its more logical value of NULL. The only effect this has
|
|
|
|
* is to force a Var-free qual to be evaluated at the rel rather
|
|
|
|
* than being pushed up to top of tree, which we don't want.
|
|
|
|
*/
|
|
|
|
distribute_qual_to_rels(root, qual,
|
|
|
|
false,
|
|
|
|
below_outer_join,
|
|
|
|
JOIN_INNER,
|
|
|
|
security_level,
|
|
|
|
qualscope,
|
|
|
|
qualscope,
|
|
|
|
NULL,
|
|
|
|
NULL,
|
|
|
|
NULL);
|
|
|
|
}
|
|
|
|
security_level++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Assert that qual_security_level is higher than anything we just used */
|
|
|
|
Assert(security_level <= root->qual_security_level);
|
|
|
|
}
|
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
/*
|
2005-12-20 03:30:36 +01:00
|
|
|
* make_outerjoininfo
|
2008-08-14 20:48:00 +02:00
|
|
|
* Build a SpecialJoinInfo for the current outer join
|
2005-12-20 03:30:36 +01:00
|
|
|
*
|
|
|
|
* Inputs:
|
|
|
|
* left_rels: the base Relids syntactically on outer side of join
|
|
|
|
* right_rels: the base Relids syntactically on inner side of join
|
2007-08-31 03:44:06 +02:00
|
|
|
* inner_join_rels: base Relids participating in inner joins below this one
|
2008-08-14 20:48:00 +02:00
|
|
|
* jointype: what it says (must always be LEFT, FULL, SEMI, or ANTI)
|
|
|
|
* clause: the outer join's join condition (in implicit-AND format)
|
2005-12-20 03:30:36 +01:00
|
|
|
*
|
2008-08-14 20:48:00 +02:00
|
|
|
* The node should eventually be appended to root->join_info_list, but we
|
2005-12-20 03:30:36 +01:00
|
|
|
* do not do that here.
|
2007-02-13 03:31:03 +01:00
|
|
|
*
|
|
|
|
* Note: we assume that this function is invoked bottom-up, so that
|
2008-08-14 20:48:00 +02:00
|
|
|
* root->join_info_list already contains entries for all outer joins that are
|
2007-08-31 03:44:06 +02:00
|
|
|
* syntactically below this one.
|
2000-09-12 23:07:18 +02:00
|
|
|
*/
|
2008-08-14 20:48:00 +02:00
|
|
|
static SpecialJoinInfo *
|
2005-12-20 03:30:36 +01:00
|
|
|
make_outerjoininfo(PlannerInfo *root,
|
|
|
|
Relids left_rels, Relids right_rels,
|
2007-08-31 03:44:06 +02:00
|
|
|
Relids inner_join_rels,
|
2008-08-14 20:48:00 +02:00
|
|
|
JoinType jointype, List *clause)
|
2000-09-12 23:07:18 +02:00
|
|
|
{
|
2008-08-14 20:48:00 +02:00
|
|
|
SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
|
2005-12-20 03:30:36 +01:00
|
|
|
Relids clause_relids;
|
|
|
|
Relids strict_relids;
|
2007-08-31 03:44:06 +02:00
|
|
|
Relids min_lefthand;
|
|
|
|
Relids min_righthand;
|
2005-12-20 03:30:36 +01:00
|
|
|
ListCell *l;
|
|
|
|
|
2008-08-14 20:48:00 +02:00
|
|
|
/*
|
|
|
|
* We should not see RIGHT JOIN here because left/right were switched
|
|
|
|
* earlier
|
|
|
|
*/
|
|
|
|
Assert(jointype != JOIN_INNER);
|
|
|
|
Assert(jointype != JOIN_RIGHT);
|
|
|
|
|
2006-09-08 19:49:13 +02:00
|
|
|
/*
|
2013-05-29 22:58:43 +02:00
|
|
|
* Presently the executor cannot support FOR [KEY] UPDATE/SHARE marking of
|
|
|
|
* rels appearing on the nullable side of an outer join. (It's somewhat
|
|
|
|
* unclear what that would mean, anyway: what should we mark when a result
|
|
|
|
* row is generated from no element of the nullable relation?) So,
|
|
|
|
* complain if any nullable rel is FOR [KEY] UPDATE/SHARE.
|
2006-09-08 19:49:13 +02:00
|
|
|
*
|
|
|
|
* You might be wondering why this test isn't made far upstream in the
|
2014-05-06 18:12:18 +02:00
|
|
|
* parser. It's because the parser hasn't got enough info --- consider
|
2006-10-04 02:30:14 +02:00
|
|
|
* FOR UPDATE applied to a view. Only after rewriting and flattening do
|
|
|
|
* we know whether the view contains an outer join.
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
*
|
2010-02-26 03:01:40 +01:00
|
|
|
* We use the original RowMarkClause list here; the PlanRowMark list would
|
|
|
|
* list everything.
|
2006-09-08 19:49:13 +02:00
|
|
|
*/
|
|
|
|
foreach(l, root->parse->rowMarks)
|
|
|
|
{
|
|
|
|
RowMarkClause *rc = (RowMarkClause *) lfirst(l);
|
|
|
|
|
|
|
|
if (bms_is_member(rc->rti, right_rels) ||
|
2008-08-14 20:48:00 +02:00
|
|
|
(jointype == JOIN_FULL && bms_is_member(rc->rti, left_rels)))
|
2006-09-08 19:49:13 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2014-05-06 18:12:18 +02:00
|
|
|
/*------
|
|
|
|
translator: %s is a SQL row locking clause such as FOR UPDATE */
|
2013-07-23 20:03:09 +02:00
|
|
|
errmsg("%s cannot be applied to the nullable side of an outer join",
|
|
|
|
LCS_asString(rc->strength))));
|
2006-09-08 19:49:13 +02:00
|
|
|
}
|
|
|
|
|
2008-08-14 20:48:00 +02:00
|
|
|
sjinfo->syn_lefthand = left_rels;
|
|
|
|
sjinfo->syn_righthand = right_rels;
|
|
|
|
sjinfo->jointype = jointype;
|
2007-05-23 01:23:58 +02:00
|
|
|
/* this always starts out false */
|
2008-08-14 20:48:00 +02:00
|
|
|
sjinfo->delay_upper_joins = false;
|
Improve planner's cost estimation in the presence of semijoins.
If we have a semijoin, say
SELECT * FROM x WHERE x1 IN (SELECT y1 FROM y)
and we're estimating the cost of a parameterized indexscan on x, the number
of repetitions of the indexscan should not be taken as the size of y; it'll
really only be the number of distinct values of y1, because the only valid
plan with y on the outside of a nestloop would require y to be unique-ified
before joining it to x. Most of the time this doesn't make that much
difference, but sometimes it can lead to drastically underestimating the
cost of the indexscan and hence choosing a bad plan, as pointed out by
David Kubečka.
Fixing this is a bit difficult because parameterized indexscans are costed
out quite early in the planning process, before we have the information
that would be needed to call estimate_num_groups() and thereby estimate the
number of distinct values of the join column(s). However we can move the
code that extracts a semijoin RHS's unique-ification columns, so that it's
done in initsplan.c rather than on-the-fly in create_unique_path(). That
shouldn't make any difference speed-wise and it's really a bit cleaner too.
The other bit of information we need is the size of the semijoin RHS,
which is easy if it's a single relation (we make those estimates before
considering indexscan costs) but problematic if it's a join relation.
The solution adopted here is just to use the product of the sizes of the
join component rels. That will generally be an overestimate, but since
estimate_num_groups() only uses this input as a clamp, an overestimate
shouldn't hurt us too badly. In any case we don't allow this new logic
to produce a value larger than we would have chosen before, so that at
worst an overestimate leaves us no wiser than we were before.
2015-03-12 02:21:00 +01:00
|
|
|
|
|
|
|
compute_semijoin_info(sjinfo, clause);
|
2007-05-23 01:23:58 +02:00
|
|
|
|
2005-12-20 03:30:36 +01:00
|
|
|
/* If it's a full join, no need to be very smart */
|
2008-08-14 20:48:00 +02:00
|
|
|
if (jointype == JOIN_FULL)
|
2005-12-20 03:30:36 +01:00
|
|
|
{
|
2008-08-14 20:48:00 +02:00
|
|
|
sjinfo->min_lefthand = bms_copy(left_rels);
|
|
|
|
sjinfo->min_righthand = bms_copy(right_rels);
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
sjinfo->lhs_strict = false; /* don't care about this */
|
2008-08-14 20:48:00 +02:00
|
|
|
return sjinfo;
|
2005-12-20 03:30:36 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Retrieve all relids mentioned within the join clause.
|
|
|
|
*/
|
2008-08-14 20:48:00 +02:00
|
|
|
clause_relids = pull_varnos((Node *) clause);
|
2005-12-20 03:30:36 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* For which relids is the clause strict, ie, it cannot succeed if the
|
|
|
|
* rel's columns are all NULL?
|
|
|
|
*/
|
2008-08-14 20:48:00 +02:00
|
|
|
strict_relids = find_nonnullable_rels((Node *) clause);
|
2000-09-12 23:07:18 +02:00
|
|
|
|
2005-12-20 03:30:36 +01:00
|
|
|
/* Remember whether the clause is strict for any LHS relations */
|
2008-08-14 20:48:00 +02:00
|
|
|
sjinfo->lhs_strict = bms_overlap(strict_relids, left_rels);
|
2005-12-20 03:30:36 +01:00
|
|
|
|
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* Required LHS always includes the LHS rels mentioned in the clause. We
|
|
|
|
* may have to add more rels based on lower outer joins; see below.
|
2005-12-20 03:30:36 +01:00
|
|
|
*/
|
2007-08-31 03:44:06 +02:00
|
|
|
min_lefthand = bms_intersect(clause_relids, left_rels);
|
2005-12-20 03:30:36 +01:00
|
|
|
|
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* Similarly for required RHS. But here, we must also include any lower
|
2007-08-31 03:44:06 +02:00
|
|
|
* inner joins, to ensure we don't try to commute with any of them.
|
2005-12-20 03:30:36 +01:00
|
|
|
*/
|
2007-08-31 03:44:06 +02:00
|
|
|
min_righthand = bms_int_members(bms_union(clause_relids, inner_join_rels),
|
|
|
|
right_rels);
|
2005-12-20 03:30:36 +01:00
|
|
|
|
2015-08-02 02:57:41 +02:00
|
|
|
/*
|
|
|
|
* Now check previous outer joins for ordering restrictions.
|
|
|
|
*/
|
2008-08-14 20:48:00 +02:00
|
|
|
foreach(l, root->join_info_list)
|
2000-09-12 23:07:18 +02:00
|
|
|
{
|
2008-08-14 20:48:00 +02:00
|
|
|
SpecialJoinInfo *otherinfo = (SpecialJoinInfo *) lfirst(l);
|
2005-12-20 03:30:36 +01:00
|
|
|
|
Fix planner failure with full join in RHS of left join.
Given a left join containing a full join in its righthand side, with
the left join's joinclause referencing only one side of the full join
(in a non-strict fashion, so that the full join doesn't get simplified),
the planner could fail with "failed to build any N-way joins" or related
errors. This happened because the full join was seen as overlapping the
left join's RHS, and then recent changes within join_is_legal() caused
that function to conclude that the full join couldn't validly be formed.
Rather than try to rejigger join_is_legal() yet more to allow this,
I think it's better to fix initsplan.c so that the required join order
is explicit in the SpecialJoinInfo data structure. The previous coding
there essentially ignored full joins, relying on the fact that we don't
flatten them in the joinlist data structure to preserve their ordering.
That's sufficient to prevent a wrong plan from being formed, but as this
example shows, it's not sufficient to ensure that the right plan will
be formed. We need to work a bit harder to ensure that the right plan
looks sane according to the SpecialJoinInfos.
Per bug #14105 from Vojtech Rylko. This was apparently induced by
commit 8703059c6 (though now that I've seen it, I wonder whether there
are related cases that could have failed before that); so back-patch
to all active branches. Unfortunately, that patch also went into 9.0,
so this bug is a regression that won't be fixed in that branch.
2016-04-22 02:05:58 +02:00
|
|
|
/*
|
|
|
|
* A full join is an optimization barrier: we can't associate into or
|
|
|
|
* out of it. Hence, if it overlaps either LHS or RHS of the current
|
|
|
|
* rel, expand that side's min relset to cover the whole full join.
|
|
|
|
*/
|
2008-08-14 20:48:00 +02:00
|
|
|
if (otherinfo->jointype == JOIN_FULL)
|
Fix planner failure with full join in RHS of left join.
Given a left join containing a full join in its righthand side, with
the left join's joinclause referencing only one side of the full join
(in a non-strict fashion, so that the full join doesn't get simplified),
the planner could fail with "failed to build any N-way joins" or related
errors. This happened because the full join was seen as overlapping the
left join's RHS, and then recent changes within join_is_legal() caused
that function to conclude that the full join couldn't validly be formed.
Rather than try to rejigger join_is_legal() yet more to allow this,
I think it's better to fix initsplan.c so that the required join order
is explicit in the SpecialJoinInfo data structure. The previous coding
there essentially ignored full joins, relying on the fact that we don't
flatten them in the joinlist data structure to preserve their ordering.
That's sufficient to prevent a wrong plan from being formed, but as this
example shows, it's not sufficient to ensure that the right plan will
be formed. We need to work a bit harder to ensure that the right plan
looks sane according to the SpecialJoinInfos.
Per bug #14105 from Vojtech Rylko. This was apparently induced by
commit 8703059c6 (though now that I've seen it, I wonder whether there
are related cases that could have failed before that); so back-patch
to all active branches. Unfortunately, that patch also went into 9.0,
so this bug is a regression that won't be fixed in that branch.
2016-04-22 02:05:58 +02:00
|
|
|
{
|
|
|
|
if (bms_overlap(left_rels, otherinfo->syn_lefthand) ||
|
|
|
|
bms_overlap(left_rels, otherinfo->syn_righthand))
|
|
|
|
{
|
|
|
|
min_lefthand = bms_add_members(min_lefthand,
|
|
|
|
otherinfo->syn_lefthand);
|
|
|
|
min_lefthand = bms_add_members(min_lefthand,
|
|
|
|
otherinfo->syn_righthand);
|
|
|
|
}
|
|
|
|
if (bms_overlap(right_rels, otherinfo->syn_lefthand) ||
|
|
|
|
bms_overlap(right_rels, otherinfo->syn_righthand))
|
|
|
|
{
|
|
|
|
min_righthand = bms_add_members(min_righthand,
|
|
|
|
otherinfo->syn_lefthand);
|
|
|
|
min_righthand = bms_add_members(min_righthand,
|
|
|
|
otherinfo->syn_righthand);
|
|
|
|
}
|
|
|
|
/* Needn't do anything else with the full join */
|
2005-12-20 03:30:36 +01:00
|
|
|
continue;
|
Fix planner failure with full join in RHS of left join.
Given a left join containing a full join in its righthand side, with
the left join's joinclause referencing only one side of the full join
(in a non-strict fashion, so that the full join doesn't get simplified),
the planner could fail with "failed to build any N-way joins" or related
errors. This happened because the full join was seen as overlapping the
left join's RHS, and then recent changes within join_is_legal() caused
that function to conclude that the full join couldn't validly be formed.
Rather than try to rejigger join_is_legal() yet more to allow this,
I think it's better to fix initsplan.c so that the required join order
is explicit in the SpecialJoinInfo data structure. The previous coding
there essentially ignored full joins, relying on the fact that we don't
flatten them in the joinlist data structure to preserve their ordering.
That's sufficient to prevent a wrong plan from being formed, but as this
example shows, it's not sufficient to ensure that the right plan will
be formed. We need to work a bit harder to ensure that the right plan
looks sane according to the SpecialJoinInfos.
Per bug #14105 from Vojtech Rylko. This was apparently induced by
commit 8703059c6 (though now that I've seen it, I wonder whether there
are related cases that could have failed before that); so back-patch
to all active branches. Unfortunately, that patch also went into 9.0,
so this bug is a regression that won't be fixed in that branch.
2016-04-22 02:05:58 +02:00
|
|
|
}
|
2000-09-12 23:07:18 +02:00
|
|
|
|
|
|
|
/*
|
2005-12-20 03:30:36 +01:00
|
|
|
* For a lower OJ in our LHS, if our join condition uses the lower
|
|
|
|
* join's RHS and is not strict for that rel, we must preserve the
|
2007-08-31 03:44:06 +02:00
|
|
|
* ordering of the two OJs, so add lower OJ's full syntactic relset to
|
2007-11-15 22:14:46 +01:00
|
|
|
* min_lefthand. (We must use its full syntactic relset, not just its
|
|
|
|
* min_lefthand + min_righthand. This is because there might be other
|
|
|
|
* OJs below this one that this one can commute with, but we cannot
|
2009-06-11 16:49:15 +02:00
|
|
|
* commute with them if we don't with this one.) Also, if the current
|
2009-07-21 04:02:44 +02:00
|
|
|
* join is a semijoin or antijoin, we must preserve ordering
|
|
|
|
* regardless of strictness.
|
2007-08-31 03:44:06 +02:00
|
|
|
*
|
|
|
|
* Note: I believe we have to insist on being strict for at least one
|
|
|
|
* rel in the lower OJ's min_righthand, not its whole syn_righthand.
|
2000-09-12 23:07:18 +02:00
|
|
|
*/
|
2009-02-27 23:41:38 +01:00
|
|
|
if (bms_overlap(left_rels, otherinfo->syn_righthand))
|
2005-12-20 03:30:36 +01:00
|
|
|
{
|
2009-02-27 23:41:38 +01:00
|
|
|
if (bms_overlap(clause_relids, otherinfo->syn_righthand) &&
|
2009-07-21 04:02:44 +02:00
|
|
|
(jointype == JOIN_SEMI || jointype == JOIN_ANTI ||
|
2009-02-27 23:41:38 +01:00
|
|
|
!bms_overlap(strict_relids, otherinfo->min_righthand)))
|
|
|
|
{
|
|
|
|
min_lefthand = bms_add_members(min_lefthand,
|
|
|
|
otherinfo->syn_lefthand);
|
|
|
|
min_lefthand = bms_add_members(min_lefthand,
|
|
|
|
otherinfo->syn_righthand);
|
|
|
|
}
|
2005-12-20 03:30:36 +01:00
|
|
|
}
|
2006-10-04 02:30:14 +02:00
|
|
|
|
2001-05-14 22:25:00 +02:00
|
|
|
/*
|
2005-12-20 03:30:36 +01:00
|
|
|
* For a lower OJ in our RHS, if our join condition does not use the
|
|
|
|
* lower join's RHS and the lower OJ's join condition is strict, we
|
2007-11-15 22:14:46 +01:00
|
|
|
* can interchange the ordering of the two OJs; otherwise we must add
|
2015-08-06 21:35:27 +02:00
|
|
|
* the lower OJ's full syntactic relset to min_righthand.
|
|
|
|
*
|
|
|
|
* Also, if our join condition does not use the lower join's LHS
|
|
|
|
* either, force the ordering to be preserved. Otherwise we can end
|
|
|
|
* up with SpecialJoinInfos with identical min_righthands, which can
|
|
|
|
* confuse join_is_legal (see discussion in backend/optimizer/README).
|
|
|
|
*
|
|
|
|
* Also, we must preserve ordering anyway if either the current join
|
|
|
|
* or the lower OJ is either a semijoin or an antijoin.
|
2007-05-23 01:23:58 +02:00
|
|
|
*
|
2007-11-15 22:14:46 +01:00
|
|
|
* Here, we have to consider that "our join condition" includes any
|
|
|
|
* clauses that syntactically appeared above the lower OJ and below
|
|
|
|
* ours; those are equivalent to degenerate clauses in our OJ and must
|
2014-05-06 18:12:18 +02:00
|
|
|
* be treated as such. Such clauses obviously can't reference our
|
2007-11-15 22:14:46 +01:00
|
|
|
* LHS, and they must be non-strict for the lower OJ's RHS (else
|
|
|
|
* reduce_outer_joins would have reduced the lower OJ to a plain
|
|
|
|
* join). Hence the other ways in which we handle clauses within our
|
|
|
|
* join condition are not affected by them. The net effect is
|
|
|
|
* therefore sufficiently represented by the delay_upper_joins flag
|
|
|
|
* saved for us by check_outerjoin_delay.
|
2001-05-14 22:25:00 +02:00
|
|
|
*/
|
2007-08-31 03:44:06 +02:00
|
|
|
if (bms_overlap(right_rels, otherinfo->syn_righthand))
|
2001-05-14 22:25:00 +02:00
|
|
|
{
|
2007-08-31 03:44:06 +02:00
|
|
|
if (bms_overlap(clause_relids, otherinfo->syn_righthand) ||
|
2015-08-06 21:35:27 +02:00
|
|
|
!bms_overlap(clause_relids, otherinfo->min_lefthand) ||
|
2009-05-07 22:13:09 +02:00
|
|
|
jointype == JOIN_SEMI ||
|
2015-04-25 22:44:27 +02:00
|
|
|
jointype == JOIN_ANTI ||
|
2009-07-21 04:02:44 +02:00
|
|
|
otherinfo->jointype == JOIN_SEMI ||
|
2009-02-27 23:41:38 +01:00
|
|
|
otherinfo->jointype == JOIN_ANTI ||
|
2007-08-31 03:44:06 +02:00
|
|
|
!otherinfo->lhs_strict || otherinfo->delay_upper_joins)
|
|
|
|
{
|
|
|
|
min_righthand = bms_add_members(min_righthand,
|
|
|
|
otherinfo->syn_lefthand);
|
|
|
|
min_righthand = bms_add_members(min_righthand,
|
|
|
|
otherinfo->syn_righthand);
|
|
|
|
}
|
2001-05-14 22:25:00 +02:00
|
|
|
}
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
2005-12-20 03:30:36 +01:00
|
|
|
|
2010-09-28 18:08:56 +02:00
|
|
|
/*
|
|
|
|
* Examine PlaceHolderVars. If a PHV is supposed to be evaluated within
|
2013-08-15 00:38:32 +02:00
|
|
|
* this join's nullable side, then ensure that min_righthand contains the
|
|
|
|
* full eval_at set of the PHV. This ensures that the PHV actually can be
|
|
|
|
* evaluated within the RHS. Note that this works only because we should
|
|
|
|
* already have determined the final eval_at level for any PHV
|
|
|
|
* syntactically within this join.
|
2010-09-28 18:08:56 +02:00
|
|
|
*/
|
|
|
|
foreach(l, root->placeholder_list)
|
|
|
|
{
|
|
|
|
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
|
|
|
|
Relids ph_syn_level = phinfo->ph_var->phrels;
|
|
|
|
|
|
|
|
/* Ignore placeholder if it didn't syntactically come from RHS */
|
|
|
|
if (!bms_is_subset(ph_syn_level, right_rels))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Else, prevent join from being formed before we eval the PHV */
|
|
|
|
min_righthand = bms_add_members(min_righthand, phinfo->ph_eval_at);
|
|
|
|
}
|
|
|
|
|
2007-08-31 03:44:06 +02:00
|
|
|
/*
|
|
|
|
* If we found nothing to put in min_lefthand, punt and make it the full
|
|
|
|
* LHS, to avoid having an empty min_lefthand which will confuse later
|
|
|
|
* processing. (We don't try to be smart about such cases, just correct.)
|
2015-08-06 21:35:27 +02:00
|
|
|
* Likewise for min_righthand.
|
2007-08-31 03:44:06 +02:00
|
|
|
*/
|
|
|
|
if (bms_is_empty(min_lefthand))
|
|
|
|
min_lefthand = bms_copy(left_rels);
|
2015-08-06 21:35:27 +02:00
|
|
|
if (bms_is_empty(min_righthand))
|
|
|
|
min_righthand = bms_copy(right_rels);
|
2007-08-31 03:44:06 +02:00
|
|
|
|
|
|
|
/* Now they'd better be nonempty */
|
|
|
|
Assert(!bms_is_empty(min_lefthand));
|
|
|
|
Assert(!bms_is_empty(min_righthand));
|
2005-12-20 03:30:36 +01:00
|
|
|
/* Shouldn't overlap either */
|
2007-08-31 03:44:06 +02:00
|
|
|
Assert(!bms_overlap(min_lefthand, min_righthand));
|
|
|
|
|
2008-08-14 20:48:00 +02:00
|
|
|
sjinfo->min_lefthand = min_lefthand;
|
|
|
|
sjinfo->min_righthand = min_righthand;
|
2005-12-20 03:30:36 +01:00
|
|
|
|
2008-08-14 20:48:00 +02:00
|
|
|
return sjinfo;
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
|
|
|
|
Improve planner's cost estimation in the presence of semijoins.
If we have a semijoin, say
SELECT * FROM x WHERE x1 IN (SELECT y1 FROM y)
and we're estimating the cost of a parameterized indexscan on x, the number
of repetitions of the indexscan should not be taken as the size of y; it'll
really only be the number of distinct values of y1, because the only valid
plan with y on the outside of a nestloop would require y to be unique-ified
before joining it to x. Most of the time this doesn't make that much
difference, but sometimes it can lead to drastically underestimating the
cost of the indexscan and hence choosing a bad plan, as pointed out by
David Kubečka.
Fixing this is a bit difficult because parameterized indexscans are costed
out quite early in the planning process, before we have the information
that would be needed to call estimate_num_groups() and thereby estimate the
number of distinct values of the join column(s). However we can move the
code that extracts a semijoin RHS's unique-ification columns, so that it's
done in initsplan.c rather than on-the-fly in create_unique_path(). That
shouldn't make any difference speed-wise and it's really a bit cleaner too.
The other bit of information we need is the size of the semijoin RHS,
which is easy if it's a single relation (we make those estimates before
considering indexscan costs) but problematic if it's a join relation.
The solution adopted here is just to use the product of the sizes of the
join component rels. That will generally be an overestimate, but since
estimate_num_groups() only uses this input as a clamp, an overestimate
shouldn't hurt us too badly. In any case we don't allow this new logic
to produce a value larger than we would have chosen before, so that at
worst an overestimate leaves us no wiser than we were before.
2015-03-12 02:21:00 +01:00
|
|
|
/*
|
|
|
|
* compute_semijoin_info
|
|
|
|
* Fill semijoin-related fields of a new SpecialJoinInfo
|
|
|
|
*
|
|
|
|
* Note: this relies on only the jointype and syn_righthand fields of the
|
|
|
|
* SpecialJoinInfo; the rest may not be set yet.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause)
|
|
|
|
{
|
|
|
|
List *semi_operators;
|
|
|
|
List *semi_rhs_exprs;
|
|
|
|
bool all_btree;
|
|
|
|
bool all_hash;
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
/* Initialize semijoin-related fields in case we can't unique-ify */
|
|
|
|
sjinfo->semi_can_btree = false;
|
|
|
|
sjinfo->semi_can_hash = false;
|
|
|
|
sjinfo->semi_operators = NIL;
|
|
|
|
sjinfo->semi_rhs_exprs = NIL;
|
|
|
|
|
|
|
|
/* Nothing more to do if it's not a semijoin */
|
|
|
|
if (sjinfo->jointype != JOIN_SEMI)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Look to see whether the semijoin's join quals consist of AND'ed
|
|
|
|
* equality operators, with (only) RHS variables on only one side of each
|
|
|
|
* one. If so, we can figure out how to enforce uniqueness for the RHS.
|
|
|
|
*
|
|
|
|
* Note that the input clause list is the list of quals that are
|
|
|
|
* *syntactically* associated with the semijoin, which in practice means
|
|
|
|
* the synthesized comparison list for an IN or the WHERE of an EXISTS.
|
|
|
|
* Particularly in the latter case, it might contain clauses that aren't
|
|
|
|
* *semantically* associated with the join, but refer to just one side or
|
|
|
|
* the other. We can ignore such clauses here, as they will just drop
|
|
|
|
* down to be processed within one side or the other. (It is okay to
|
|
|
|
* consider only the syntactically-associated clauses here because for a
|
|
|
|
* semijoin, no higher-level quals could refer to the RHS, and so there
|
|
|
|
* can be no other quals that are semantically associated with this join.
|
|
|
|
* We do things this way because it is useful to have the set of potential
|
|
|
|
* unique-ification expressions before we can extract the list of quals
|
|
|
|
* that are actually semantically associated with the particular join.)
|
|
|
|
*
|
|
|
|
* Note that the semi_operators list consists of the joinqual operators
|
|
|
|
* themselves (but commuted if needed to put the RHS value on the right).
|
|
|
|
* These could be cross-type operators, in which case the operator
|
|
|
|
* actually needed for uniqueness is a related single-type operator. We
|
|
|
|
* assume here that that operator will be available from the btree or hash
|
|
|
|
* opclass when the time comes ... if not, create_unique_plan() will fail.
|
|
|
|
*/
|
|
|
|
semi_operators = NIL;
|
|
|
|
semi_rhs_exprs = NIL;
|
|
|
|
all_btree = true;
|
|
|
|
all_hash = enable_hashagg; /* don't consider hash if not enabled */
|
|
|
|
foreach(lc, clause)
|
|
|
|
{
|
|
|
|
OpExpr *op = (OpExpr *) lfirst(lc);
|
|
|
|
Oid opno;
|
|
|
|
Node *left_expr;
|
|
|
|
Node *right_expr;
|
|
|
|
Relids left_varnos;
|
|
|
|
Relids right_varnos;
|
|
|
|
Relids all_varnos;
|
|
|
|
Oid opinputtype;
|
|
|
|
|
|
|
|
/* Is it a binary opclause? */
|
|
|
|
if (!IsA(op, OpExpr) ||
|
|
|
|
list_length(op->args) != 2)
|
|
|
|
{
|
|
|
|
/* No, but does it reference both sides? */
|
|
|
|
all_varnos = pull_varnos((Node *) op);
|
|
|
|
if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
|
|
|
|
bms_is_subset(all_varnos, sjinfo->syn_righthand))
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Clause refers to only one rel, so ignore it --- unless it
|
|
|
|
* contains volatile functions, in which case we'd better
|
|
|
|
* punt.
|
|
|
|
*/
|
|
|
|
if (contain_volatile_functions((Node *) op))
|
|
|
|
return;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
/* Non-operator clause referencing both sides, must punt */
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Extract data from binary opclause */
|
|
|
|
opno = op->opno;
|
|
|
|
left_expr = linitial(op->args);
|
|
|
|
right_expr = lsecond(op->args);
|
|
|
|
left_varnos = pull_varnos(left_expr);
|
|
|
|
right_varnos = pull_varnos(right_expr);
|
|
|
|
all_varnos = bms_union(left_varnos, right_varnos);
|
|
|
|
opinputtype = exprType(left_expr);
|
|
|
|
|
|
|
|
/* Does it reference both sides? */
|
|
|
|
if (!bms_overlap(all_varnos, sjinfo->syn_righthand) ||
|
|
|
|
bms_is_subset(all_varnos, sjinfo->syn_righthand))
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Clause refers to only one rel, so ignore it --- unless it
|
|
|
|
* contains volatile functions, in which case we'd better punt.
|
|
|
|
*/
|
|
|
|
if (contain_volatile_functions((Node *) op))
|
|
|
|
return;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check rel membership of arguments */
|
|
|
|
if (!bms_is_empty(right_varnos) &&
|
|
|
|
bms_is_subset(right_varnos, sjinfo->syn_righthand) &&
|
|
|
|
!bms_overlap(left_varnos, sjinfo->syn_righthand))
|
|
|
|
{
|
|
|
|
/* typical case, right_expr is RHS variable */
|
|
|
|
}
|
|
|
|
else if (!bms_is_empty(left_varnos) &&
|
|
|
|
bms_is_subset(left_varnos, sjinfo->syn_righthand) &&
|
|
|
|
!bms_overlap(right_varnos, sjinfo->syn_righthand))
|
|
|
|
{
|
|
|
|
/* flipped case, left_expr is RHS variable */
|
|
|
|
opno = get_commutator(opno);
|
|
|
|
if (!OidIsValid(opno))
|
|
|
|
return;
|
|
|
|
right_expr = left_expr;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* mixed membership of args, punt */
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* all operators must be btree equality or hash equality */
|
|
|
|
if (all_btree)
|
|
|
|
{
|
|
|
|
/* oprcanmerge is considered a hint... */
|
|
|
|
if (!op_mergejoinable(opno, opinputtype) ||
|
|
|
|
get_mergejoin_opfamilies(opno) == NIL)
|
|
|
|
all_btree = false;
|
|
|
|
}
|
|
|
|
if (all_hash)
|
|
|
|
{
|
|
|
|
/* ... but oprcanhash had better be correct */
|
|
|
|
if (!op_hashjoinable(opno, opinputtype))
|
|
|
|
all_hash = false;
|
|
|
|
}
|
|
|
|
if (!(all_btree || all_hash))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* so far so good, keep building lists */
|
|
|
|
semi_operators = lappend_oid(semi_operators, opno);
|
|
|
|
semi_rhs_exprs = lappend(semi_rhs_exprs, copyObject(right_expr));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Punt if we didn't find at least one column to unique-ify */
|
|
|
|
if (semi_rhs_exprs == NIL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The expressions we'd need to unique-ify mustn't be volatile.
|
|
|
|
*/
|
|
|
|
if (contain_volatile_functions((Node *) semi_rhs_exprs))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we get here, we can unique-ify the semijoin's RHS using at least one
|
|
|
|
* of sorting and hashing. Save the information about how to do that.
|
|
|
|
*/
|
|
|
|
sjinfo->semi_can_btree = all_btree;
|
|
|
|
sjinfo->semi_can_hash = all_hash;
|
|
|
|
sjinfo->semi_operators = semi_operators;
|
|
|
|
sjinfo->semi_rhs_exprs = semi_rhs_exprs;
|
|
|
|
}
|
|
|
|
|
2005-12-20 03:30:36 +01:00
|
|
|
|
|
|
|
/*****************************************************************************
|
|
|
|
*
|
|
|
|
* QUALIFICATIONS
|
|
|
|
*
|
|
|
|
*****************************************************************************/
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
2000-09-29 20:21:41 +02:00
|
|
|
* distribute_qual_to_rels
|
2005-06-09 06:19:00 +02:00
|
|
|
* Add clause information to either the baserestrictinfo or joininfo list
|
1999-08-16 04:17:58 +02:00
|
|
|
* (depending on whether the clause is a join) of each base relation
|
2014-05-06 18:12:18 +02:00
|
|
|
* mentioned in the clause. A RestrictInfo node is created and added to
|
2007-01-20 21:45:41 +01:00
|
|
|
* the appropriate list for each rel. Alternatively, if the clause uses a
|
2003-03-03 00:46:34 +01:00
|
|
|
* mergejoinable operator and is not delayed by outer-join rules, enter
|
2007-01-20 21:45:41 +01:00
|
|
|
* the left- and right-side expressions into the query's list of
|
2013-08-19 19:19:25 +02:00
|
|
|
* EquivalenceClasses. Alternatively, if the clause needs to be treated
|
|
|
|
* as belonging to a higher join level, just add it to postponed_qual_list.
|
2000-09-12 23:07:18 +02:00
|
|
|
*
|
2000-09-29 20:21:41 +02:00
|
|
|
* 'clause': the qual clause to be distributed
|
2017-08-16 06:22:32 +02:00
|
|
|
* 'is_deduced': true if the qual came from implied-equality deduction
|
|
|
|
* 'below_outer_join': true if the qual is from a JOIN/ON that is below the
|
2007-01-20 21:45:41 +01:00
|
|
|
* nullable side of a higher-level outer join
|
2008-10-25 21:51:32 +02:00
|
|
|
* 'jointype': type of join the qual is from (JOIN_INNER for a WHERE clause)
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
* 'security_level': security_level to assign to the qual
|
2005-12-20 03:30:36 +01:00
|
|
|
* 'qualscope': set of baserels the qual's syntactic scope covers
|
|
|
|
* 'ojscope': NULL if not an outer-join qual, else the minimum set of baserels
|
|
|
|
* needed to form this join
|
2003-03-03 00:46:34 +01:00
|
|
|
* 'outerjoin_nonnullable': NULL if not an outer-join qual, else the set of
|
|
|
|
* baserels appearing on the outer (nonnullable) side of the join
|
2005-12-20 03:30:36 +01:00
|
|
|
* (for FULL JOIN this includes both sides of the join, and must in fact
|
|
|
|
* equal qualscope)
|
2017-08-16 06:22:32 +02:00
|
|
|
* 'deduced_nullable_relids': if is_deduced is true, the nullable relids to
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
* impute to the clause; otherwise NULL
|
2013-08-19 19:19:25 +02:00
|
|
|
* 'postponed_qual_list': list of PostponedQual structs, which we can add
|
|
|
|
* this qual to if it turns out to belong to a higher join level.
|
|
|
|
* Can be NULL if caller knows postponement is impossible.
|
2000-09-29 20:21:41 +02:00
|
|
|
*
|
2005-12-20 03:30:36 +01:00
|
|
|
* 'qualscope' identifies what level of JOIN the qual came from syntactically.
|
|
|
|
* 'ojscope' is needed if we decide to force the qual up to the outer-join
|
|
|
|
* level, which will be ojscope not necessarily qualscope.
|
2009-02-20 01:01:03 +01:00
|
|
|
*
|
2017-08-16 06:22:32 +02:00
|
|
|
* In normal use (when is_deduced is false), at the time this is called,
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
* root->join_info_list must contain entries for all and only those special
|
2017-08-16 06:22:32 +02:00
|
|
|
* joins that are syntactically below this qual. But when is_deduced is true,
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
* we are adding new deduced clauses after completion of deconstruct_jointree,
|
|
|
|
* so it cannot be assumed that root->join_info_list has anything to do with
|
|
|
|
* qual placement.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
static void
|
2005-06-06 00:32:58 +02:00
|
|
|
distribute_qual_to_rels(PlannerInfo *root, Node *clause,
|
2005-09-28 23:17:02 +02:00
|
|
|
bool is_deduced,
|
|
|
|
bool below_outer_join,
|
2008-10-25 21:51:32 +02:00
|
|
|
JoinType jointype,
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
Index security_level,
|
2005-12-20 03:30:36 +01:00
|
|
|
Relids qualscope,
|
|
|
|
Relids ojscope,
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
Relids outerjoin_nonnullable,
|
2013-08-19 19:19:25 +02:00
|
|
|
Relids deduced_nullable_relids,
|
|
|
|
List **postponed_qual_list)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-02-18 01:49:48 +01:00
|
|
|
Relids relids;
|
2007-02-16 21:57:19 +01:00
|
|
|
bool is_pushed_down;
|
2005-11-15 00:54:23 +01:00
|
|
|
bool outerjoin_delayed;
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
bool pseudoconstant = false;
|
2007-01-20 21:45:41 +01:00
|
|
|
bool maybe_equivalence;
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
bool maybe_outer_join;
|
2009-04-16 22:42:16 +02:00
|
|
|
Relids nullable_relids;
|
2004-01-04 01:07:32 +01:00
|
|
|
RestrictInfo *restrictinfo;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1999-07-25 01:21:14 +02:00
|
|
|
/*
|
2004-01-04 04:51:52 +01:00
|
|
|
* Retrieve all relids mentioned within the clause.
|
1999-07-25 01:21:14 +02:00
|
|
|
*/
|
2004-01-04 04:51:52 +01:00
|
|
|
relids = pull_varnos(clause);
|
1999-07-25 01:21:14 +02:00
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
/*
|
2013-08-19 19:19:25 +02:00
|
|
|
* In ordinary SQL, a WHERE or JOIN/ON clause can't reference any rels
|
|
|
|
* that aren't within its syntactic scope; however, if we pulled up a
|
|
|
|
* LATERAL subquery then we might find such references in quals that have
|
2014-05-06 18:12:18 +02:00
|
|
|
* been pulled up. We need to treat such quals as belonging to the join
|
2013-08-19 19:19:25 +02:00
|
|
|
* level that includes every rel they reference. Although we could make
|
|
|
|
* pull_up_subqueries() place such quals correctly to begin with, it's
|
|
|
|
* easier to handle it here. When we find a clause that contains Vars
|
|
|
|
* outside its syntactic scope, we add it to the postponed-quals list, and
|
|
|
|
* process it once we've recursed back up to the appropriate join level.
|
|
|
|
*/
|
|
|
|
if (!bms_is_subset(relids, qualscope))
|
|
|
|
{
|
|
|
|
PostponedQual *pq = (PostponedQual *) palloc(sizeof(PostponedQual));
|
|
|
|
|
|
|
|
Assert(root->hasLateralRTEs); /* shouldn't happen otherwise */
|
|
|
|
Assert(jointype == JOIN_INNER); /* mustn't postpone past outer join */
|
|
|
|
Assert(!is_deduced); /* shouldn't be deduced, either */
|
|
|
|
pq->qual = clause;
|
|
|
|
pq->relids = relids;
|
|
|
|
*postponed_qual_list = lappend(*postponed_qual_list, pq);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If it's an outer-join clause, also check that relids is a subset of
|
|
|
|
* ojscope. (This should not fail if the syntactic scope check passed.)
|
2000-09-12 23:07:18 +02:00
|
|
|
*/
|
2005-12-20 03:30:36 +01:00
|
|
|
if (ojscope && !bms_is_subset(relids, ojscope))
|
Wording cleanup for error messages. Also change can't -> cannot.
Standard English uses "may", "can", and "might" in different ways:
may - permission, "You may borrow my rake."
can - ability, "I can lift that log."
might - possibility, "It might rain today."
Unfortunately, in conversational English, their use is often mixed, as
in, "You may use this variable to do X", when in fact, "can" is a better
choice. Similarly, "It may crash" is better stated, "It might crash".
2007-02-01 20:10:30 +01:00
|
|
|
elog(ERROR, "JOIN qualification cannot refer to other relations");
|
2000-09-29 20:21:41 +02:00
|
|
|
|
|
|
|
/*
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
* If the clause is variable-free, our normal heuristic for pushing it
|
|
|
|
* down to just the mentioned rels doesn't work, because there are none.
|
|
|
|
*
|
|
|
|
* If the clause is an outer-join clause, we must force it to the OJ's
|
|
|
|
* semantic level to preserve semantics.
|
|
|
|
*
|
2006-10-04 02:30:14 +02:00
|
|
|
* Otherwise, when the clause contains volatile functions, we force it to
|
|
|
|
* be evaluated at its original syntactic level. This preserves the
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
* expected semantics.
|
|
|
|
*
|
2006-10-04 02:30:14 +02:00
|
|
|
* When the clause contains no volatile functions either, it is actually a
|
|
|
|
* pseudoconstant clause that will not change value during any one
|
|
|
|
* execution of the plan, and hence can be used as a one-time qual in a
|
|
|
|
* gating Result plan node. We put such a clause into the regular
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
* RestrictInfo lists for the moment, but eventually createplan.c will
|
|
|
|
* pull it out and make a gating Result node immediately above whatever
|
2014-05-06 18:12:18 +02:00
|
|
|
* plan node the pseudoconstant clause is assigned to. It's usually best
|
2006-10-04 02:30:14 +02:00
|
|
|
* to put a gating node as high in the plan tree as possible. If we are
|
|
|
|
* not below an outer join, we can actually push the pseudoconstant qual
|
2014-05-06 18:12:18 +02:00
|
|
|
* all the way to the top of the tree. If we are below an outer join, we
|
2006-10-04 02:30:14 +02:00
|
|
|
* leave the qual at its original syntactic level (we could push it up to
|
|
|
|
* just below the outer join, but that seems more complex than it's
|
|
|
|
* worth).
|
2000-09-29 20:21:41 +02:00
|
|
|
*/
|
2003-02-08 21:20:55 +01:00
|
|
|
if (bms_is_empty(relids))
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
{
|
|
|
|
if (ojscope)
|
|
|
|
{
|
|
|
|
/* clause is attached to outer join, eval it there */
|
Fix some planner issues found while investigating Kevin Grittner's report
of poorer planning in 8.3 than 8.2:
1. After pushing a constant across an outer join --- i.e., given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.
2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join. This is safe
as long as the condition is strict and we re-test it at the upper join.
3. Keep the outer-join clause even if we successfully push a constant
across it. This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make. Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.
4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.
5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause. This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
|
|
|
relids = bms_copy(ojscope);
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
/* mustn't use as gating qual, so don't mark pseudoconstant */
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* eval at original syntactic level */
|
Fix some planner issues found while investigating Kevin Grittner's report
of poorer planning in 8.3 than 8.2:
1. After pushing a constant across an outer join --- i.e., given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.
2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join. This is safe
as long as the condition is strict and we re-test it at the upper join.
3. Keep the outer-join clause even if we successfully push a constant
across it. This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make. Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.
4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.
5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause. This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
|
|
|
relids = bms_copy(qualscope);
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
if (!contain_volatile_functions(clause))
|
|
|
|
{
|
|
|
|
/* mark as gating qual */
|
|
|
|
pseudoconstant = true;
|
|
|
|
/* tell createplan.c to check for gating quals */
|
|
|
|
root->hasPseudoConstantQuals = true;
|
|
|
|
/* if not below outer join, push it to top of tree */
|
|
|
|
if (!below_outer_join)
|
2009-05-06 22:31:18 +02:00
|
|
|
{
|
2008-08-17 03:20:00 +02:00
|
|
|
relids =
|
|
|
|
get_relids_in_jointree((Node *) root->parse->jointree,
|
|
|
|
false);
|
2009-05-06 22:31:18 +02:00
|
|
|
qualscope = bms_copy(relids);
|
|
|
|
}
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2000-09-29 20:21:41 +02:00
|
|
|
|
2007-02-16 21:57:19 +01:00
|
|
|
/*----------
|
2003-03-03 00:46:34 +01:00
|
|
|
* Check to see if clause application must be delayed by outer-join
|
|
|
|
* considerations.
|
2007-02-16 21:57:19 +01:00
|
|
|
*
|
|
|
|
* A word about is_pushed_down: we mark the qual as "pushed down" if
|
|
|
|
* it is (potentially) applicable at a level different from its original
|
|
|
|
* syntactic level. This flag is used to distinguish OUTER JOIN ON quals
|
|
|
|
* from other quals pushed down to the same joinrel. The rules are:
|
|
|
|
* WHERE quals and INNER JOIN quals: is_pushed_down = true.
|
|
|
|
* Non-degenerate OUTER JOIN quals: is_pushed_down = false.
|
|
|
|
* Degenerate OUTER JOIN quals: is_pushed_down = true.
|
|
|
|
* A "degenerate" OUTER JOIN qual is one that doesn't mention the
|
|
|
|
* non-nullable side, and hence can be pushed down into the nullable side
|
|
|
|
* without changing the join result. It is correct to treat it as a
|
|
|
|
* regular filter condition at the level where it is evaluated.
|
|
|
|
*
|
|
|
|
* Note: it is not immediately obvious that a simple boolean is enough
|
|
|
|
* for this: if for some reason we were to attach a degenerate qual to
|
|
|
|
* its original join level, it would need to be treated as an outer join
|
2014-05-06 18:12:18 +02:00
|
|
|
* qual there. However, this cannot happen, because all the rels the
|
2007-02-16 21:57:19 +01:00
|
|
|
* clause mentions must be in the outer join's min_righthand, therefore
|
|
|
|
* the join it needs must be formed before the outer join; and we always
|
|
|
|
* attach quals to the lowest level where they can be evaluated. But
|
|
|
|
* if we were ever to re-introduce a mechanism for delaying evaluation
|
|
|
|
* of "expensive" quals, this area would need work.
|
2018-04-20 21:19:16 +02:00
|
|
|
*
|
|
|
|
* Note: generally, use of is_pushed_down has to go through the macro
|
|
|
|
* RINFO_IS_PUSHED_DOWN, because that flag alone is not always sufficient
|
|
|
|
* to tell whether a clause must be treated as pushed-down in context.
|
|
|
|
* This seems like another reason why it should perhaps be rethought.
|
2007-02-16 21:57:19 +01:00
|
|
|
*----------
|
2000-09-29 20:21:41 +02:00
|
|
|
*/
|
2005-09-28 23:17:02 +02:00
|
|
|
if (is_deduced)
|
2001-10-18 18:11:42 +02:00
|
|
|
{
|
2003-03-03 00:46:34 +01:00
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* If the qual came from implied-equality deduction, it should not be
|
|
|
|
* outerjoin-delayed, else deducer blew it. But we can't check this
|
2008-08-14 20:48:00 +02:00
|
|
|
* because the join_info_list may now contain OJs above where the qual
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
* belongs. For the same reason, we must rely on caller to supply the
|
|
|
|
* correct nullable_relids set.
|
2003-03-03 00:46:34 +01:00
|
|
|
*/
|
2005-12-20 03:30:36 +01:00
|
|
|
Assert(!ojscope);
|
2007-02-16 21:57:19 +01:00
|
|
|
is_pushed_down = true;
|
2005-11-15 00:54:23 +01:00
|
|
|
outerjoin_delayed = false;
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
nullable_relids = deduced_nullable_relids;
|
2007-01-20 21:45:41 +01:00
|
|
|
/* Don't feed it back for more deductions */
|
|
|
|
maybe_equivalence = false;
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
maybe_outer_join = false;
|
2001-10-18 18:11:42 +02:00
|
|
|
}
|
2008-11-22 23:47:06 +01:00
|
|
|
else if (bms_overlap(relids, outerjoin_nonnullable))
|
2000-09-12 23:07:18 +02:00
|
|
|
{
|
2003-03-03 00:46:34 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* The qual is attached to an outer join and mentions (some of the)
|
2008-11-22 23:47:06 +01:00
|
|
|
* rels on the nonnullable side, so it's not degenerate.
|
2007-01-20 21:45:41 +01:00
|
|
|
*
|
2007-11-15 22:14:46 +01:00
|
|
|
* We can't use such a clause to deduce equivalence (the left and
|
|
|
|
* right sides might be unequal above the join because one of them has
|
|
|
|
* gone to NULL) ... but we might be able to use it for more limited
|
2014-05-06 18:12:18 +02:00
|
|
|
* deductions, if it is mergejoinable. So consider adding it to the
|
Fix some planner issues found while investigating Kevin Grittner's report
of poorer planning in 8.3 than 8.2:
1. After pushing a constant across an outer join --- i.e., given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.
2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join. This is safe
as long as the condition is strict and we re-test it at the upper join.
3. Keep the outer-join clause even if we successfully push a constant
across it. This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make. Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.
4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.
5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause. This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
|
|
|
* lists of set-aside outer-join clauses.
|
2007-01-20 21:45:41 +01:00
|
|
|
*/
|
Fix some planner issues found while investigating Kevin Grittner's report
of poorer planning in 8.3 than 8.2:
1. After pushing a constant across an outer join --- i.e., given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.
2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join. This is safe
as long as the condition is strict and we re-test it at the upper join.
3. Keep the outer-join clause even if we successfully push a constant
across it. This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make. Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.
4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.
5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause. This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
|
|
|
is_pushed_down = false;
|
2007-01-20 21:45:41 +01:00
|
|
|
maybe_equivalence = false;
|
Fix some planner issues found while investigating Kevin Grittner's report
of poorer planning in 8.3 than 8.2:
1. After pushing a constant across an outer join --- i.e., given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.
2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join. This is safe
as long as the condition is strict and we re-test it at the upper join.
3. Keep the outer-join clause even if we successfully push a constant
across it. This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make. Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.
4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.
5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause. This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
|
|
|
maybe_outer_join = true;
|
|
|
|
|
|
|
|
/* Check to see if the qual must be delayed by a lower outer join */
|
2009-04-16 22:42:16 +02:00
|
|
|
outerjoin_delayed = check_outerjoin_delay(root,
|
|
|
|
&relids,
|
|
|
|
&nullable_relids,
|
|
|
|
false);
|
2007-01-20 21:45:41 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Now force the qual to be evaluated exactly at the level of joining
|
|
|
|
* corresponding to the outer join. We cannot let it get pushed down
|
|
|
|
* into the nonnullable side, since then we'd produce no output rows,
|
|
|
|
* rather than the intended single null-extended row, for any
|
|
|
|
* nonnullable-side rows failing the qual.
|
|
|
|
*
|
|
|
|
* (Do this step after calling check_outerjoin_delay, because that
|
|
|
|
* trashes relids.)
|
2003-03-03 00:46:34 +01:00
|
|
|
*/
|
2005-12-20 03:30:36 +01:00
|
|
|
Assert(ojscope);
|
|
|
|
relids = ojscope;
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
Assert(!pseudoconstant);
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2007-02-16 21:57:19 +01:00
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* Normal qual clause or degenerate outer-join clause. Either way, we
|
2007-11-15 22:14:46 +01:00
|
|
|
* can mark it as pushed-down.
|
2007-02-16 21:57:19 +01:00
|
|
|
*/
|
|
|
|
is_pushed_down = true;
|
|
|
|
|
Fix some planner issues found while investigating Kevin Grittner's report
of poorer planning in 8.3 than 8.2:
1. After pushing a constant across an outer join --- i.e., given
"a LEFT JOIN b ON (a.x = b.y) WHERE a.x = 42", we can deduce that b.y is
sort of equal to 42, in the sense that we needn't fetch any b rows where
it isn't 42 --- loop to see if any additional deductions can be made.
Previous releases did that by recursing, but I had mistakenly thought that
this was no longer necessary given the EquivalenceClass machinery.
2. Allow pushing constants across outer join conditions even if the
condition is outerjoin_delayed due to a lower outer join. This is safe
as long as the condition is strict and we re-test it at the upper join.
3. Keep the outer-join clause even if we successfully push a constant
across it. This is *necessary* in the outerjoin_delayed case, but
even in the simple case, it seems better to do this to ensure that the
join search order heuristics will consider the join as reasonable to
make. Mark such a clause as having selectivity 1.0, though, since it's
not going to eliminate very many rows after application of the constant
condition.
4. Tweak have_relevant_eclass_joinclause to report that two relations
are joinable when they have vars that are equated to the same constant.
We won't actually generate any joinclause from such an EquivalenceClass,
but again it seems that in such a case it's a good idea to consider
the join as worth costing out.
5. Fix a bug in select_mergejoin_clauses that was exposed by these
changes: we have to reject candidate mergejoin clauses if either side was
equated to a constant, because we can't construct a canonical pathkey list
for such a clause. This is an implementation restriction that might be
worth fixing someday, but it doesn't seem critical to get it done for 8.3.
2008-01-09 21:42:29 +01:00
|
|
|
/* Check to see if the qual must be delayed by a lower outer join */
|
2009-04-16 22:42:16 +02:00
|
|
|
outerjoin_delayed = check_outerjoin_delay(root,
|
|
|
|
&relids,
|
|
|
|
&nullable_relids,
|
|
|
|
true);
|
2003-03-03 00:46:34 +01:00
|
|
|
|
2006-12-07 20:33:40 +01:00
|
|
|
if (outerjoin_delayed)
|
|
|
|
{
|
|
|
|
/* Should still be a subset of current scope ... */
|
2012-09-01 00:57:12 +02:00
|
|
|
Assert(root->hasLateralRTEs || bms_is_subset(relids, qualscope));
|
|
|
|
Assert(ojscope == NULL || bms_is_subset(relids, ojscope));
|
2007-11-15 22:14:46 +01:00
|
|
|
|
2006-12-07 20:33:40 +01:00
|
|
|
/*
|
|
|
|
* Because application of the qual will be delayed by outer join,
|
|
|
|
* we mustn't assume its vars are equal everywhere.
|
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
maybe_equivalence = false;
|
2008-08-14 20:48:00 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* It's possible that this is an IS NULL clause that's redundant
|
|
|
|
* with a lower antijoin; if so we can just discard it. We need
|
2009-06-11 16:49:15 +02:00
|
|
|
* not test in any of the other cases, because this will only be
|
|
|
|
* possible for pushed-down, delayed clauses.
|
2008-08-14 20:48:00 +02:00
|
|
|
*/
|
|
|
|
if (check_redundant_nullability_qual(root, clause))
|
|
|
|
return;
|
2006-12-07 20:33:40 +01:00
|
|
|
}
|
|
|
|
else
|
2003-03-03 00:46:34 +01:00
|
|
|
{
|
2005-09-28 23:17:02 +02:00
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* Qual is not delayed by any lower outer-join restriction, so we
|
|
|
|
* can consider feeding it to the equivalence machinery. However,
|
|
|
|
* if it's itself within an outer-join clause, treat it as though
|
|
|
|
* it appeared below that outer join (note that we can only get
|
|
|
|
* here when the clause references only nullable-side rels).
|
2005-09-28 23:17:02 +02:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
maybe_equivalence = true;
|
|
|
|
if (outerjoin_nonnullable != NULL)
|
|
|
|
below_outer_join = true;
|
2003-03-03 00:46:34 +01:00
|
|
|
}
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2007-01-20 21:45:41 +01:00
|
|
|
/*
|
|
|
|
* Since it doesn't mention the LHS, it's certainly not useful as a
|
|
|
|
* set-aside OJ clause, even if it's in an OJ.
|
|
|
|
*/
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
maybe_outer_join = false;
|
2000-09-12 23:07:18 +02:00
|
|
|
}
|
|
|
|
|
2003-12-31 00:53:15 +01:00
|
|
|
/*
|
2004-01-04 01:07:32 +01:00
|
|
|
* Build the RestrictInfo node itself.
|
2003-12-31 00:53:15 +01:00
|
|
|
*/
|
2004-01-05 06:07:36 +01:00
|
|
|
restrictinfo = make_restrictinfo((Expr *) clause,
|
|
|
|
is_pushed_down,
|
2005-11-15 00:54:23 +01:00
|
|
|
outerjoin_delayed,
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
pseudoconstant,
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
security_level,
|
2009-04-16 22:42:16 +02:00
|
|
|
relids,
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
outerjoin_nonnullable,
|
2009-04-16 22:42:16 +02:00
|
|
|
nullable_relids);
|
2003-12-31 00:53:15 +01:00
|
|
|
|
2004-01-04 01:07:32 +01:00
|
|
|
/*
|
2007-01-20 21:45:41 +01:00
|
|
|
* If it's a join clause (either naturally, or because delayed by
|
2007-11-15 22:14:46 +01:00
|
|
|
* outer-join rules), add vars used in the clause to targetlists of their
|
|
|
|
* relations, so that they will be emitted by the plan nodes that scan
|
|
|
|
* those relations (else they won't be available at the join node!).
|
2007-01-20 21:45:41 +01:00
|
|
|
*
|
|
|
|
* Note: if the clause gets absorbed into an EquivalenceClass then this
|
|
|
|
* may be unnecessary, but for now we have to do it to cover the case
|
|
|
|
* where the EC becomes ec_broken and we end up reinserting the original
|
|
|
|
* clauses into the plan.
|
2004-01-04 01:07:32 +01:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
if (bms_membership(relids) == BMS_MULTIPLE)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
2011-07-13 00:23:55 +02:00
|
|
|
List *vars = pull_var_clause(clause,
|
2016-03-10 21:52:58 +01:00
|
|
|
PVC_RECURSE_AGGREGATES |
|
2016-03-10 22:23:40 +01:00
|
|
|
PVC_RECURSE_WINDOWFUNCS |
|
Avoid listing ungrouped Vars in the targetlist of Agg-underneath-Window.
Regular aggregate functions in combination with, or within the arguments
of, window functions are OK per spec; they have the semantics that the
aggregate output rows are computed and then we run the window functions
over that row set. (Thus, this combination is not really useful unless
there's a GROUP BY so that more than one aggregate output row is possible.)
The case without GROUP BY could fail, as recently reported by Jeff Davis,
because sloppy construction of the Agg node's targetlist resulted in extra
references to possibly-ungrouped Vars appearing outside the aggregate
function calls themselves. See the added regression test case for an
example.
Fixing this requires modifying the API of flatten_tlist and its underlying
function pull_var_clause. I chose to make pull_var_clause's API for
aggregates identical to what it was already doing for placeholders, since
the useful behaviors turn out to be the same (error, report node as-is, or
recurse into it). I also tightened the error checking in this area a bit:
if it was ever valid to see an uplevel Var, Aggref, or PlaceHolderVar here,
that was a long time ago, so complain instead of ignoring them.
Backpatch into 9.1. The failure exists in 8.4 and 9.0 as well, but seeing
that it only occurs in a basically-useless corner case, it doesn't seem
worth the risks of changing a function API in a minor release. There might
be third-party code using pull_var_clause.
2011-07-13 00:23:55 +02:00
|
|
|
PVC_INCLUDE_PLACEHOLDERS);
|
2003-02-08 21:20:55 +01:00
|
|
|
|
2011-08-09 06:48:51 +02:00
|
|
|
add_vars_to_targetlist(root, vars, relids, false);
|
2007-01-20 21:45:41 +01:00
|
|
|
list_free(vars);
|
2000-08-13 04:50:35 +02:00
|
|
|
}
|
2000-02-15 21:49:31 +01:00
|
|
|
|
|
|
|
/*
|
2007-01-20 21:45:41 +01:00
|
|
|
* We check "mergejoinability" of every clause, not only join clauses,
|
|
|
|
* because we want to know about equivalences between vars of the same
|
|
|
|
* relation, or between vars and consts.
|
|
|
|
*/
|
|
|
|
check_mergejoinable(restrictinfo);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If it is a true equivalence clause, send it to the EquivalenceClass
|
|
|
|
* machinery. We do *not* attach it directly to any restriction or join
|
|
|
|
* lists. The EC code will propagate it to the appropriate places later.
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
*
|
2007-11-15 22:14:46 +01:00
|
|
|
* If the clause has a mergejoinable operator and is not
|
|
|
|
* outerjoin-delayed, yet isn't an equivalence because it is an outer-join
|
|
|
|
* clause, the EC code may yet be able to do something with it. We add it
|
|
|
|
* to appropriate lists for further consideration later. Specifically:
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
*
|
2007-11-15 22:14:46 +01:00
|
|
|
* If it is a left or right outer-join qualification that relates the two
|
|
|
|
* sides of the outer join (no funny business like leftvar1 = leftvar2 +
|
|
|
|
* rightvar), we add it to root->left_join_clauses or
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
* root->right_join_clauses according to which side the nonnullable
|
|
|
|
* variable appears on.
|
|
|
|
*
|
|
|
|
* If it is a full outer-join qualification, we add it to
|
|
|
|
* root->full_join_clauses. (Ideally we'd discard cases that aren't
|
|
|
|
* leftvar = rightvar, as we do for left/right joins, but this routine
|
2007-11-15 22:14:46 +01:00
|
|
|
* doesn't have the info needed to do that; and the current usage of the
|
|
|
|
* full_join_clauses list doesn't require that, so it's not currently
|
|
|
|
* worth complicating this routine's API to make it possible.)
|
2007-01-20 21:45:41 +01:00
|
|
|
*
|
|
|
|
* If none of the above hold, pass it off to
|
|
|
|
* distribute_restrictinfo_to_rels().
|
2010-10-29 17:52:16 +02:00
|
|
|
*
|
|
|
|
* In all cases, it's important to initialize the left_ec and right_ec
|
|
|
|
* fields of a mergejoinable clause, so that all possibly mergejoinable
|
2014-05-06 18:12:18 +02:00
|
|
|
* expressions have representations in EquivalenceClasses. If
|
2010-10-29 17:52:16 +02:00
|
|
|
* process_equivalence is successful, it will take care of that;
|
|
|
|
* otherwise, we have to call initialize_mergeclause_eclasses to do it.
|
2000-02-15 21:49:31 +01:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
if (restrictinfo->mergeopfamilies)
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
{
|
2007-01-20 21:45:41 +01:00
|
|
|
if (maybe_equivalence)
|
|
|
|
{
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
if (check_equivalence_delay(root, restrictinfo) &&
|
Reduce "X = X" to "X IS NOT NULL", if it's easy to do so.
If the operator is a strict btree equality operator, and X isn't volatile,
then the clause must yield true for any non-null value of X, or null if X
is null. At top level of a WHERE clause, we can ignore the distinction
between false and null results, so it's valid to simplify the clause to
"X IS NOT NULL". This is a useful improvement mainly because we'll get
a far better selectivity estimate in most cases.
Because such cases seldom arise in well-written queries, it is unappetizing
to expend a lot of planner cycles looking for them ... but it turns out
that there's a place we can shoehorn this in practically for free, because
equivclass.c already has to detect and reject candidate equivalences of the
form X = X. That doesn't catch every place that it would be valid to
simplify to X IS NOT NULL, but it catches the typical case. Working harder
doesn't seem justified.
Patch by me, reviewed by Petr Jelinek
Discussion: https://postgr.es/m/CAMjNa7cC4X9YR-vAJS-jSYCajhRDvJQnN7m2sLH1wLh-_Z2bsw@mail.gmail.com
2017-10-08 18:23:32 +02:00
|
|
|
process_equivalence(root, &restrictinfo, below_outer_join))
|
2007-01-20 21:45:41 +01:00
|
|
|
return;
|
2010-10-29 17:52:16 +02:00
|
|
|
/* EC rejected it, so set left_ec/right_ec the hard way ... */
|
Reduce "X = X" to "X IS NOT NULL", if it's easy to do so.
If the operator is a strict btree equality operator, and X isn't volatile,
then the clause must yield true for any non-null value of X, or null if X
is null. At top level of a WHERE clause, we can ignore the distinction
between false and null results, so it's valid to simplify the clause to
"X IS NOT NULL". This is a useful improvement mainly because we'll get
a far better selectivity estimate in most cases.
Because such cases seldom arise in well-written queries, it is unappetizing
to expend a lot of planner cycles looking for them ... but it turns out
that there's a place we can shoehorn this in practically for free, because
equivclass.c already has to detect and reject candidate equivalences of the
form X = X. That doesn't catch every place that it would be valid to
simplify to X IS NOT NULL, but it catches the typical case. Working harder
doesn't seem justified.
Patch by me, reviewed by Petr Jelinek
Discussion: https://postgr.es/m/CAMjNa7cC4X9YR-vAJS-jSYCajhRDvJQnN7m2sLH1wLh-_Z2bsw@mail.gmail.com
2017-10-08 18:23:32 +02:00
|
|
|
if (restrictinfo->mergeopfamilies) /* EC might have changed this */
|
|
|
|
initialize_mergeclause_eclasses(root, restrictinfo);
|
2010-10-29 17:52:16 +02:00
|
|
|
/* ... and fall through to distribute_restrictinfo_to_rels */
|
2007-01-20 21:45:41 +01:00
|
|
|
}
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
else if (maybe_outer_join && restrictinfo->can_join)
|
|
|
|
{
|
2010-10-29 17:52:16 +02:00
|
|
|
/* we need to set up left_ec/right_ec the hard way */
|
|
|
|
initialize_mergeclause_eclasses(root, restrictinfo);
|
|
|
|
/* now see if it should go to any outer-join lists */
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
if (bms_is_subset(restrictinfo->left_relids,
|
|
|
|
outerjoin_nonnullable) &&
|
|
|
|
!bms_overlap(restrictinfo->right_relids,
|
|
|
|
outerjoin_nonnullable))
|
|
|
|
{
|
|
|
|
/* we have outervar = innervar */
|
|
|
|
root->left_join_clauses = lappend(root->left_join_clauses,
|
|
|
|
restrictinfo);
|
2007-01-20 21:45:41 +01:00
|
|
|
return;
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
}
|
2007-01-20 21:45:41 +01:00
|
|
|
if (bms_is_subset(restrictinfo->right_relids,
|
2007-11-15 22:14:46 +01:00
|
|
|
outerjoin_nonnullable) &&
|
|
|
|
!bms_overlap(restrictinfo->left_relids,
|
|
|
|
outerjoin_nonnullable))
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
{
|
|
|
|
/* we have innervar = outervar */
|
|
|
|
root->right_join_clauses = lappend(root->right_join_clauses,
|
|
|
|
restrictinfo);
|
2007-01-20 21:45:41 +01:00
|
|
|
return;
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
}
|
2008-10-25 21:51:32 +02:00
|
|
|
if (jointype == JOIN_FULL)
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
{
|
|
|
|
/* FULL JOIN (above tests cannot match in this case) */
|
|
|
|
root->full_join_clauses = lappend(root->full_join_clauses,
|
|
|
|
restrictinfo);
|
2007-01-20 21:45:41 +01:00
|
|
|
return;
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
}
|
2010-10-29 17:52:16 +02:00
|
|
|
/* nope, so fall through to distribute_restrictinfo_to_rels */
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* we still need to set up left_ec/right_ec */
|
|
|
|
initialize_mergeclause_eclasses(root, restrictinfo);
|
Teach planner about some cases where a restriction clause can be
propagated inside an outer join. In particular, given
LEFT JOIN ON (A = B) WHERE A = constant, we cannot conclude that
B = constant at the top level (B might be null instead), but we
can nonetheless put a restriction B = constant into the quals for
B's relation, since no inner-side rows not meeting that condition
can contribute to the final result. Similarly, given
FULL JOIN USING (J) WHERE J = constant, we can't directly conclude
that either input J variable = constant, but it's OK to push such
quals into each input rel. Per recent gripe from Kim Bisgaard.
Along the way, remove 'valid_everywhere' flag from RestrictInfo,
as on closer analysis it was not being used for anything, and was
defined backwards anyway.
2005-07-03 01:00:42 +02:00
|
|
|
}
|
|
|
|
}
|
2007-01-20 21:45:41 +01:00
|
|
|
|
|
|
|
/* No EC special case applies, so push it into the clause lists */
|
|
|
|
distribute_restrictinfo_to_rels(root, restrictinfo);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2000-07-24 05:11:01 +02:00
|
|
|
/*
|
2007-01-20 21:45:41 +01:00
|
|
|
* check_outerjoin_delay
|
|
|
|
* Detect whether a qual referencing the given relids must be delayed
|
2007-05-23 01:23:58 +02:00
|
|
|
* in application due to the presence of a lower outer join, and/or
|
|
|
|
* may force extra delay of higher-level outer joins.
|
2003-01-24 04:58:44 +01:00
|
|
|
*
|
2007-05-23 01:23:58 +02:00
|
|
|
* If the qual must be delayed, add relids to *relids_p to reflect the lowest
|
2017-08-16 06:22:32 +02:00
|
|
|
* safe level for evaluating the qual, and return true. Any extra delay for
|
|
|
|
* higher-level joins is reflected by setting delay_upper_joins to true in
|
2009-04-16 22:42:16 +02:00
|
|
|
* SpecialJoinInfo structs. We also compute nullable_relids, the set of
|
|
|
|
* referenced relids that are nullable by lower outer joins (note that this
|
|
|
|
* can be nonempty even for a non-delayed qual).
|
2007-01-20 21:45:41 +01:00
|
|
|
*
|
2007-02-16 21:57:19 +01:00
|
|
|
* For an is_pushed_down qual, we can evaluate the qual as soon as (1) we have
|
2007-01-20 21:45:41 +01:00
|
|
|
* all the rels it mentions, and (2) we are at or above any outer joins that
|
|
|
|
* can null any of these rels and are below the syntactic location of the
|
2014-05-06 18:12:18 +02:00
|
|
|
* given qual. We must enforce (2) because pushing down such a clause below
|
2007-01-20 21:45:41 +01:00
|
|
|
* the OJ might cause the OJ to emit null-extended rows that should not have
|
|
|
|
* been formed, or that should have been rejected by the clause. (This is
|
|
|
|
* only an issue for non-strict quals, since if we can prove a qual mentioning
|
|
|
|
* only nullable rels is strict, we'd have reduced the outer join to an inner
|
|
|
|
* join in reduce_outer_joins().)
|
|
|
|
*
|
2008-08-14 20:48:00 +02:00
|
|
|
* To enforce (2), scan the join_info_list and merge the required-relid sets of
|
2007-01-20 21:45:41 +01:00
|
|
|
* any such OJs into the clause's own reference list. At the time we are
|
2008-08-14 20:48:00 +02:00
|
|
|
* called, the join_info_list contains only outer joins below this qual. We
|
2007-01-20 21:45:41 +01:00
|
|
|
* have to repeat the scan until no new relids get added; this ensures that
|
|
|
|
* the qual is suitably delayed regardless of the order in which OJs get
|
|
|
|
* executed. As an example, if we have one OJ with LHS=A, RHS=B, and one with
|
|
|
|
* LHS=B, RHS=C, it is implied that these can be done in either order; if the
|
|
|
|
* B/C join is done first then the join to A can null C, so a qual actually
|
|
|
|
* mentioning only C cannot be applied below the join to A.
|
|
|
|
*
|
2007-02-16 21:57:19 +01:00
|
|
|
* For a non-pushed-down qual, this isn't going to determine where we place the
|
2009-04-16 22:42:16 +02:00
|
|
|
* qual, but we need to determine outerjoin_delayed and nullable_relids anyway
|
|
|
|
* for use later in the planning process.
|
2007-05-23 01:23:58 +02:00
|
|
|
*
|
|
|
|
* Lastly, a pushed-down qual that references the nullable side of any current
|
2008-08-14 20:48:00 +02:00
|
|
|
* join_info_list member and has to be evaluated above that OJ (because its
|
2007-05-23 01:23:58 +02:00
|
|
|
* required relids overlap the LHS too) causes that OJ's delay_upper_joins
|
2017-08-16 06:22:32 +02:00
|
|
|
* flag to be set true. This will prevent any higher-level OJs from
|
2007-05-23 01:23:58 +02:00
|
|
|
* being interchanged with that OJ, which would result in not having any
|
2014-05-06 18:12:18 +02:00
|
|
|
* correct place to evaluate the qual. (The case we care about here is a
|
2007-05-23 01:23:58 +02:00
|
|
|
* sub-select WHERE clause within the RHS of some outer join. The WHERE
|
|
|
|
* clause must effectively be treated as a degenerate clause of that outer
|
|
|
|
* join's condition. Rather than trying to match such clauses with joins
|
|
|
|
* directly, we set delay_upper_joins here, and when the upper outer join
|
|
|
|
* is processed by make_outerjoininfo, it will refrain from allowing the
|
|
|
|
* two OJs to commute.)
|
2000-07-24 05:11:01 +02:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
static bool
|
2009-04-16 22:42:16 +02:00
|
|
|
check_outerjoin_delay(PlannerInfo *root,
|
2009-06-11 16:49:15 +02:00
|
|
|
Relids *relids_p, /* in/out parameter */
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
Relids *nullable_relids_p, /* output parameter */
|
2007-05-23 01:23:58 +02:00
|
|
|
bool is_pushed_down)
|
2000-07-24 05:11:01 +02:00
|
|
|
{
|
2009-04-16 22:42:16 +02:00
|
|
|
Relids relids;
|
|
|
|
Relids nullable_relids;
|
2007-01-20 21:45:41 +01:00
|
|
|
bool outerjoin_delayed;
|
|
|
|
bool found_some;
|
2003-02-08 21:20:55 +01:00
|
|
|
|
2009-04-16 22:42:16 +02:00
|
|
|
/* fast path if no special joins */
|
|
|
|
if (root->join_info_list == NIL)
|
|
|
|
{
|
|
|
|
*nullable_relids_p = NULL;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* must copy relids because we need the original value at the end */
|
|
|
|
relids = bms_copy(*relids_p);
|
|
|
|
nullable_relids = NULL;
|
2007-01-20 21:45:41 +01:00
|
|
|
outerjoin_delayed = false;
|
2007-11-15 22:14:46 +01:00
|
|
|
do
|
|
|
|
{
|
2007-01-20 21:45:41 +01:00
|
|
|
ListCell *l;
|
2003-01-24 04:58:44 +01:00
|
|
|
|
2007-01-20 21:45:41 +01:00
|
|
|
found_some = false;
|
2008-08-14 20:48:00 +02:00
|
|
|
foreach(l, root->join_info_list)
|
2003-01-24 04:58:44 +01:00
|
|
|
{
|
2008-08-14 20:48:00 +02:00
|
|
|
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
|
2007-01-20 21:45:41 +01:00
|
|
|
|
|
|
|
/* do we reference any nullable rels of this OJ? */
|
2008-08-14 20:48:00 +02:00
|
|
|
if (bms_overlap(relids, sjinfo->min_righthand) ||
|
|
|
|
(sjinfo->jointype == JOIN_FULL &&
|
|
|
|
bms_overlap(relids, sjinfo->min_lefthand)))
|
2003-01-24 04:58:44 +01:00
|
|
|
{
|
2009-04-16 22:42:16 +02:00
|
|
|
/* yes; have we included all its rels in relids? */
|
2008-08-14 20:48:00 +02:00
|
|
|
if (!bms_is_subset(sjinfo->min_lefthand, relids) ||
|
|
|
|
!bms_is_subset(sjinfo->min_righthand, relids))
|
2003-01-24 04:58:44 +01:00
|
|
|
{
|
2007-01-20 21:45:41 +01:00
|
|
|
/* no, so add them in */
|
2008-08-14 20:48:00 +02:00
|
|
|
relids = bms_add_members(relids, sjinfo->min_lefthand);
|
|
|
|
relids = bms_add_members(relids, sjinfo->min_righthand);
|
2009-04-16 22:42:16 +02:00
|
|
|
outerjoin_delayed = true;
|
2007-01-20 21:45:41 +01:00
|
|
|
/* we'll need another iteration */
|
|
|
|
found_some = true;
|
2003-01-24 04:58:44 +01:00
|
|
|
}
|
2009-04-16 22:42:16 +02:00
|
|
|
/* track all the nullable rels of relevant OJs */
|
|
|
|
nullable_relids = bms_add_members(nullable_relids,
|
|
|
|
sjinfo->min_righthand);
|
|
|
|
if (sjinfo->jointype == JOIN_FULL)
|
|
|
|
nullable_relids = bms_add_members(nullable_relids,
|
|
|
|
sjinfo->min_lefthand);
|
2007-05-23 01:23:58 +02:00
|
|
|
/* set delay_upper_joins if needed */
|
2008-08-14 20:48:00 +02:00
|
|
|
if (is_pushed_down && sjinfo->jointype != JOIN_FULL &&
|
|
|
|
bms_overlap(relids, sjinfo->min_lefthand))
|
|
|
|
sjinfo->delay_upper_joins = true;
|
2003-01-24 04:58:44 +01:00
|
|
|
}
|
|
|
|
}
|
2007-01-20 21:45:41 +01:00
|
|
|
} while (found_some);
|
2003-01-24 04:58:44 +01:00
|
|
|
|
2009-04-16 22:42:16 +02:00
|
|
|
/* identify just the actually-referenced nullable rels */
|
|
|
|
nullable_relids = bms_int_members(nullable_relids, *relids_p);
|
|
|
|
|
|
|
|
/* replace *relids_p, and return nullable_relids */
|
|
|
|
bms_free(*relids_p);
|
2007-01-20 21:45:41 +01:00
|
|
|
*relids_p = relids;
|
2009-04-16 22:42:16 +02:00
|
|
|
*nullable_relids_p = nullable_relids;
|
2007-01-20 21:45:41 +01:00
|
|
|
return outerjoin_delayed;
|
|
|
|
}
|
2001-03-22 05:01:46 +01:00
|
|
|
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
/*
|
|
|
|
* check_equivalence_delay
|
|
|
|
* Detect whether a potential equivalence clause is rendered unsafe
|
2017-08-16 06:22:32 +02:00
|
|
|
* by outer-join-delay considerations. Return true if it's safe.
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
*
|
|
|
|
* The initial tests in distribute_qual_to_rels will consider a mergejoinable
|
|
|
|
* clause to be a potential equivalence clause if it is not outerjoin_delayed.
|
|
|
|
* But since the point of equivalence processing is that we will recombine the
|
|
|
|
* two sides of the clause with others, we have to check that each side
|
|
|
|
* satisfies the not-outerjoin_delayed condition on its own; otherwise it might
|
|
|
|
* not be safe to evaluate everywhere we could place a derived equivalence
|
|
|
|
* condition.
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
check_equivalence_delay(PlannerInfo *root,
|
|
|
|
RestrictInfo *restrictinfo)
|
|
|
|
{
|
|
|
|
Relids relids;
|
|
|
|
Relids nullable_relids;
|
|
|
|
|
|
|
|
/* fast path if no special joins */
|
|
|
|
if (root->join_info_list == NIL)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/* must copy restrictinfo's relids to avoid changing it */
|
|
|
|
relids = bms_copy(restrictinfo->left_relids);
|
|
|
|
/* check left side does not need delay */
|
|
|
|
if (check_outerjoin_delay(root, &relids, &nullable_relids, true))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* and similarly for the right side */
|
|
|
|
relids = bms_copy(restrictinfo->right_relids);
|
|
|
|
if (check_outerjoin_delay(root, &relids, &nullable_relids, true))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2008-08-14 20:48:00 +02:00
|
|
|
/*
|
|
|
|
* check_redundant_nullability_qual
|
|
|
|
* Check to see if the qual is an IS NULL qual that is redundant with
|
|
|
|
* a lower JOIN_ANTI join.
|
|
|
|
*
|
|
|
|
* We want to suppress redundant IS NULL quals, not so much to save cycles
|
|
|
|
* as to avoid generating bogus selectivity estimates for them. So if
|
|
|
|
* redundancy is detected here, distribute_qual_to_rels() just throws away
|
|
|
|
* the qual.
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
check_redundant_nullability_qual(PlannerInfo *root, Node *clause)
|
|
|
|
{
|
|
|
|
Var *forced_null_var;
|
|
|
|
Index forced_null_rel;
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
/* Check for IS NULL, and identify the Var forced to NULL */
|
|
|
|
forced_null_var = find_forced_null_var(clause);
|
|
|
|
if (forced_null_var == NULL)
|
|
|
|
return false;
|
|
|
|
forced_null_rel = forced_null_var->varno;
|
|
|
|
|
|
|
|
/*
|
2009-06-11 16:49:15 +02:00
|
|
|
* If the Var comes from the nullable side of a lower antijoin, the IS
|
|
|
|
* NULL condition is necessarily true.
|
2008-08-14 20:48:00 +02:00
|
|
|
*/
|
|
|
|
foreach(lc, root->join_info_list)
|
|
|
|
{
|
|
|
|
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
|
|
|
|
|
|
|
|
if (sjinfo->jointype == JOIN_ANTI &&
|
|
|
|
bms_is_member(forced_null_rel, sjinfo->syn_righthand))
|
2009-02-20 01:01:03 +01:00
|
|
|
return true;
|
2008-08-14 20:48:00 +02:00
|
|
|
}
|
|
|
|
|
2009-02-20 01:01:03 +01:00
|
|
|
return false;
|
2008-08-14 20:48:00 +02:00
|
|
|
}
|
|
|
|
|
2007-01-20 21:45:41 +01:00
|
|
|
/*
|
|
|
|
* distribute_restrictinfo_to_rels
|
|
|
|
* Push a completed RestrictInfo into the proper restriction or join
|
|
|
|
* clause list(s).
|
|
|
|
*
|
|
|
|
* This is the last step of distribute_qual_to_rels() for ordinary qual
|
|
|
|
* clauses. Clauses that are interesting for equivalence-class processing
|
|
|
|
* are diverted to the EC machinery, but may ultimately get fed back here.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
distribute_restrictinfo_to_rels(PlannerInfo *root,
|
|
|
|
RestrictInfo *restrictinfo)
|
|
|
|
{
|
|
|
|
Relids relids = restrictinfo->required_relids;
|
|
|
|
RelOptInfo *rel;
|
|
|
|
|
|
|
|
switch (bms_membership(relids))
|
2000-07-24 05:11:01 +02:00
|
|
|
{
|
2007-01-20 21:45:41 +01:00
|
|
|
case BMS_SINGLETON:
|
|
|
|
|
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* There is only one relation participating in the clause, so it
|
|
|
|
* is a restriction clause for that relation.
|
2007-01-20 21:45:41 +01:00
|
|
|
*/
|
|
|
|
rel = find_base_rel(root, bms_singleton_member(relids));
|
|
|
|
|
|
|
|
/* Add clause to rel's restriction list */
|
|
|
|
rel->baserestrictinfo = lappend(rel->baserestrictinfo,
|
|
|
|
restrictinfo);
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
/* Update security level info */
|
|
|
|
rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
restrictinfo->security_level);
|
2007-01-20 21:45:41 +01:00
|
|
|
break;
|
|
|
|
case BMS_MULTIPLE:
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The clause is a join clause, since there is more than one rel
|
|
|
|
* in its relid set.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* Check for hashjoinable operators. (We don't bother setting the
|
2010-12-31 02:24:55 +01:00
|
|
|
* hashjoin info except in true join clauses.)
|
2007-01-20 21:45:41 +01:00
|
|
|
*/
|
2010-12-31 02:24:55 +01:00
|
|
|
check_hashjoinable(restrictinfo);
|
2007-01-20 21:45:41 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Add clause to the join lists of all the relevant relations.
|
|
|
|
*/
|
|
|
|
add_join_clause_to_rels(root, restrictinfo, relids);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
|
|
|
|
/*
|
|
|
|
* clause references no rels, and therefore we have no place to
|
|
|
|
* attach it. Shouldn't get here if callers are working properly.
|
|
|
|
*/
|
|
|
|
elog(ERROR, "cannot cope with variable-free clause");
|
|
|
|
break;
|
2000-07-24 05:11:01 +02:00
|
|
|
}
|
2007-01-20 21:45:41 +01:00
|
|
|
}
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2007-01-20 21:45:41 +01:00
|
|
|
/*
|
|
|
|
* process_implied_equality
|
|
|
|
* Create a restrictinfo item that says "item1 op item2", and push it
|
|
|
|
* into the appropriate lists. (In practice opno is always a btree
|
|
|
|
* equality operator.)
|
|
|
|
*
|
|
|
|
* "qualscope" is the nominal syntactic level to impute to the restrictinfo.
|
|
|
|
* This must contain at least all the rels used in the expressions, but it
|
|
|
|
* is used only to set the qual application level when both exprs are
|
|
|
|
* variable-free. Otherwise the qual is applied at the lowest join level
|
|
|
|
* that provides all its variables.
|
|
|
|
*
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
* "nullable_relids" is the set of relids used in the expressions that are
|
2014-05-06 18:12:18 +02:00
|
|
|
* potentially nullable below the expressions. (This has to be supplied by
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
* caller because this function is used after deconstruct_jointree, so we
|
|
|
|
* don't have knowledge of where the clause items came from.)
|
|
|
|
*
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
* "security_level" is the security level to assign to the new restrictinfo.
|
|
|
|
*
|
2007-01-20 21:45:41 +01:00
|
|
|
* "both_const" indicates whether both items are known pseudo-constant;
|
|
|
|
* in this case it is worth applying eval_const_expressions() in case we
|
|
|
|
* can produce constant TRUE or constant FALSE. (Otherwise it's not,
|
|
|
|
* because the expressions went through eval_const_expressions already.)
|
|
|
|
*
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
* Note: this function will copy item1 and item2, but it is caller's
|
|
|
|
* responsibility to make sure that the Relids parameters are fresh copies
|
|
|
|
* not shared with other uses.
|
|
|
|
*
|
2007-01-20 21:45:41 +01:00
|
|
|
* This is currently used only when an EquivalenceClass is found to
|
|
|
|
* contain pseudoconstants. See path/pathkeys.c for more details.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
process_implied_equality(PlannerInfo *root,
|
|
|
|
Oid opno,
|
2011-03-20 01:29:08 +01:00
|
|
|
Oid collation,
|
2007-01-20 21:45:41 +01:00
|
|
|
Expr *item1,
|
|
|
|
Expr *item2,
|
|
|
|
Relids qualscope,
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
Relids nullable_relids,
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
Index security_level,
|
2007-01-20 21:45:41 +01:00
|
|
|
bool below_outer_join,
|
|
|
|
bool both_const)
|
|
|
|
{
|
|
|
|
Expr *clause;
|
2000-07-24 05:11:01 +02:00
|
|
|
|
2004-02-27 22:42:00 +01:00
|
|
|
/*
|
2007-01-20 21:45:41 +01:00
|
|
|
* Build the new clause. Copy to ensure it shares no substructure with
|
|
|
|
* original (this is necessary in case there are subselects in there...)
|
2004-02-27 22:42:00 +01:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
clause = make_opclause(opno,
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
BOOLOID, /* opresulttype */
|
|
|
|
false, /* opretset */
|
2017-03-09 21:18:59 +01:00
|
|
|
copyObject(item1),
|
|
|
|
copyObject(item2),
|
2011-03-20 01:29:08 +01:00
|
|
|
InvalidOid,
|
|
|
|
collation);
|
2000-07-24 05:11:01 +02:00
|
|
|
|
2007-01-20 21:45:41 +01:00
|
|
|
/* If both constant, try to reduce to a boolean constant. */
|
|
|
|
if (both_const)
|
|
|
|
{
|
2008-04-01 02:48:33 +02:00
|
|
|
clause = (Expr *) eval_const_expressions(root, (Node *) clause);
|
2007-01-20 21:45:41 +01:00
|
|
|
|
|
|
|
/* If we produced const TRUE, just drop the clause */
|
|
|
|
if (clause && IsA(clause, Const))
|
|
|
|
{
|
2007-11-15 22:14:46 +01:00
|
|
|
Const *cclause = (Const *) clause;
|
2007-01-20 21:45:41 +01:00
|
|
|
|
|
|
|
Assert(cclause->consttype == BOOLOID);
|
|
|
|
if (!cclause->constisnull && DatumGetBool(cclause->constvalue))
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2000-09-29 20:21:41 +02:00
|
|
|
/*
|
2003-01-15 20:35:48 +01:00
|
|
|
* Push the new clause into all the appropriate restrictinfo lists.
|
2000-09-29 20:21:41 +02:00
|
|
|
*/
|
|
|
|
distribute_qual_to_rels(root, (Node *) clause,
|
2008-10-25 21:51:32 +02:00
|
|
|
true, below_outer_join, JOIN_INNER,
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
security_level,
|
2013-08-19 19:19:25 +02:00
|
|
|
qualscope, NULL, NULL, nullable_relids,
|
|
|
|
NULL);
|
2002-11-20 00:22:00 +01:00
|
|
|
}
|
|
|
|
|
2001-10-18 18:11:42 +02:00
|
|
|
/*
|
2007-01-20 21:45:41 +01:00
|
|
|
* build_implied_join_equality --- build a RestrictInfo for a derived equality
|
2003-01-24 04:58:44 +01:00
|
|
|
*
|
2007-01-20 21:45:41 +01:00
|
|
|
* This overlaps the functionality of process_implied_equality(), but we
|
|
|
|
* must return the RestrictInfo, not push it into the joininfo tree.
|
2010-10-29 17:52:16 +02:00
|
|
|
*
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
* Note: this function will copy item1 and item2, but it is caller's
|
|
|
|
* responsibility to make sure that the Relids parameters are fresh copies
|
|
|
|
* not shared with other uses.
|
|
|
|
*
|
2010-10-29 17:52:16 +02:00
|
|
|
* Note: we do not do initialize_mergeclause_eclasses() here. It is
|
|
|
|
* caller's responsibility that left_ec/right_ec be set as necessary.
|
2001-10-18 18:11:42 +02:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
RestrictInfo *
|
|
|
|
build_implied_join_equality(Oid opno,
|
2011-03-20 01:29:08 +01:00
|
|
|
Oid collation,
|
2007-01-20 21:45:41 +01:00
|
|
|
Expr *item1,
|
|
|
|
Expr *item2,
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
Relids qualscope,
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
Relids nullable_relids,
|
|
|
|
Index security_level)
|
2001-10-18 18:11:42 +02:00
|
|
|
{
|
2007-01-20 21:45:41 +01:00
|
|
|
RestrictInfo *restrictinfo;
|
|
|
|
Expr *clause;
|
2001-10-25 07:50:21 +02:00
|
|
|
|
2001-10-18 18:11:42 +02:00
|
|
|
/*
|
2007-01-20 21:45:41 +01:00
|
|
|
* Build the new clause. Copy to ensure it shares no substructure with
|
|
|
|
* original (this is necessary in case there are subselects in there...)
|
2001-10-18 18:11:42 +02:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
clause = make_opclause(opno,
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
BOOLOID, /* opresulttype */
|
|
|
|
false, /* opretset */
|
2017-03-09 21:18:59 +01:00
|
|
|
copyObject(item1),
|
|
|
|
copyObject(item2),
|
2011-03-20 01:29:08 +01:00
|
|
|
InvalidOid,
|
|
|
|
collation);
|
2001-10-18 18:11:42 +02:00
|
|
|
|
|
|
|
/*
|
2007-01-20 21:45:41 +01:00
|
|
|
* Build the RestrictInfo node itself.
|
2001-10-18 18:11:42 +02:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
restrictinfo = make_restrictinfo(clause,
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
true, /* is_pushed_down */
|
|
|
|
false, /* outerjoin_delayed */
|
|
|
|
false, /* pseudoconstant */
|
Improve RLS planning by marking individual quals with security levels.
In an RLS query, we must ensure that security filter quals are evaluated
before ordinary query quals, in case the latter contain "leaky" functions
that could expose the contents of sensitive rows. The original
implementation of RLS planning ensured this by pushing the scan of a
secured table into a sub-query that it marked as a security-barrier view.
Unfortunately this results in very inefficient plans in many cases, because
the sub-query cannot be flattened and gets planned independently of the
rest of the query.
To fix, drop the use of sub-queries to enforce RLS qual order, and instead
mark each qual (RestrictInfo) with a security_level field establishing its
priority for evaluation. Quals must be evaluated in security_level order,
except that "leakproof" quals can be allowed to go ahead of quals of lower
security_level, if it's helpful to do so. This has to be enforced within
the ordering of any one list of quals to be evaluated at a table scan node,
and we also have to ensure that quals are not chosen for early evaluation
(i.e., use as an index qual or TID scan qual) if they're not allowed to go
ahead of other quals at the scan node.
This is sufficient to fix the problem for RLS quals, since we only support
RLS policies on simple tables and thus RLS quals will always exist at the
table scan level only. Eventually these qual ordering rules should be
enforced for join quals as well, which would permit improving planning for
explicit security-barrier views; but that's a task for another patch.
Note that FDWs would need to be aware of these rules --- and not, for
example, send an insecure qual for remote execution --- but since we do
not yet allow RLS policies on foreign tables, the case doesn't arise.
This will need to be addressed before we can allow such policies.
Patch by me, reviewed by Stephen Frost and Dean Rasheed.
Discussion: https://postgr.es/m/8185.1477432701@sss.pgh.pa.us
2017-01-18 18:58:20 +01:00
|
|
|
security_level, /* security_level */
|
2009-06-11 16:49:15 +02:00
|
|
|
qualscope, /* required_relids */
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
NULL, /* outer_relids */
|
Fix planning of non-strict equivalence clauses above outer joins.
If a potential equivalence clause references a variable from the nullable
side of an outer join, the planner needs to take care that derived clauses
are not pushed to below the outer join; else they may use the wrong value
for the variable. (The problem arises only with non-strict clauses, since
if an upper clause can be proven strict then the outer join will get
simplified to a plain join.) The planner attempted to prevent this type
of error by checking that potential equivalence clauses aren't
outerjoin-delayed as a whole, but actually we have to check each side
separately, since the two sides of the clause will get moved around
separately if it's treated as an equivalence. Bugs of this type can be
demonstrated as far back as 7.4, even though releases before 8.3 had only
a very ad-hoc notion of equivalence clauses.
In addition, we neglected to account for the possibility that such clauses
might have nonempty nullable_relids even when not outerjoin-delayed; so the
equivalence-class machinery lacked logic to compute correct nullable_relids
values for clauses it constructs. This oversight was harmless before 9.2
because we were only using RestrictInfo.nullable_relids for OR clauses;
but as of 9.2 it could result in pushing constructed equivalence clauses
to incorrect places. (This accounts for bug #7604 from Bill MacArthur.)
Fix the first problem by adding a new test check_equivalence_delay() in
distribute_qual_to_rels, and fix the second one by adding code in
equivclass.c and called functions to set correct nullable_relids for
generated clauses. Although I believe the second part of this is not
currently necessary before 9.2, I chose to back-patch it anyway, partly to
keep the logic similar across branches and partly because it seems possible
we might find other reasons why we need valid values of nullable_relids in
the older branches.
Add regression tests illustrating these problems. In 9.0 and up, also
add test cases checking that we can push constants through outer joins,
since we've broken that optimization before and I nearly broke it again
with an overly simplistic patch for this problem.
2012-10-18 18:28:45 +02:00
|
|
|
nullable_relids); /* nullable_relids */
|
2007-01-20 21:45:41 +01:00
|
|
|
|
2010-12-31 02:24:55 +01:00
|
|
|
/* Set mergejoinability/hashjoinability flags */
|
2007-01-20 21:45:41 +01:00
|
|
|
check_mergejoinable(restrictinfo);
|
2010-12-31 02:24:55 +01:00
|
|
|
check_hashjoinable(restrictinfo);
|
2007-01-20 21:45:41 +01:00
|
|
|
|
|
|
|
return restrictinfo;
|
2001-10-18 18:11:42 +02:00
|
|
|
}
|
|
|
|
|
2000-07-24 05:11:01 +02:00
|
|
|
|
2016-06-18 21:22:34 +02:00
|
|
|
/*
|
|
|
|
* match_foreign_keys_to_quals
|
|
|
|
* Match foreign-key constraints to equivalence classes and join quals
|
|
|
|
*
|
|
|
|
* The idea here is to see which query join conditions match equality
|
|
|
|
* constraints of a foreign-key relationship. For such join conditions,
|
|
|
|
* we can use the FK semantics to make selectivity estimates that are more
|
|
|
|
* reliable than estimating from statistics, especially for multiple-column
|
|
|
|
* FKs, where the normal assumption of independent conditions tends to fail.
|
|
|
|
*
|
|
|
|
* In this function we annotate the ForeignKeyOptInfos in root->fkey_list
|
|
|
|
* with info about which eclasses and join qual clauses they match, and
|
|
|
|
* discard any ForeignKeyOptInfos that are irrelevant for the query.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
match_foreign_keys_to_quals(PlannerInfo *root)
|
|
|
|
{
|
|
|
|
List *newlist = NIL;
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
foreach(lc, root->fkey_list)
|
|
|
|
{
|
|
|
|
ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
|
2016-06-29 22:02:08 +02:00
|
|
|
RelOptInfo *con_rel;
|
|
|
|
RelOptInfo *ref_rel;
|
2016-06-18 21:22:34 +02:00
|
|
|
int colno;
|
|
|
|
|
2016-06-29 22:02:08 +02:00
|
|
|
/*
|
|
|
|
* Either relid might identify a rel that is in the query's rtable but
|
|
|
|
* isn't referenced by the jointree so won't have a RelOptInfo. Hence
|
|
|
|
* don't use find_base_rel() here. We can ignore such FKs.
|
|
|
|
*/
|
|
|
|
if (fkinfo->con_relid >= root->simple_rel_array_size ||
|
|
|
|
fkinfo->ref_relid >= root->simple_rel_array_size)
|
|
|
|
continue; /* just paranoia */
|
|
|
|
con_rel = root->simple_rel_array[fkinfo->con_relid];
|
|
|
|
if (con_rel == NULL)
|
|
|
|
continue;
|
|
|
|
ref_rel = root->simple_rel_array[fkinfo->ref_relid];
|
|
|
|
if (ref_rel == NULL)
|
|
|
|
continue;
|
|
|
|
|
2016-06-18 21:22:34 +02:00
|
|
|
/*
|
|
|
|
* Ignore FK unless both rels are baserels. This gets rid of FKs that
|
|
|
|
* link to inheritance child rels (otherrels) and those that link to
|
|
|
|
* rels removed by join removal (dead rels).
|
|
|
|
*/
|
|
|
|
if (con_rel->reloptkind != RELOPT_BASEREL ||
|
|
|
|
ref_rel->reloptkind != RELOPT_BASEREL)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Scan the columns and try to match them to eclasses and quals.
|
|
|
|
*
|
|
|
|
* Note: for simple inner joins, any match should be in an eclass.
|
|
|
|
* "Loose" quals that syntactically match an FK equality must have
|
|
|
|
* been rejected for EC status because they are outer-join quals or
|
|
|
|
* similar. We can still consider them to match the FK if they are
|
|
|
|
* not outerjoin_delayed.
|
|
|
|
*/
|
|
|
|
for (colno = 0; colno < fkinfo->nkeys; colno++)
|
|
|
|
{
|
|
|
|
AttrNumber con_attno,
|
|
|
|
ref_attno;
|
|
|
|
Oid fpeqop;
|
|
|
|
ListCell *lc2;
|
|
|
|
|
|
|
|
fkinfo->eclass[colno] = match_eclasses_to_foreign_key_col(root,
|
|
|
|
fkinfo,
|
|
|
|
colno);
|
|
|
|
/* Don't bother looking for loose quals if we got an EC match */
|
|
|
|
if (fkinfo->eclass[colno] != NULL)
|
|
|
|
{
|
|
|
|
fkinfo->nmatched_ec++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Scan joininfo list for relevant clauses. Either rel's joininfo
|
|
|
|
* list would do equally well; we use con_rel's.
|
|
|
|
*/
|
|
|
|
con_attno = fkinfo->conkey[colno];
|
|
|
|
ref_attno = fkinfo->confkey[colno];
|
|
|
|
fpeqop = InvalidOid; /* we'll look this up only if needed */
|
|
|
|
|
|
|
|
foreach(lc2, con_rel->joininfo)
|
|
|
|
{
|
|
|
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2);
|
|
|
|
OpExpr *clause = (OpExpr *) rinfo->clause;
|
|
|
|
Var *leftvar;
|
|
|
|
Var *rightvar;
|
|
|
|
|
|
|
|
/* Ignore outerjoin-delayed clauses */
|
|
|
|
if (rinfo->outerjoin_delayed)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Only binary OpExprs are useful for consideration */
|
|
|
|
if (!IsA(clause, OpExpr) ||
|
|
|
|
list_length(clause->args) != 2)
|
|
|
|
continue;
|
|
|
|
leftvar = (Var *) get_leftop((Expr *) clause);
|
|
|
|
rightvar = (Var *) get_rightop((Expr *) clause);
|
|
|
|
|
|
|
|
/* Operands must be Vars, possibly with RelabelType */
|
|
|
|
while (leftvar && IsA(leftvar, RelabelType))
|
|
|
|
leftvar = (Var *) ((RelabelType *) leftvar)->arg;
|
|
|
|
if (!(leftvar && IsA(leftvar, Var)))
|
|
|
|
continue;
|
|
|
|
while (rightvar && IsA(rightvar, RelabelType))
|
|
|
|
rightvar = (Var *) ((RelabelType *) rightvar)->arg;
|
|
|
|
if (!(rightvar && IsA(rightvar, Var)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Now try to match the vars to the current foreign key cols */
|
|
|
|
if (fkinfo->ref_relid == leftvar->varno &&
|
|
|
|
ref_attno == leftvar->varattno &&
|
|
|
|
fkinfo->con_relid == rightvar->varno &&
|
|
|
|
con_attno == rightvar->varattno)
|
|
|
|
{
|
|
|
|
/* Vars match, but is it the right operator? */
|
|
|
|
if (clause->opno == fkinfo->conpfeqop[colno])
|
|
|
|
{
|
|
|
|
fkinfo->rinfos[colno] = lappend(fkinfo->rinfos[colno],
|
|
|
|
rinfo);
|
|
|
|
fkinfo->nmatched_ri++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (fkinfo->ref_relid == rightvar->varno &&
|
|
|
|
ref_attno == rightvar->varattno &&
|
|
|
|
fkinfo->con_relid == leftvar->varno &&
|
|
|
|
con_attno == leftvar->varattno)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Reverse match, must check commutator operator. Look it
|
|
|
|
* up if we didn't already. (In the worst case we might
|
|
|
|
* do multiple lookups here, but that would require an FK
|
|
|
|
* equality operator without commutator, which is
|
|
|
|
* unlikely.)
|
|
|
|
*/
|
|
|
|
if (!OidIsValid(fpeqop))
|
|
|
|
fpeqop = get_commutator(fkinfo->conpfeqop[colno]);
|
|
|
|
if (clause->opno == fpeqop)
|
|
|
|
{
|
|
|
|
fkinfo->rinfos[colno] = lappend(fkinfo->rinfos[colno],
|
|
|
|
rinfo);
|
|
|
|
fkinfo->nmatched_ri++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* If we found any matching loose quals, count col as matched */
|
|
|
|
if (fkinfo->rinfos[colno])
|
|
|
|
fkinfo->nmatched_rcols++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Currently, we drop multicolumn FKs that aren't fully matched to the
|
|
|
|
* query. Later we might figure out how to derive some sort of
|
|
|
|
* estimate from them, in which case this test should be weakened to
|
|
|
|
* "if ((fkinfo->nmatched_ec + fkinfo->nmatched_rcols) > 0)".
|
|
|
|
*/
|
|
|
|
if ((fkinfo->nmatched_ec + fkinfo->nmatched_rcols) == fkinfo->nkeys)
|
|
|
|
newlist = lappend(newlist, fkinfo);
|
|
|
|
}
|
|
|
|
/* Replace fkey_list, thereby discarding any useless entries */
|
|
|
|
root->fkey_list = newlist;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
*
|
2000-02-15 21:49:31 +01:00
|
|
|
* CHECKS FOR MERGEJOINABLE AND HASHJOINABLE CLAUSES
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*****************************************************************************/
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-08-16 04:17:58 +02:00
|
|
|
* check_mergejoinable
|
|
|
|
* If the restrictinfo's clause is mergejoinable, set the mergejoin
|
|
|
|
* info fields in the restrictinfo.
|
|
|
|
*
|
|
|
|
* Currently, we support mergejoin for binary opclauses where
|
2003-01-15 20:35:48 +01:00
|
|
|
* the operator is a mergejoinable operator. The arguments can be
|
|
|
|
* anything --- as long as there are no volatile functions in them.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1999-08-16 04:17:58 +02:00
|
|
|
static void
|
|
|
|
check_mergejoinable(RestrictInfo *restrictinfo)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
Expr *clause = restrictinfo->clause;
|
2007-01-20 21:45:41 +01:00
|
|
|
Oid opno;
|
2010-10-31 02:55:20 +01:00
|
|
|
Node *leftarg;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
if (restrictinfo->pseudoconstant)
|
|
|
|
return;
|
2002-12-12 16:49:42 +01:00
|
|
|
if (!is_opclause(clause))
|
1999-08-16 04:17:58 +02:00
|
|
|
return;
|
2004-05-31 01:40:41 +02:00
|
|
|
if (list_length(((OpExpr *) clause)->args) != 2)
|
1999-08-16 04:17:58 +02:00
|
|
|
return;
|
1999-02-15 02:06:59 +01:00
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
opno = ((OpExpr *) clause)->opno;
|
2010-10-31 02:55:20 +01:00
|
|
|
leftarg = linitial(((OpExpr *) clause)->args);
|
1999-02-15 02:06:59 +01:00
|
|
|
|
2010-10-31 02:55:20 +01:00
|
|
|
if (op_mergejoinable(opno, exprType(leftarg)) &&
|
2003-01-15 20:35:48 +01:00
|
|
|
!contain_volatile_functions((Node *) clause))
|
2007-01-20 21:45:41 +01:00
|
|
|
restrictinfo->mergeopfamilies = get_mergejoin_opfamilies(opno);
|
|
|
|
|
|
|
|
/*
|
2007-11-15 22:14:46 +01:00
|
|
|
* Note: op_mergejoinable is just a hint; if we fail to find the operator
|
|
|
|
* in any btree opfamilies, mergeopfamilies remains NIL and so the clause
|
|
|
|
* is not treated as mergejoinable.
|
2007-01-20 21:45:41 +01:00
|
|
|
*/
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-08-16 04:17:58 +02:00
|
|
|
* check_hashjoinable
|
|
|
|
* If the restrictinfo's clause is hashjoinable, set the hashjoin
|
|
|
|
* info fields in the restrictinfo.
|
|
|
|
*
|
|
|
|
* Currently, we support hashjoin for binary opclauses where
|
2014-05-06 18:12:18 +02:00
|
|
|
* the operator is a hashjoinable operator. The arguments can be
|
2003-01-15 20:35:48 +01:00
|
|
|
* anything --- as long as there are no volatile functions in them.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1999-08-16 04:17:58 +02:00
|
|
|
static void
|
|
|
|
check_hashjoinable(RestrictInfo *restrictinfo)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-08-16 04:17:58 +02:00
|
|
|
Expr *clause = restrictinfo->clause;
|
|
|
|
Oid opno;
|
2010-10-31 02:55:20 +01:00
|
|
|
Node *leftarg;
|
1999-02-15 02:06:59 +01:00
|
|
|
|
Revise the planner's handling of "pseudoconstant" WHERE clauses, that is
clauses containing no variables and no volatile functions. Such a clause
can be used as a one-time qual in a gating Result plan node, to suppress
plan execution entirely when it is false. Even when the clause is true,
putting it in a gating node wins by avoiding repeated evaluation of the
clause. In previous PG releases, query_planner() would do this for
pseudoconstant clauses appearing at the top level of the jointree, but
there was no ability to generate a gating Result deeper in the plan tree.
To fix it, get rid of the special case in query_planner(), and instead
process pseudoconstant clauses through the normal RestrictInfo qual
distribution mechanism. When a pseudoconstant clause is found attached to
a path node in create_plan(), pull it out and generate a gating Result at
that point. This requires special-casing pseudoconstants in selectivity
estimation and cost_qual_eval, but on the whole it's pretty clean.
It probably even makes the planner a bit faster than before for the normal
case of no pseudoconstants, since removing pull_constant_clauses saves one
useless traversal of the qual tree. Per gripe from Phil Frost.
2006-07-01 20:38:33 +02:00
|
|
|
if (restrictinfo->pseudoconstant)
|
|
|
|
return;
|
2002-12-12 16:49:42 +01:00
|
|
|
if (!is_opclause(clause))
|
1999-08-16 04:17:58 +02:00
|
|
|
return;
|
2004-05-31 01:40:41 +02:00
|
|
|
if (list_length(((OpExpr *) clause)->args) != 2)
|
1999-08-16 04:17:58 +02:00
|
|
|
return;
|
|
|
|
|
2002-12-12 16:49:42 +01:00
|
|
|
opno = ((OpExpr *) clause)->opno;
|
2010-10-31 02:55:20 +01:00
|
|
|
leftarg = linitial(((OpExpr *) clause)->args);
|
1999-02-15 02:06:59 +01:00
|
|
|
|
2010-10-31 02:55:20 +01:00
|
|
|
if (op_hashjoinable(opno, exprType(leftarg)) &&
|
2003-01-15 20:35:48 +01:00
|
|
|
!contain_volatile_functions((Node *) clause))
|
1999-08-16 04:17:58 +02:00
|
|
|
restrictinfo->hashjoinoperator = opno;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|