Fix PlaceHolderVar mechanism's interaction with outer joins.

The point of a PlaceHolderVar is to allow a non-strict expression to be
evaluated below an outer join, after which its value bubbles up like a Var
and can be forced to NULL when the outer join's semantics require that.
However, there was a serious design oversight in that, namely that we
didn't ensure that there was actually a correct place in the plan tree
to evaluate the placeholder :-(.  It may be necessary to delay evaluation
of an outer join to ensure that a placeholder that should be evaluated
below the join can be evaluated there.  Per recent bug report from Kirill
Simonov.

Back-patch to 8.4 where the PlaceHolderVar mechanism was introduced.
This commit is contained in:
Tom Lane 2010-09-28 12:08:56 -04:00
parent 9c5f4f6cb5
commit eb22950510
11 changed files with 374 additions and 36 deletions

View File

@ -1806,6 +1806,7 @@ _copyPlaceHolderInfo(PlaceHolderInfo *from)
COPY_NODE_FIELD(ph_var);
COPY_BITMAPSET_FIELD(ph_eval_at);
COPY_BITMAPSET_FIELD(ph_needed);
COPY_BITMAPSET_FIELD(ph_may_need);
COPY_SCALAR_FIELD(ph_width);
return newnode;

View File

@ -838,6 +838,7 @@ _equalPlaceHolderInfo(PlaceHolderInfo *a, PlaceHolderInfo *b)
COMPARE_NODE_FIELD(ph_var);
COMPARE_BITMAPSET_FIELD(ph_eval_at);
COMPARE_BITMAPSET_FIELD(ph_needed);
COMPARE_BITMAPSET_FIELD(ph_may_need);
COMPARE_SCALAR_FIELD(ph_width);
return true;

View File

@ -1768,6 +1768,7 @@ _outPlaceHolderInfo(StringInfo str, PlaceHolderInfo *node)
WRITE_NODE_FIELD(ph_var);
WRITE_BITMAPSET_FIELD(ph_eval_at);
WRITE_BITMAPSET_FIELD(ph_needed);
WRITE_BITMAPSET_FIELD(ph_may_need);
WRITE_INT_FIELD(ph_width);
}

View File

@ -396,6 +396,8 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
phinfo->ph_eval_at = bms_add_member(phinfo->ph_eval_at, relid);
phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
/* ph_may_need probably isn't used after this, but fix it anyway */
phinfo->ph_may_need = bms_del_member(phinfo->ph_may_need, relid);
}
/*

View File

@ -187,6 +187,13 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars, Relids where_needed)
phinfo->ph_needed = bms_add_members(phinfo->ph_needed,
where_needed);
/*
* Update ph_may_need too. This is currently only necessary
* when being called from build_base_rel_tlists, but we may as
* well do it always.
*/
phinfo->ph_may_need = bms_add_members(phinfo->ph_may_need,
where_needed);
}
else
elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
@ -465,7 +472,11 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
/* Now we can add the SpecialJoinInfo to join_info_list */
if (sjinfo)
{
root->join_info_list = lappend(root->join_info_list, sjinfo);
/* Each time we do that, recheck placeholder eval levels */
update_placeholder_eval_levels(root, sjinfo);
}
/*
* Finally, compute the output joinlist. We fold subproblems together
@ -687,6 +698,32 @@ make_outerjoininfo(PlannerInfo *root,
}
}
/*
* Examine PlaceHolderVars. If a PHV is supposed to be evaluated within
* this join's nullable side, and it may get used above this join, then
* ensure that min_righthand contains the full eval_at set of the PHV.
* This ensures that the PHV actually can be evaluated within the RHS.
* Note that this works only because we should already have determined
* the final eval_at level for any PHV syntactically within this join.
*/
foreach(l, root->placeholder_list)
{
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
Relids ph_syn_level = phinfo->ph_var->phrels;
/* Ignore placeholder if it didn't syntactically come from RHS */
if (!bms_is_subset(ph_syn_level, right_rels))
continue;
/* We can also ignore it if it's certainly not used above this join */
/* XXX this test is probably overly conservative */
if (bms_is_subset(phinfo->ph_may_need, min_righthand))
continue;
/* Else, prevent join from being formed before we eval the PHV */
min_righthand = bms_add_members(min_righthand, phinfo->ph_eval_at);
}
/*
* If we found nothing to put in min_lefthand, punt and make it the full
* LHS, to avoid having an empty min_lefthand which will confuse later

View File

@ -180,14 +180,19 @@ query_planner(PlannerInfo *root, List *tlist,
add_base_rels_to_query(root, (Node *) parse->jointree);
/*
* Examine the targetlist and qualifications, adding entries to baserel
* targetlists for all referenced Vars. Restrict and join clauses are
* added to appropriate lists belonging to the mentioned relations. We
* also build EquivalenceClasses for provably equivalent expressions, and
* form a target joinlist for make_one_rel() to work from.
* Examine the targetlist and join tree, adding entries to baserel
* targetlists for all referenced Vars, and generating PlaceHolderInfo
* entries for all referenced PlaceHolderVars. Restrict and join clauses
* are added to appropriate lists belonging to the mentioned relations.
* We also build EquivalenceClasses for provably equivalent expressions.
* The SpecialJoinInfo list is also built to hold information about join
* order restrictions. Finally, we form a target joinlist for
* make_one_rel() to work from.
*/
build_base_rel_tlists(root, tlist);
find_placeholders_in_jointree(root);
joinlist = deconstruct_jointree(root);
/*
@ -218,10 +223,12 @@ query_planner(PlannerInfo *root, List *tlist,
/*
* Examine any "placeholder" expressions generated during subquery pullup.
* Make sure that we know what level to evaluate them at, and that the
* Vars they need are marked as needed.
* Make sure that the Vars they need are marked as needed at the relevant
* join level. This must be done before join removal because it might
* cause Vars or placeholders to be needed above a join when they weren't
* so marked before.
*/
fix_placeholder_eval_levels(root);
fix_placeholder_input_needed_levels(root);
/*
* Remove any useless outer joins. Ideally this would be done during

View File

@ -22,6 +22,11 @@
#include "optimizer/var.h"
#include "utils/lsyscache.h"
/* Local functions */
static Relids find_placeholders_recurse(PlannerInfo *root, Node *jtnode);
static void find_placeholders_in_qual(PlannerInfo *root, Node *qual,
Relids relids);
/*
* make_placeholder_expr
@ -47,6 +52,12 @@ make_placeholder_expr(PlannerInfo *root, Expr *expr, Relids phrels)
* find_placeholder_info
* Fetch the PlaceHolderInfo for the given PHV; create it if not found
*
* This is separate from make_placeholder_expr because subquery pullup has
* to make PlaceHolderVars for expressions that might not be used at all in
* the upper query, or might not remain after const-expression simplification.
* We build PlaceHolderInfos only for PHVs that are still present in the
* simplified query passed to query_planner().
*
* Note: this should only be called after query_planner() has started.
*/
PlaceHolderInfo *
@ -71,8 +82,9 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv)
phinfo->phid = phv->phid;
phinfo->ph_var = copyObject(phv);
phinfo->ph_eval_at = pull_varnos((Node *) phv);
/* ph_eval_at may change later, see fix_placeholder_eval_levels */
/* ph_eval_at may change later, see update_placeholder_eval_levels */
phinfo->ph_needed = NULL; /* initially it's unused */
phinfo->ph_may_need = NULL;
/* for the moment, estimate width using just the datatype info */
phinfo->ph_width = get_typavgwidth(exprType((Node *) phv->phexpr),
exprTypmod((Node *) phv->phexpr));
@ -83,21 +95,168 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv)
}
/*
* fix_placeholder_eval_levels
* find_placeholders_in_jointree
* Search the jointree for PlaceHolderVars, and build PlaceHolderInfos
*
* We don't need to look at the targetlist because build_base_rel_tlists()
* will already have made entries for any PHVs in the tlist.
*/
void
find_placeholders_in_jointree(PlannerInfo *root)
{
/* We need do nothing if the query contains no PlaceHolderVars */
if (root->glob->lastPHId != 0)
{
/* Start recursion at top of jointree */
Assert(root->parse->jointree != NULL &&
IsA(root->parse->jointree, FromExpr));
(void) find_placeholders_recurse(root, (Node *) root->parse->jointree);
}
}
/*
* find_placeholders_recurse
* One recursion level of find_placeholders_in_jointree.
*
* jtnode is the current jointree node to examine.
*
* The result is the set of base Relids contained in or below jtnode.
* This is just an internal convenience, it's not used at the top level.
*/
static Relids
find_placeholders_recurse(PlannerInfo *root, Node *jtnode)
{
Relids jtrelids;
if (jtnode == NULL)
return NULL;
if (IsA(jtnode, RangeTblRef))
{
int varno = ((RangeTblRef *) jtnode)->rtindex;
/* No quals to deal with, just return correct result */
jtrelids = bms_make_singleton(varno);
}
else if (IsA(jtnode, FromExpr))
{
FromExpr *f = (FromExpr *) jtnode;
ListCell *l;
/*
* First, recurse to handle child joins, and form their relid set.
*/
jtrelids = NULL;
foreach(l, f->fromlist)
{
Relids sub_relids;
sub_relids = find_placeholders_recurse(root, lfirst(l));
jtrelids = bms_join(jtrelids, sub_relids);
}
/*
* Now process the top-level quals.
*/
find_placeholders_in_qual(root, f->quals, jtrelids);
}
else if (IsA(jtnode, JoinExpr))
{
JoinExpr *j = (JoinExpr *) jtnode;
Relids leftids,
rightids;
/*
* First, recurse to handle child joins, and form their relid set.
*/
leftids = find_placeholders_recurse(root, j->larg);
rightids = find_placeholders_recurse(root, j->rarg);
jtrelids = bms_join(leftids, rightids);
/* Process the qual clauses */
find_placeholders_in_qual(root, j->quals, jtrelids);
}
else
{
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(jtnode));
jtrelids = NULL; /* keep compiler quiet */
}
return jtrelids;
}
/*
* find_placeholders_in_qual
* Process a qual clause for find_placeholders_in_jointree.
*
* relids is the syntactic join level to mark as the "maybe needed" level
* for each PlaceHolderVar found in the qual clause.
*/
static void
find_placeholders_in_qual(PlannerInfo *root, Node *qual, Relids relids)
{
List *vars;
ListCell *vl;
/*
* pull_var_clause does more than we need here, but it'll do and it's
* convenient to use.
*/
vars = pull_var_clause(qual, PVC_INCLUDE_PLACEHOLDERS);
foreach(vl, vars)
{
PlaceHolderVar *phv = (PlaceHolderVar *) lfirst(vl);
PlaceHolderInfo *phinfo;
/* Ignore any plain Vars */
if (!IsA(phv, PlaceHolderVar))
continue;
/* Create a PlaceHolderInfo entry if there's not one already */
phinfo = find_placeholder_info(root, phv);
/* Mark the PHV as possibly needed at the qual's syntactic level */
phinfo->ph_may_need = bms_add_members(phinfo->ph_may_need, relids);
/*
* This is a bit tricky: the PHV's contained expression may contain
* other, lower-level PHVs. We need to get those into the
* PlaceHolderInfo list, but they aren't going to be needed where the
* outer PHV is referenced. Rather, they'll be needed where the outer
* PHV is evaluated. We can estimate that (conservatively) as the
* syntactic location of the PHV's expression. Recurse to take care
* of any such PHVs.
*/
find_placeholders_in_qual(root, (Node *) phv->phexpr, phv->phrels);
}
list_free(vars);
}
/*
* update_placeholder_eval_levels
* Adjust the target evaluation levels for placeholders
*
* The initial eval_at level set by find_placeholder_info was the set of
* rels used in the placeholder's expression (or the whole subselect if
* the expr is variable-free). If the subselect contains any outer joins
* that can null any of those rels, we must delay evaluation to above those
* joins.
* rels used in the placeholder's expression (or the whole subselect below
* the placeholder's syntactic location, if the expr is variable-free).
* If the subselect contains any outer joins that can null any of those rels,
* we must delay evaluation to above those joins.
*
* We repeat this operation each time we add another outer join to
* root->join_info_list. It's somewhat annoying to have to do that, but
* since we don't have very much information on the placeholders' locations,
* it's hard to avoid. Each placeholder's eval_at level must be correct
* by the time it starts to figure in outer-join delay decisions for higher
* outer joins.
*
* In future we might want to put additional policy/heuristics here to
* try to determine an optimal evaluation level. The current rules will
* result in evaluation at the lowest possible level.
* result in evaluation at the lowest possible level. However, pushing a
* placeholder eval up the tree is likely to further constrain evaluation
* order for outer joins, so it could easily be counterproductive; and we
* don't have enough information at this point to make an intelligent choice.
*/
void
fix_placeholder_eval_levels(PlannerInfo *root)
update_placeholder_eval_levels(PlannerInfo *root, SpecialJoinInfo *new_sjinfo)
{
ListCell *lc1;
@ -105,16 +264,27 @@ fix_placeholder_eval_levels(PlannerInfo *root)
{
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc1);
Relids syn_level = phinfo->ph_var->phrels;
Relids eval_at = phinfo->ph_eval_at;
Relids eval_at;
bool found_some;
ListCell *lc2;
/*
* We don't need to do any work on this placeholder unless the
* newly-added outer join is syntactically beneath its location.
*/
if (!bms_is_subset(new_sjinfo->syn_lefthand, syn_level) ||
!bms_is_subset(new_sjinfo->syn_righthand, syn_level))
continue;
/*
* Check for delays due to lower outer joins. This is the same logic
* as in check_outerjoin_delay in initsplan.c, except that we don't
* want to modify the delay_upper_joins flags; that was all handled
* already during distribute_qual_to_rels.
* have anything to do with the delay_upper_joins flags; delay of
* upper outer joins will be handled later, based on the eval_at
* values we compute now.
*/
eval_at = phinfo->ph_eval_at;
do
{
found_some = false;
@ -122,7 +292,7 @@ fix_placeholder_eval_levels(PlannerInfo *root)
{
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc2);
/* disregard joins not within the expr's sub-select */
/* disregard joins not within the PHV's sub-select */
if (!bms_is_subset(sjinfo->syn_lefthand, syn_level) ||
!bms_is_subset(sjinfo->syn_righthand, syn_level))
continue;
@ -149,16 +319,32 @@ fix_placeholder_eval_levels(PlannerInfo *root)
} while (found_some);
phinfo->ph_eval_at = eval_at;
}
}
/*
* Now that we know where to evaluate the placeholder, make sure that
* any vars or placeholders it uses will be available at that join
* level. NOTE: this could cause more PlaceHolderInfos to be added to
* placeholder_list. That is okay because we'll process them before
* falling out of the foreach loop. Also, it could cause the
* ph_needed sets of existing list entries to expand, which is also
* okay because this loop doesn't examine those.
*/
/*
* fix_placeholder_input_needed_levels
* Adjust the "needed at" levels for placeholder inputs
*
* This is called after we've finished determining the eval_at levels for
* all placeholders. We need to make sure that all vars and placeholders
* needed to evaluate each placeholder will be available at the join level
* where the evaluation will be done. Note that this loop can have
* side-effects on the ph_needed sets of other PlaceHolderInfos; that's okay
* because we don't examine ph_needed here, so there are no ordering issues
* to worry about.
*/
void
fix_placeholder_input_needed_levels(PlannerInfo *root)
{
ListCell *lc;
foreach(lc, root->placeholder_list)
{
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
Relids eval_at = phinfo->ph_eval_at;
/* No work unless it'll be evaluated above baserel level */
if (bms_membership(eval_at) == BMS_MULTIPLE)
{
List *vars = pull_var_clause((Node *) phinfo->ph_var->phexpr,
@ -175,11 +361,11 @@ fix_placeholder_eval_levels(PlannerInfo *root)
* Add any required PlaceHolderVars to base rels' targetlists.
*
* If any placeholder can be computed at a base rel and is needed above it,
* add it to that rel's targetlist. We have to do this separately from
* fix_placeholder_eval_levels() because join removal happens in between,
* and can change the ph_eval_at sets. There is essentially the same logic
* in add_placeholders_to_joinrel, but we can't do that part until joinrels
* are formed.
* add it to that rel's targetlist. This might look like it could be merged
* with fix_placeholder_input_needed_levels, but it must be separate because
* join removal happens in between, and can change the ph_eval_at sets. There
* is essentially the same logic in add_placeholders_to_joinrel, but we can't
* do that part until joinrels are formed.
*/
void
add_placeholders_to_base_rels(PlannerInfo *root)

View File

@ -1316,8 +1316,22 @@ typedef struct AppendRelInfo
* then allow it to bubble up like a Var until the ph_needed join level.
* ph_needed has the same definition as attr_needed for a regular Var.
*
* ph_may_need is an initial estimate of ph_needed, formed using the
* syntactic locations of references to the PHV. We need this in order to
* determine whether the PHV reference forces a join ordering constraint:
* if the PHV has to be evaluated below the nullable side of an outer join,
* and then used above that outer join, we must constrain join order to ensure
* there's a valid place to evaluate the PHV below the join. The final
* actual ph_needed level might be lower than ph_may_need, but we can't
* determine that until later on. Fortunately this doesn't matter for what
* we need ph_may_need for: if there's a PHV reference syntactically
* above the outer join, it's not going to be allowed to drop below the outer
* join, so we would come to the same conclusions about join order even if
* we had the final ph_needed value to compare to.
*
* We create a PlaceHolderInfo only after determining that the PlaceHolderVar
* is actually referenced in the plan tree.
* is actually referenced in the plan tree, so that unreferenced placeholders
* don't result in unnecessary constraints on join order.
*/
typedef struct PlaceHolderInfo
@ -1328,6 +1342,7 @@ typedef struct PlaceHolderInfo
PlaceHolderVar *ph_var; /* copy of PlaceHolderVar tree */
Relids ph_eval_at; /* lowest level we can evaluate value at */
Relids ph_needed; /* highest level the value is needed at */
Relids ph_may_need; /* highest level it might be needed at */
int32 ph_width; /* estimated attribute width */
} PlaceHolderInfo;

View File

@ -21,7 +21,10 @@ extern PlaceHolderVar *make_placeholder_expr(PlannerInfo *root, Expr *expr,
Relids phrels);
extern PlaceHolderInfo *find_placeholder_info(PlannerInfo *root,
PlaceHolderVar *phv);
extern void fix_placeholder_eval_levels(PlannerInfo *root);
extern void find_placeholders_in_jointree(PlannerInfo *root);
extern void update_placeholder_eval_levels(PlannerInfo *root,
SpecialJoinInfo *new_sjinfo);
extern void fix_placeholder_input_needed_levels(PlannerInfo *root);
extern void add_placeholders_to_base_rels(PlannerInfo *root);
extern void add_placeholders_to_joinrel(PlannerInfo *root,
RelOptInfo *joinrel);

View File

@ -2443,6 +2443,51 @@ group by t1.q2 order by 1;
4567890123456789 | 6
(4 rows)
--
-- test incorrect failure to NULL pulled-up subexpressions
--
begin;
create temp table a (
code char not null,
constraint a_pk primary key (code)
);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "a_pk" for table "a"
create temp table b (
a char not null,
num integer not null,
constraint b_pk primary key (a, num)
);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "b_pk" for table "b"
create temp table c (
name char not null,
a char,
constraint c_pk primary key (name)
);
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "c_pk" for table "c"
insert into a (code) values ('p');
insert into a (code) values ('q');
insert into b (a, num) values ('p', 1);
insert into b (a, num) values ('p', 2);
insert into c (name, a) values ('A', 'p');
insert into c (name, a) values ('B', 'q');
insert into c (name, a) values ('C', null);
select c.name, ss.code, ss.b_cnt, ss.const
from c left join
(select a.code, coalesce(b_grp.cnt, 0) as b_cnt, -1 as const
from a left join
(select count(1) as cnt, b.a from b group by b.a) as b_grp
on a.code = b_grp.a
) as ss
on (c.a = ss.code)
order by c.name;
name | code | b_cnt | const
------+------+-------+-------
A | p | 2 | -1
B | q | 0 | -1
C | | |
(3 rows)
rollback;
--
-- test the corner cases FULL JOIN ON TRUE and FULL JOIN ON FALSE
--

View File

@ -562,6 +562,46 @@ from int8_tbl t1 left join
on (t1.q2 = t2.q1)
group by t1.q2 order by 1;
--
-- test incorrect failure to NULL pulled-up subexpressions
--
begin;
create temp table a (
code char not null,
constraint a_pk primary key (code)
);
create temp table b (
a char not null,
num integer not null,
constraint b_pk primary key (a, num)
);
create temp table c (
name char not null,
a char,
constraint c_pk primary key (name)
);
insert into a (code) values ('p');
insert into a (code) values ('q');
insert into b (a, num) values ('p', 1);
insert into b (a, num) values ('p', 2);
insert into c (name, a) values ('A', 'p');
insert into c (name, a) values ('B', 'q');
insert into c (name, a) values ('C', null);
select c.name, ss.code, ss.b_cnt, ss.const
from c left join
(select a.code, coalesce(b_grp.cnt, 0) as b_cnt, -1 as const
from a left join
(select count(1) as cnt, b.a from b group by b.a) as b_grp
on a.code = b_grp.a
) as ss
on (c.a = ss.code)
order by c.name;
rollback;
--
-- test the corner cases FULL JOIN ON TRUE and FULL JOIN ON FALSE
--