Fix convert_IN_to_join to properly handle the case where the subselect's

output is not of the same type that's needed for the IN comparison (ie,
where the parser inserted an implicit coercion above the subselect result).
We should record the coerced expression, not just a raw Var referencing
the subselect output, as the quantity that needs to be unique-ified if
we choose to implement the IN as Unique followed by a plain join.

As of 8.3 this error was causing crashes, as seen in bug #4113 from Javier
Hernandez, because the executor was being told to hash or sort the raw
subselect output column using operators appropriate to the coerced type.

In prior versions there was no crash because the executor chose the
hash or sort operators for itself based on the column type it saw.
However, that's still not really right, because what's unique for one data
type might not be unique for another.  In corner cases we could get multiple
outputs of a row that should appear only once, as demonstrated by the
regression test case included in this commit.

However, this patch doesn't apply cleanly to 8.2 or before, and the code
involved has shifted enough over time that I'm hesitant to try to back-patch.
Given the lack of complaints from the field about such corner cases, I think
the bug may not be important enough to risk breaking other things with a
back-patch.
This commit is contained in:
Tom Lane 2008-04-21 20:54:15 +00:00
parent a31b03ba27
commit ff673f558a
5 changed files with 104 additions and 25 deletions

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.129 2008/01/17 20:35:27 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.130 2008/04/21 20:54:15 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -725,10 +725,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
Query *parse = root->parse;
Query *subselect = (Query *) sublink->subselect;
List *in_operators;
List *left_exprs;
List *right_exprs;
Relids left_varnos;
int rtindex;
RangeTblEntry *rte;
RangeTblRef *rtr;
List *subquery_vars;
InClauseInfo *ininfo;
Node *result;
@ -744,28 +747,37 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
return NULL;
if (sublink->testexpr && IsA(sublink->testexpr, OpExpr))
{
Oid opno = ((OpExpr *) sublink->testexpr)->opno;
OpExpr *op = (OpExpr *) sublink->testexpr;
Oid opno = op->opno;
List *opfamilies;
List *opstrats;
if (list_length(op->args) != 2)
return NULL; /* not binary operator? */
get_op_btree_interpretation(opno, &opfamilies, &opstrats);
if (!list_member_int(opstrats, ROWCOMPARE_EQ))
return NULL;
in_operators = list_make1_oid(opno);
left_exprs = list_make1(linitial(op->args));
right_exprs = list_make1(lsecond(op->args));
}
else if (and_clause(sublink->testexpr))
{
ListCell *lc;
/* OK, but we need to extract the per-column operator OIDs */
in_operators = NIL;
/* OK, but we need to extract the per-column info */
in_operators = left_exprs = right_exprs = NIL;
foreach(lc, ((BoolExpr *) sublink->testexpr)->args)
{
OpExpr *op = (OpExpr *) lfirst(lc);
if (!IsA(op, OpExpr)) /* probably shouldn't happen */
return NULL;
if (list_length(op->args) != 2)
return NULL; /* not binary operator? */
in_operators = lappend_oid(in_operators, op->opno);
left_exprs = lappend(left_exprs, linitial(op->args));
right_exprs = lappend(right_exprs, lsecond(op->args));
}
}
else
@ -782,10 +794,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
* The left-hand expressions must contain some Vars of the current query,
* else it's not gonna be a join.
*/
left_varnos = pull_varnos(sublink->testexpr);
left_varnos = pull_varnos((Node *) left_exprs);
if (bms_is_empty(left_varnos))
return NULL;
/* ... and the right-hand expressions better not contain Vars at all */
Assert(!contain_var_clause((Node *) right_exprs));
/*
* The combining operators and left-hand expressions mustn't be volatile.
*/
@ -810,6 +825,20 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
rtr->rtindex = rtindex;
parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr);
/*
* Build a list of Vars representing the subselect outputs.
*/
subquery_vars = generate_subquery_vars(root,
subselect->targetList,
rtindex);
/*
* Build the result qual expression, replacing Params with these Vars.
*/
result = convert_testexpr(root,
sublink->testexpr,
subquery_vars);
/*
* Now build the InClauseInfo node.
*/
@ -819,24 +848,20 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
ininfo->in_operators = in_operators;
/*
* ininfo->sub_targetlist is filled with a list of Vars representing the
* subselect outputs.
* ininfo->sub_targetlist must be filled with a list of expressions that
* would need to be unique-ified if we try to implement the IN using a
* regular join to unique-ified subquery output. This is most easily done
* by applying convert_testexpr to just the RHS inputs of the testexpr
* operators. That handles cases like type coercions of the subquery
* outputs, clauses dropped due to const-simplification, etc.
*/
ininfo->sub_targetlist = generate_subquery_vars(root,
subselect->targetList,
rtindex);
Assert(list_length(in_operators) == list_length(ininfo->sub_targetlist));
ininfo->sub_targetlist = (List *) convert_testexpr(root,
(Node *) right_exprs,
subquery_vars);
/* Add the completed node to the query's list */
root->in_info_list = lappend(root->in_info_list, ininfo);
/*
* Build the result qual expression.
*/
result = convert_testexpr(root,
sublink->testexpr,
ininfo->sub_targetlist);
return result;
}

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.142 2008/01/01 19:45:50 momjian Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.143 2008/04/21 20:54:15 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -898,7 +898,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
/*
* translate_sub_tlist - get subquery column numbers represented by tlist
*
* The given targetlist should contain only Vars referencing the given relid.
* The given targetlist usually contains only Vars referencing the given relid.
* Extract their varattnos (ie, the column numbers of the subquery) and return
* as an integer List.
*

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.155 2008/03/24 21:53:04 tgl Exp $
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.156 2008/04/21 20:54:15 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -1107,8 +1107,10 @@ typedef struct OuterJoinInfo
* We record information about each such IN clause in an InClauseInfo struct.
* These structs are kept in the PlannerInfo node's in_info_list.
*
* Note: sub_targetlist is just a list of Vars or expressions; it does not
* contain TargetEntry nodes.
* Note: sub_targetlist is a bit misnamed; it is a list of the expressions
* on the RHS of the IN's join clauses. (This normally starts out as a list
* of Vars referencing the subquery outputs, but can get mutated if the
* subquery is flattened into the main query.)
*/
typedef struct InClauseInfo
@ -1116,8 +1118,8 @@ typedef struct InClauseInfo
NodeTag type;
Relids lefthand; /* base relids in lefthand expressions */
Relids righthand; /* base relids coming from the subselect */
List *sub_targetlist; /* targetlist of original RHS subquery */
List *in_operators; /* OIDs of the IN's equality operator(s) */
List *sub_targetlist; /* RHS expressions of the IN's comparisons */
List *in_operators; /* OIDs of the IN's equality operators */
} InClauseInfo;
/*

View File

@ -408,3 +408,34 @@ select * from (
0
(1 row)
--
-- Test that an IN implemented using a UniquePath does unique-ification
-- with the right semantics, as per bug #4113. (Unfortunately we have
-- no simple way to ensure that this test case actually chooses that type
-- of plan, but it does in releases 7.4-8.3. Note that an ordering difference
-- here might mean that some other plan type is being used, rendering the test
-- pointless.)
--
create temp table numeric_table (num_col numeric);
insert into numeric_table values (1), (1.000000000000000000001), (2), (3);
create temp table float_table (float_col float8);
insert into float_table values (1), (2), (3);
select * from float_table
where float_col in (select num_col from numeric_table);
float_col
-----------
1
2
3
(3 rows)
select * from numeric_table
where num_col in (select float_col from float_table);
num_col
-------------------------
1
1.000000000000000000001
2
3
(4 rows)

View File

@ -251,3 +251,24 @@ select * from (
select min(unique1) from tenk1 as a
where not exists (select 1 from tenk1 as b where b.unique2 = 10000)
) ss;
--
-- Test that an IN implemented using a UniquePath does unique-ification
-- with the right semantics, as per bug #4113. (Unfortunately we have
-- no simple way to ensure that this test case actually chooses that type
-- of plan, but it does in releases 7.4-8.3. Note that an ordering difference
-- here might mean that some other plan type is being used, rendering the test
-- pointless.)
--
create temp table numeric_table (num_col numeric);
insert into numeric_table values (1), (1.000000000000000000001), (2), (3);
create temp table float_table (float_col float8);
insert into float_table values (1), (2), (3);
select * from float_table
where float_col in (select num_col from numeric_table);
select * from numeric_table
where num_col in (select float_col from float_table);