Fix planner to pass correct collation to operator selectivity estimators.

We can do this without creating an API break for estimation functions
by passing the collation using the existing fmgr functionality for
passing an input collation as a hidden parameter.

The need for this was foreseen at the outset, but we didn't get around to
making it happen in 9.1 because of the decision to sort all pg_statistic
histograms according to the database's default collation.  That meant that
selectivity estimators generally need to use the default collation too,
even if they're estimating for an operator that will do something
different.  The reason it's suddenly become more interesting is that
regexp interpretation also uses a collation (for its LC_CTYPE not LC_COLLATE
property), and we no longer want to use the wrong collation when examining
regexps during planning.  It's not that the selectivity estimate is likely
to change much from this; rather that we are thinking of caching compiled
regexps during planner estimation, and we won't get the intended benefit
if we cache them with a different collation than the executor will use.

Back-patch to 9.1, both because the regexp change is likely to get
back-patched and because we might as well get this right in all
collation-supporting branches, in case any third-party code wants to
rely on getting the collation.  The patch turns out to be minuscule
now that I've done it ...
This commit is contained in:
Tom Lane 2012-07-08 23:51:08 -04:00
parent c6aae3042b
commit e7ef6d7e24
4 changed files with 83 additions and 51 deletions

View File

@ -578,6 +578,7 @@ clause_selectivity(PlannerInfo *root,
list_make2(var,
makeBoolConst(true,
false)),
InvalidOid,
varRelid);
}
}
@ -649,13 +650,15 @@ clause_selectivity(PlannerInfo *root,
}
else if (is_opclause(clause) || IsA(clause, DistinctExpr))
{
Oid opno = ((OpExpr *) clause)->opno;
OpExpr *opclause = (OpExpr *) clause;
Oid opno = opclause->opno;
if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
{
/* Estimate selectivity for a join clause. */
s1 = join_selectivity(root, opno,
((OpExpr *) clause)->args,
opclause->args,
opclause->inputcollid,
jointype,
sjinfo);
}
@ -663,7 +666,8 @@ clause_selectivity(PlannerInfo *root,
{
/* Estimate selectivity for a restriction clause. */
s1 = restriction_selectivity(root, opno,
((OpExpr *) clause)->args,
opclause->args,
opclause->inputcollid,
varRelid);
}

View File

@ -1010,6 +1010,7 @@ Selectivity
restriction_selectivity(PlannerInfo *root,
Oid operatorid,
List *args,
Oid inputcollid,
int varRelid)
{
RegProcedure oprrest = get_oprrest(operatorid);
@ -1022,11 +1023,12 @@ restriction_selectivity(PlannerInfo *root,
if (!oprrest)
return (Selectivity) 0.5;
result = DatumGetFloat8(OidFunctionCall4(oprrest,
PointerGetDatum(root),
ObjectIdGetDatum(operatorid),
PointerGetDatum(args),
Int32GetDatum(varRelid)));
result = DatumGetFloat8(OidFunctionCall4Coll(oprrest,
inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operatorid),
PointerGetDatum(args),
Int32GetDatum(varRelid)));
if (result < 0.0 || result > 1.0)
elog(ERROR, "invalid restriction selectivity: %f", result);
@ -1045,6 +1047,7 @@ Selectivity
join_selectivity(PlannerInfo *root,
Oid operatorid,
List *args,
Oid inputcollid,
JoinType jointype,
SpecialJoinInfo *sjinfo)
{
@ -1058,12 +1061,13 @@ join_selectivity(PlannerInfo *root,
if (!oprjoin)
return (Selectivity) 0.5;
result = DatumGetFloat8(OidFunctionCall5(oprjoin,
PointerGetDatum(root),
ObjectIdGetDatum(operatorid),
PointerGetDatum(args),
Int16GetDatum(jointype),
PointerGetDatum(sjinfo)));
result = DatumGetFloat8(OidFunctionCall5Coll(oprjoin,
inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operatorid),
PointerGetDatum(args),
Int16GetDatum(jointype),
PointerGetDatum(sjinfo)));
if (result < 0.0 || result > 1.0)
elog(ERROR, "invalid join selectivity: %f", result);

View File

@ -83,6 +83,15 @@
* joins, however, the selectivity is defined as the fraction of the left-hand
* side relation's rows that are expected to have a match (ie, at least one
* row with a TRUE result) in the right-hand side.
*
* For both oprrest and oprjoin functions, the operator's input collation OID
* (if any) is passed using the standard fmgr mechanism, so that the estimator
* function can fetch it with PG_GET_COLLATION(). Note, however, that all
* statistics in pg_statistic are currently built using the database's default
* collation. Thus, in most cases where we are looking at statistics, we
* should ignore the actual operator collation and use DEFAULT_COLLATION_OID.
* We expect that the error induced by doing this is usually not large enough
* to justify complicating matters.
*----------
*/
@ -1097,6 +1106,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
Oid collation = PG_GET_COLLATION();
VariableStatData vardata;
Node *other;
bool varonleft;
@ -1197,12 +1207,15 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
}
/*
* Divide pattern into fixed prefix and remainder. XXX we have to assume
* default collation here, because we don't have access to the actual
* input collation for the operator. FIXME ...
* Divide pattern into fixed prefix and remainder. Unlike many of the
* other functions in this file, we use the pattern operator's actual
* collation for this step. This is not because we expect the collation
* to make a big difference in the selectivity estimate (it seldom would),
* but because we want to be sure we cache compiled regexps under the
* right cache key, so that they can be re-used at runtime.
*/
patt = (Const *) other;
pstatus = pattern_fixed_prefix(patt, ptype, DEFAULT_COLLATION_OID,
pstatus = pattern_fixed_prefix(patt, ptype, collation,
&prefix, &rest);
/*
@ -1847,18 +1860,20 @@ scalararraysel(PlannerInfo *root,
elem_nulls[i],
elmbyval));
if (is_join_clause)
s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int16GetDatum(jointype),
PointerGetDatum(sjinfo)));
s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
clause->inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int16GetDatum(jointype),
PointerGetDatum(sjinfo)));
else
s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int32GetDatum(varRelid)));
s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
clause->inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int32GetDatum(varRelid)));
if (useOr)
{
@ -1912,18 +1927,20 @@ scalararraysel(PlannerInfo *root,
*/
args = list_make2(leftop, elem);
if (is_join_clause)
s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int16GetDatum(jointype),
PointerGetDatum(sjinfo)));
s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
clause->inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int16GetDatum(jointype),
PointerGetDatum(sjinfo)));
else
s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int32GetDatum(varRelid)));
s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
clause->inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int32GetDatum(varRelid)));
if (useOr)
{
@ -1962,18 +1979,20 @@ scalararraysel(PlannerInfo *root,
dummyexpr->collation = clause->inputcollid;
args = list_make2(leftop, dummyexpr);
if (is_join_clause)
s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int16GetDatum(jointype),
PointerGetDatum(sjinfo)));
s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
clause->inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int16GetDatum(jointype),
PointerGetDatum(sjinfo)));
else
s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int32GetDatum(varRelid)));
s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
clause->inputcollid,
PointerGetDatum(root),
ObjectIdGetDatum(operator),
PointerGetDatum(args),
Int32GetDatum(varRelid)));
s1 = useOr ? 0.0 : 1.0;
/*
@ -2046,6 +2065,7 @@ rowcomparesel(PlannerInfo *root,
{
Selectivity s1;
Oid opno = linitial_oid(clause->opnos);
Oid inputcollid = linitial_oid(clause->inputcollids);
List *opargs;
bool is_join_clause;
@ -2086,6 +2106,7 @@ rowcomparesel(PlannerInfo *root,
/* Estimate selectivity for a join clause. */
s1 = join_selectivity(root, opno,
opargs,
inputcollid,
jointype,
sjinfo);
}
@ -2094,6 +2115,7 @@ rowcomparesel(PlannerInfo *root,
/* Estimate selectivity for a restriction clause. */
s1 = restriction_selectivity(root, opno,
opargs,
inputcollid,
varRelid);
}

View File

@ -43,11 +43,13 @@ extern bool has_unique_index(RelOptInfo *rel, AttrNumber attno);
/*
 * Estimate the selectivity of a restriction clause by calling the operator's
 * restriction estimator.  inputcollid is the operator's input collation; it
 * is forwarded to the estimator as the collation of the fmgr call.  Returns
 * 0.5 when the operator has no restriction estimator, and raises an error if
 * the estimator's result falls outside the range 0..1.
 */
extern Selectivity restriction_selectivity(PlannerInfo *root,
Oid operatorid,
List *args,
Oid inputcollid,
int varRelid);
/*
 * Estimate the selectivity of a join clause by calling the operator's join
 * estimator.  inputcollid is the operator's input collation; it is forwarded
 * to the estimator as the collation of the fmgr call.  Returns 0.5 when the
 * operator has no join estimator, and raises an error if the estimator's
 * result falls outside the range 0..1.
 */
extern Selectivity join_selectivity(PlannerInfo *root,
Oid operatorid,
List *args,
Oid inputcollid,
JoinType jointype,
SpecialJoinInfo *sjinfo);