Refactor pattern_fixed_prefix() to avoid dealing in incomplete patterns.

Previously, pattern_fixed_prefix() was defined to return whatever fixed
prefix it could extract from the pattern, plus the "rest" of the pattern.
That definition was sensible for LIKE patterns, but not so much for
regexes, where reconstituting a valid pattern minus the prefix could be
quite tricky (certainly the existing code wasn't doing that correctly).
Since the only thing that callers ever did with the "rest" of the pattern
was to pass it to like_selectivity() or regex_selectivity(), let's cut out
the middle-man and just have pattern_fixed_prefix's subroutines do this
directly.  Then pattern_fixed_prefix can return a simple selectivity
number, and the question of how to cope with partial patterns is removed
from its API specification.

While at it, adjust the API spec so that callers who don't actually care
about the pattern's selectivity (which is a lot of them) can pass NULL for
the selectivity pointer to skip doing the work of computing a selectivity
estimate.

This patch is only an API refactoring that doesn't actually change any
processing, other than allowing a little bit of useless work to be skipped.
However, it's necessary infrastructure for my upcoming fix to regex prefix
extraction, because after that change there won't be any simple way to
identify the "rest" of the regex, not even to the low level of fidelity
needed by regex_selectivity.  We can cope with that if regex_fixed_prefix
and regex_selectivity communicate directly, but not if we have to work
within the old API.  Hence, back-patch to all active branches.
This commit is contained in:
Tom Lane 2012-07-09 23:22:55 -04:00
parent e7ef6d7e24
commit 00dac6000d
3 changed files with 60 additions and 128 deletions

View File

@ -2785,7 +2785,6 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
Oid expr_coll;
Const *patt;
Const *prefix = NULL;
Const *rest = NULL;
Pattern_Prefix_Status pstatus = Pattern_Prefix_None;
/*
@ -2814,13 +2813,13 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
case OID_NAME_LIKE_OP:
/* the right-hand const is type text for all of these */
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll,
&prefix, &rest);
&prefix, NULL);
isIndexable = (pstatus != Pattern_Prefix_None);
break;
case OID_BYTEA_LIKE_OP:
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll,
&prefix, &rest);
&prefix, NULL);
isIndexable = (pstatus != Pattern_Prefix_None);
break;
@ -2829,7 +2828,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
case OID_NAME_ICLIKE_OP:
/* the right-hand const is type text for all of these */
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, expr_coll,
&prefix, &rest);
&prefix, NULL);
isIndexable = (pstatus != Pattern_Prefix_None);
break;
@ -2838,7 +2837,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
case OID_NAME_REGEXEQ_OP:
/* the right-hand const is type text for all of these */
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, expr_coll,
&prefix, &rest);
&prefix, NULL);
isIndexable = (pstatus != Pattern_Prefix_None);
break;
@ -2847,7 +2846,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation,
case OID_NAME_ICREGEXEQ_OP:
/* the right-hand const is type text for all of these */
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, expr_coll,
&prefix, &rest);
&prefix, NULL);
isIndexable = (pstatus != Pattern_Prefix_None);
break;
@ -3115,7 +3114,6 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
Oid expr_coll = ((OpExpr *) clause)->inputcollid;
Const *patt = (Const *) rightop;
Const *prefix = NULL;
Const *rest = NULL;
Pattern_Prefix_Status pstatus;
/*
@ -3135,7 +3133,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
if (!op_in_opfamily(expr_op, opfamily))
{
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll,
&prefix, &rest);
&prefix, NULL);
return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
}
break;
@ -3147,7 +3145,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
{
/* the right-hand const is type text for all of these */
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, expr_coll,
&prefix, &rest);
&prefix, NULL);
return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
}
break;
@ -3159,7 +3157,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
{
/* the right-hand const is type text for all of these */
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, expr_coll,
&prefix, &rest);
&prefix, NULL);
return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
}
break;
@ -3171,7 +3169,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation)
{
/* the right-hand const is type text for all of these */
pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, expr_coll,
&prefix, &rest);
&prefix, NULL);
return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus);
}
break;

View File

@ -192,7 +192,10 @@ static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
static Selectivity prefix_selectivity(PlannerInfo *root,
VariableStatData *vardata,
Oid vartype, Oid opfamily, Const *prefixcon);
static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
static Selectivity like_selectivity(const char *patt, int pattlen,
bool case_insensitive);
static Selectivity regex_selectivity(const char *patt, int pattlen,
bool case_insensitive);
static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
static Const *string_to_bytea_const(const char *str, size_t str_len);
@ -1115,9 +1118,9 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Oid vartype;
Oid opfamily;
Pattern_Prefix_Status pstatus;
Const *patt = NULL;
Const *patt;
Const *prefix = NULL;
Const *rest = NULL;
Selectivity rest_selec = 0;
double result;
/*
@ -1207,8 +1210,9 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
}
/*
* Divide pattern into fixed prefix and remainder. Unlike many of the
* other functions in this file, we use the pattern operator's actual
* Pull out any fixed prefix implied by the pattern, and estimate the
* fractional selectivity of the remainder of the pattern. Unlike many of
* the other functions in this file, we use the pattern operator's actual
* collation for this step. This is not because we expect the collation
* to make a big difference in the selectivity estimate (it seldom would),
* but because we want to be sure we cache compiled regexps under the
@ -1216,11 +1220,10 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
*/
patt = (Const *) other;
pstatus = pattern_fixed_prefix(patt, ptype, collation,
&prefix, &rest);
&prefix, &rest_selec);
/*
* If necessary, coerce the prefix constant to the right type. (The "rest"
* constant need not be changed.)
* If necessary, coerce the prefix constant to the right type.
*/
if (prefix && prefix->consttype != vartype)
{
@ -1294,15 +1297,13 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
{
Selectivity heursel;
Selectivity prefixsel;
Selectivity restsel;
if (pstatus == Pattern_Prefix_Partial)
prefixsel = prefix_selectivity(root, &vardata, vartype,
opfamily, prefix);
else
prefixsel = 1.0;
restsel = pattern_selectivity(rest, ptype);
heursel = prefixsel * restsel;
heursel = prefixsel * rest_selec;
if (selec < 0) /* fewer than 10 histogram entries? */
selec = heursel;
@ -5133,9 +5134,9 @@ pattern_char_isalpha(char c, bool is_multibyte,
*
* *prefix is set to a palloc'd prefix string (in the form of a Const node),
* or to NULL if no fixed prefix exists for the pattern.
* *rest is set to a palloc'd Const representing the remainder of the pattern
* after the portion describing the fixed prefix.
* Each of these has the same type (TEXT or BYTEA) as the given pattern Const.
* If rest_selec is not NULL, *rest_selec is set to an estimate of the
* selectivity of the remainder of the pattern (without any fixed prefix).
* The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
*
* The return value distinguishes no fixed prefix, a partial prefix,
* or an exact-match-only pattern.
@ -5143,12 +5144,11 @@ pattern_char_isalpha(char c, bool is_multibyte,
static Pattern_Prefix_Status
like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
Const **prefix_const, Const **rest_const)
Const **prefix_const, Selectivity *rest_selec)
{
char *match;
char *patt;
int pattlen;
char *rest;
Oid typeid = patt_const->consttype;
int pos,
match_pos;
@ -5228,18 +5228,15 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
}
match[match_pos] = '\0';
rest = &patt[pos];
if (typeid != BYTEAOID)
{
*prefix_const = string_to_const(match, typeid);
*rest_const = string_to_const(rest, typeid);
}
else
{
*prefix_const = string_to_bytea_const(match, match_pos);
*rest_const = string_to_bytea_const(rest, pattlen - pos);
}
if (rest_selec != NULL)
*rest_selec = like_selectivity(&patt[pos], pattlen - pos,
case_insensitive);
pfree(patt);
pfree(match);
@ -5256,7 +5253,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
static Pattern_Prefix_Status
regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
Const **prefix_const, Const **rest_const)
Const **prefix_const, Selectivity *rest_selec)
{
char *match;
int pos,
@ -5318,10 +5315,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
/* Pattern must be anchored left */
if (patt[pos] != '^')
{
rest = patt;
*prefix_const = NULL;
*rest_const = string_to_const(rest, typeid);
if (rest_selec != NULL)
*rest_selec = regex_selectivity(patt, strlen(patt),
case_insensitive);
return Pattern_Prefix_None;
}
@ -5335,10 +5333,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
*/
if (strchr(patt + pos, '|') != NULL)
{
rest = patt;
*prefix_const = NULL;
*rest_const = string_to_const(rest, typeid);
if (rest_selec != NULL)
*rest_selec = regex_selectivity(patt, strlen(patt),
case_insensitive);
return Pattern_Prefix_None;
}
@ -5434,10 +5433,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
if (patt[pos] == '$' && patt[pos + 1] == '\0')
{
rest = &patt[pos + 1];
*prefix_const = string_to_const(match, typeid);
*rest_const = string_to_const(rest, typeid);
if (rest_selec != NULL)
*rest_selec = 1.0;
pfree(patt);
pfree(match);
@ -5446,7 +5445,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
}
*prefix_const = string_to_const(match, typeid);
*rest_const = string_to_const(rest, typeid);
if (rest_selec != NULL)
*rest_selec = regex_selectivity(rest, strlen(rest),
case_insensitive);
pfree(patt);
pfree(match);
@ -5459,23 +5461,27 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
Pattern_Prefix_Status
pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
Const **prefix, Const **rest)
Const **prefix, Selectivity *rest_selec)
{
Pattern_Prefix_Status result;
switch (ptype)
{
case Pattern_Type_Like:
result = like_fixed_prefix(patt, false, collation, prefix, rest);
result = like_fixed_prefix(patt, false, collation,
prefix, rest_selec);
break;
case Pattern_Type_Like_IC:
result = like_fixed_prefix(patt, true, collation, prefix, rest);
result = like_fixed_prefix(patt, true, collation,
prefix, rest_selec);
break;
case Pattern_Type_Regex:
result = regex_fixed_prefix(patt, false, collation, prefix, rest);
result = regex_fixed_prefix(patt, false, collation,
prefix, rest_selec);
break;
case Pattern_Type_Regex_IC:
result = regex_fixed_prefix(patt, true, collation, prefix, rest);
result = regex_fixed_prefix(patt, true, collation,
prefix, rest_selec);
break;
default:
elog(ERROR, "unrecognized ptype: %d", (int) ptype);
@ -5590,7 +5596,8 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
/*
* Estimate the selectivity of a pattern of the specified type.
* Note that any fixed prefix of the pattern will have been removed already.
* Note that any fixed prefix of the pattern will have been removed already,
* so actually we may be looking at just a fragment of the pattern.
*
* For now, we use a very simplistic approach: fixed characters reduce the
* selectivity a good deal, character ranges reduce it a little,
@ -5604,37 +5611,10 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
#define PARTIAL_WILDCARD_SEL 2.0
static Selectivity
like_selectivity(Const *patt_const, bool case_insensitive)
like_selectivity(const char *patt, int pattlen, bool case_insensitive)
{
Selectivity sel = 1.0;
int pos;
Oid typeid = patt_const->consttype;
char *patt;
int pattlen;
/* the right-hand const is type text or bytea */
Assert(typeid == BYTEAOID || typeid == TEXTOID);
if (typeid == BYTEAOID && case_insensitive)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("case insensitive matching not supported on type bytea")));
if (typeid != BYTEAOID)
{
patt = TextDatumGetCString(patt_const->constvalue);
pattlen = strlen(patt);
}
else
{
bytea *bstr = DatumGetByteaP(patt_const->constvalue);
pattlen = VARSIZE(bstr) - VARHDRSZ;
patt = (char *) palloc(pattlen);
memcpy(patt, VARDATA(bstr), pattlen);
if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue))
pfree(bstr);
}
/* Skip any leading wildcard; it's already factored into initial sel */
for (pos = 0; pos < pattlen; pos++)
@ -5664,13 +5644,11 @@ like_selectivity(Const *patt_const, bool case_insensitive)
/* Could get sel > 1 if multiple wildcards */
if (sel > 1.0)
sel = 1.0;
pfree(patt);
return sel;
}
static Selectivity
regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
{
Selectivity sel = 1.0;
int paren_depth = 0;
@ -5763,26 +5741,9 @@ regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive)
}
static Selectivity
regex_selectivity(Const *patt_const, bool case_insensitive)
regex_selectivity(const char *patt, int pattlen, bool case_insensitive)
{
Selectivity sel;
char *patt;
int pattlen;
Oid typeid = patt_const->consttype;
/*
* Should be unnecessary, there are no bytea regex operators defined. As
* such, it should be noted that the rest of this function has *not* been
* made safe for binary (possibly NULL containing) strings.
*/
if (typeid == BYTEAOID)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("regular-expression matching not supported on type bytea")));
/* the right-hand const is type text for all of these */
patt = TextDatumGetCString(patt_const->constvalue);
pattlen = strlen(patt);
/* If patt doesn't end with $, consider it to have a trailing wildcard */
if (pattlen > 0 && patt[pattlen - 1] == '$' &&
@ -5802,33 +5763,6 @@ regex_selectivity(Const *patt_const, bool case_insensitive)
return sel;
}
static Selectivity
pattern_selectivity(Const *patt, Pattern_Type ptype)
{
Selectivity result;
switch (ptype)
{
case Pattern_Type_Like:
result = like_selectivity(patt, false);
break;
case Pattern_Type_Like_IC:
result = like_selectivity(patt, true);
break;
case Pattern_Type_Regex:
result = regex_selectivity(patt, false);
break;
case Pattern_Type_Regex_IC:
result = regex_selectivity(patt, true);
break;
default:
elog(ERROR, "unrecognized ptype: %d", (int) ptype);
result = 1.0; /* keep compiler quiet */
break;
}
return result;
}
/*
* For bytea, the increment function need only increment the current byte

View File

@ -134,7 +134,7 @@ extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
Pattern_Type ptype,
Oid collation,
Const **prefix,
Const **rest);
Selectivity *rest_selec);
extern Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
Oid collation);