From 00dac6000d422033c3e8d191f01ee0e6525794c2 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 9 Jul 2012 23:22:55 -0400
Subject: [PATCH] Refactor pattern_fixed_prefix() to avoid dealing in incomplete patterns.

Previously, pattern_fixed_prefix() was defined to return whatever fixed
prefix it could extract from the pattern, plus the "rest" of the pattern.
That definition was sensible for LIKE patterns, but not so much for
regexes, where reconstituting a valid pattern minus the prefix could be
quite tricky (certainly the existing code wasn't doing that correctly).
Since the only thing that callers ever did with the "rest" of the pattern
was to pass it to like_selectivity() or regex_selectivity(), let's cut out
the middle-man and just have pattern_fixed_prefix's subroutines do this
directly. Then pattern_fixed_prefix can return a simple selectivity
number, and the question of how to cope with partial patterns is removed
from its API specification.

While at it, adjust the API spec so that callers who don't actually care
about the pattern's selectivity (which is a lot of them) can pass NULL for
the selectivity pointer to skip doing the work of computing a selectivity
estimate.

This patch is only an API refactoring that doesn't actually change any
processing, other than allowing a little bit of useless work to be
skipped. However, it's necessary infrastructure for my upcoming fix to
regex prefix extraction, because after that change there won't be any
simple way to identify the "rest" of the regex, not even to the low level
of fidelity needed by regex_selectivity. We can cope with that if
regex_fixed_prefix and regex_selectivity communicate directly, but not if
we have to work within the old API. Hence, back-patch to all active
branches.
--- src/backend/optimizer/path/indxpath.c | 20 ++-- src/backend/utils/adt/selfuncs.c | 166 ++++++++------------------ src/include/utils/selfuncs.h | 2 +- 3 files changed, 60 insertions(+), 128 deletions(-) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 2e8ccd0578..66b68fc71d 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -2785,7 +2785,6 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation, Oid expr_coll; Const *patt; Const *prefix = NULL; - Const *rest = NULL; Pattern_Prefix_Status pstatus = Pattern_Prefix_None; /* @@ -2814,13 +2813,13 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation, case OID_NAME_LIKE_OP: /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; case OID_BYTEA_LIKE_OP: pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; @@ -2829,7 +2828,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation, case OID_NAME_ICLIKE_OP: /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, expr_coll, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; @@ -2838,7 +2837,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation, case OID_NAME_REGEXEQ_OP: /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, expr_coll, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; @@ -2847,7 +2846,7 @@ match_special_index_operator(Expr *clause, Oid opfamily, Oid idxcollation, case OID_NAME_ICREGEXEQ_OP: /* the right-hand const is 
type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, expr_coll, - &prefix, &rest); + &prefix, NULL); isIndexable = (pstatus != Pattern_Prefix_None); break; @@ -3115,7 +3114,6 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation) Oid expr_coll = ((OpExpr *) clause)->inputcollid; Const *patt = (Const *) rightop; Const *prefix = NULL; - Const *rest = NULL; Pattern_Prefix_Status pstatus; /* @@ -3135,7 +3133,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation) if (!op_in_opfamily(expr_op, opfamily)) { pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, expr_coll, - &prefix, &rest); + &prefix, NULL); return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus); } break; @@ -3147,7 +3145,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation) { /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, expr_coll, - &prefix, &rest); + &prefix, NULL); return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus); } break; @@ -3159,7 +3157,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation) { /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, expr_coll, - &prefix, &rest); + &prefix, NULL); return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus); } break; @@ -3171,7 +3169,7 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily, Oid idxcollation) { /* the right-hand const is type text for all of these */ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, expr_coll, - &prefix, &rest); + &prefix, NULL); return prefix_quals(leftop, opfamily, idxcollation, prefix, pstatus); } break; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index dc38034104..7eb64cba4b 100644 --- a/src/backend/utils/adt/selfuncs.c +++ 
b/src/backend/utils/adt/selfuncs.c @@ -192,7 +192,10 @@ static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, Oid vartype, Oid opfamily, Const *prefixcon); -static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype); +static Selectivity like_selectivity(const char *patt, int pattlen, + bool case_insensitive); +static Selectivity regex_selectivity(const char *patt, int pattlen, + bool case_insensitive); static Datum string_to_datum(const char *str, Oid datatype); static Const *string_to_const(const char *str, Oid datatype); static Const *string_to_bytea_const(const char *str, size_t str_len); @@ -1115,9 +1118,9 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) Oid vartype; Oid opfamily; Pattern_Prefix_Status pstatus; - Const *patt = NULL; + Const *patt; Const *prefix = NULL; - Const *rest = NULL; + Selectivity rest_selec = 0; double result; /* @@ -1207,8 +1210,9 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) } /* - * Divide pattern into fixed prefix and remainder. Unlike many of the - * other functions in this file, we use the pattern operator's actual + * Pull out any fixed prefix implied by the pattern, and estimate the + * fractional selectivity of the remainder of the pattern. Unlike many of + * the other functions in this file, we use the pattern operator's actual * collation for this step. This is not because we expect the collation * to make a big difference in the selectivity estimate (it seldom would), * but because we want to be sure we cache compiled regexps under the @@ -1216,11 +1220,10 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) */ patt = (Const *) other; pstatus = pattern_fixed_prefix(patt, ptype, collation, - &prefix, &rest); + &prefix, &rest_selec); /* - * If necessary, coerce the prefix constant to the right type. (The "rest" - * constant need not be changed.) 
+ * If necessary, coerce the prefix constant to the right type. */ if (prefix && prefix->consttype != vartype) { @@ -1294,15 +1297,13 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate) { Selectivity heursel; Selectivity prefixsel; - Selectivity restsel; if (pstatus == Pattern_Prefix_Partial) prefixsel = prefix_selectivity(root, &vardata, vartype, opfamily, prefix); else prefixsel = 1.0; - restsel = pattern_selectivity(rest, ptype); - heursel = prefixsel * restsel; + heursel = prefixsel * rest_selec; if (selec < 0) /* fewer than 10 histogram entries? */ selec = heursel; @@ -5133,9 +5134,9 @@ pattern_char_isalpha(char c, bool is_multibyte, * * *prefix is set to a palloc'd prefix string (in the form of a Const node), * or to NULL if no fixed prefix exists for the pattern. - * *rest is set to a palloc'd Const representing the remainder of the pattern - * after the portion describing the fixed prefix. - * Each of these has the same type (TEXT or BYTEA) as the given pattern Const. + * If rest_selec is not NULL, *rest_selec is set to an estimate of the + * selectivity of the remainder of the pattern (without any fixed prefix). + * The prefix Const has the same type (TEXT or BYTEA) as the input pattern. * * The return value distinguishes no fixed prefix, a partial prefix, * or an exact-match-only pattern. 
@@ -5143,12 +5144,11 @@ pattern_char_isalpha(char c, bool is_multibyte, static Pattern_Prefix_Status like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, - Const **prefix_const, Const **rest_const) + Const **prefix_const, Selectivity *rest_selec) { char *match; char *patt; int pattlen; - char *rest; Oid typeid = patt_const->consttype; int pos, match_pos; @@ -5228,18 +5228,15 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, } match[match_pos] = '\0'; - rest = &patt[pos]; if (typeid != BYTEAOID) - { *prefix_const = string_to_const(match, typeid); - *rest_const = string_to_const(rest, typeid); - } else - { *prefix_const = string_to_bytea_const(match, match_pos); - *rest_const = string_to_bytea_const(rest, pattlen - pos); - } + + if (rest_selec != NULL) + *rest_selec = like_selectivity(&patt[pos], pattlen - pos, + case_insensitive); pfree(patt); pfree(match); @@ -5256,7 +5253,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, static Pattern_Prefix_Status regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, - Const **prefix_const, Const **rest_const) + Const **prefix_const, Selectivity *rest_selec) { char *match; int pos, @@ -5318,10 +5315,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, /* Pattern must be anchored left */ if (patt[pos] != '^') { - rest = patt; - *prefix_const = NULL; - *rest_const = string_to_const(rest, typeid); + + if (rest_selec != NULL) + *rest_selec = regex_selectivity(patt, strlen(patt), + case_insensitive); return Pattern_Prefix_None; } @@ -5335,10 +5333,11 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, */ if (strchr(patt + pos, '|') != NULL) { - rest = patt; - *prefix_const = NULL; - *rest_const = string_to_const(rest, typeid); + + if (rest_selec != NULL) + *rest_selec = regex_selectivity(patt, strlen(patt), + case_insensitive); return Pattern_Prefix_None; } @@ -5434,10 +5433,10 
@@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, if (patt[pos] == '$' && patt[pos + 1] == '\0') { - rest = &patt[pos + 1]; - *prefix_const = string_to_const(match, typeid); - *rest_const = string_to_const(rest, typeid); + + if (rest_selec != NULL) + *rest_selec = 1.0; pfree(patt); pfree(match); @@ -5446,7 +5445,10 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, } *prefix_const = string_to_const(match, typeid); - *rest_const = string_to_const(rest, typeid); + + if (rest_selec != NULL) + *rest_selec = regex_selectivity(rest, strlen(rest), + case_insensitive); pfree(patt); pfree(match); @@ -5459,23 +5461,27 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, - Const **prefix, Const **rest) + Const **prefix, Selectivity *rest_selec) { Pattern_Prefix_Status result; switch (ptype) { case Pattern_Type_Like: - result = like_fixed_prefix(patt, false, collation, prefix, rest); + result = like_fixed_prefix(patt, false, collation, + prefix, rest_selec); break; case Pattern_Type_Like_IC: - result = like_fixed_prefix(patt, true, collation, prefix, rest); + result = like_fixed_prefix(patt, true, collation, + prefix, rest_selec); break; case Pattern_Type_Regex: - result = regex_fixed_prefix(patt, false, collation, prefix, rest); + result = regex_fixed_prefix(patt, false, collation, + prefix, rest_selec); break; case Pattern_Type_Regex_IC: - result = regex_fixed_prefix(patt, true, collation, prefix, rest); + result = regex_fixed_prefix(patt, true, collation, + prefix, rest_selec); break; default: elog(ERROR, "unrecognized ptype: %d", (int) ptype); @@ -5590,7 +5596,8 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, /* * Estimate the selectivity of a pattern of the specified type. - * Note that any fixed prefix of the pattern will have been removed already. 
+ * Note that any fixed prefix of the pattern will have been removed already, + * so actually we may be looking at just a fragment of the pattern. * * For now, we use a very simplistic approach: fixed characters reduce the * selectivity a good deal, character ranges reduce it a little, @@ -5604,37 +5611,10 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, #define PARTIAL_WILDCARD_SEL 2.0 static Selectivity -like_selectivity(Const *patt_const, bool case_insensitive) +like_selectivity(const char *patt, int pattlen, bool case_insensitive) { Selectivity sel = 1.0; int pos; - Oid typeid = patt_const->consttype; - char *patt; - int pattlen; - - /* the right-hand const is type text or bytea */ - Assert(typeid == BYTEAOID || typeid == TEXTOID); - - if (typeid == BYTEAOID && case_insensitive) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("case insensitive matching not supported on type bytea"))); - - if (typeid != BYTEAOID) - { - patt = TextDatumGetCString(patt_const->constvalue); - pattlen = strlen(patt); - } - else - { - bytea *bstr = DatumGetByteaP(patt_const->constvalue); - - pattlen = VARSIZE(bstr) - VARHDRSZ; - patt = (char *) palloc(pattlen); - memcpy(patt, VARDATA(bstr), pattlen); - if ((Pointer) bstr != DatumGetPointer(patt_const->constvalue)) - pfree(bstr); - } /* Skip any leading wildcard; it's already factored into initial sel */ for (pos = 0; pos < pattlen; pos++) @@ -5664,13 +5644,11 @@ like_selectivity(Const *patt_const, bool case_insensitive) /* Could get sel > 1 if multiple wildcards */ if (sel > 1.0) sel = 1.0; - - pfree(patt); return sel; } static Selectivity -regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive) +regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive) { Selectivity sel = 1.0; int paren_depth = 0; @@ -5763,26 +5741,9 @@ regex_selectivity_sub(char *patt, int pattlen, bool case_insensitive) } static Selectivity -regex_selectivity(Const *patt_const, bool 
case_insensitive) +regex_selectivity(const char *patt, int pattlen, bool case_insensitive) { Selectivity sel; - char *patt; - int pattlen; - Oid typeid = patt_const->consttype; - - /* - * Should be unnecessary, there are no bytea regex operators defined. As - * such, it should be noted that the rest of this function has *not* been - * made safe for binary (possibly NULL containing) strings. - */ - if (typeid == BYTEAOID) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("regular-expression matching not supported on type bytea"))); - - /* the right-hand const is type text for all of these */ - patt = TextDatumGetCString(patt_const->constvalue); - pattlen = strlen(patt); /* If patt doesn't end with $, consider it to have a trailing wildcard */ if (pattlen > 0 && patt[pattlen - 1] == '$' && @@ -5802,33 +5763,6 @@ regex_selectivity(Const *patt_const, bool case_insensitive) return sel; } -static Selectivity -pattern_selectivity(Const *patt, Pattern_Type ptype) -{ - Selectivity result; - - switch (ptype) - { - case Pattern_Type_Like: - result = like_selectivity(patt, false); - break; - case Pattern_Type_Like_IC: - result = like_selectivity(patt, true); - break; - case Pattern_Type_Regex: - result = regex_selectivity(patt, false); - break; - case Pattern_Type_Regex_IC: - result = regex_selectivity(patt, true); - break; - default: - elog(ERROR, "unrecognized ptype: %d", (int) ptype); - result = 1.0; /* keep compiler quiet */ - break; - } - return result; -} - /* * For bytea, the increment function need only increment the current byte diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 87c6554b32..aa5ee69d8b 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -134,7 +134,7 @@ extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, Const **prefix, - Const **rest); + Selectivity *rest_selec); extern Const *make_greater_string(const Const *str_const, FmgrInfo 
*ltproc, Oid collation);