Add a planner support function for starts_with().

This fills in some gaps in planner support for starts_with() and
the equivalent ^@ operator:

* A condition such as "textcol ^@ constant" can now use a regular
btree index, not only an SP-GiST index, so long as the index's
collation is C.  (This works just like "textcol LIKE 'foo%'".)

* "starts_with(textcol, constant)" can be optimized the same as
"textcol ^@ constant".

* Fixed-prefix LIKE and regex patterns are now more like starts_with()
in another way: if you apply one to an SP-GiST-indexed column, you'll
get an index condition using ^@ rather than two index conditions with
>= and <.

Per a complaint from Shay Rojansky.  Patch by me; thanks to
Nathan Bossart for review.

Discussion: https://postgr.es/m/232599.1633800229@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2021-11-17 16:54:12 -05:00
parent 248c3a937d
commit a148f8bc04
6 changed files with 95 additions and 23 deletions

View File

@ -143,6 +143,14 @@ texticregexeq_support(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
}
Datum
text_starts_with_support(PG_FUNCTION_ARGS)
{
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Prefix));
}
/* Common code for the above */
static Node *
like_regex_support(Node *rawreq, Pattern_Type ptype)
@ -246,6 +254,7 @@ match_pattern_prefix(Node *leftop,
Oid eqopr;
Oid ltopr;
Oid geopr;
Oid preopr = InvalidOid;
bool collation_aware;
Expr *expr;
FmgrInfo ltproc;
@ -302,14 +311,22 @@ match_pattern_prefix(Node *leftop,
switch (ldatatype)
{
case TEXTOID:
if (opfamily == TEXT_PATTERN_BTREE_FAM_OID ||
opfamily == TEXT_SPGIST_FAM_OID)
if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
{
eqopr = TextEqualOperator;
ltopr = TextPatternLessOperator;
geopr = TextPatternGreaterEqualOperator;
collation_aware = false;
}
else if (opfamily == TEXT_SPGIST_FAM_OID)
{
eqopr = TextEqualOperator;
ltopr = TextPatternLessOperator;
geopr = TextPatternGreaterEqualOperator;
/* This opfamily has direct support for prefixing */
preopr = TextPrefixOperator;
collation_aware = false;
}
else
{
eqopr = TextEqualOperator;
@ -360,20 +377,6 @@ match_pattern_prefix(Node *leftop,
return NIL;
}
/*
* If necessary, verify that the index's collation behavior is compatible.
* For an exact-match case, we don't have to be picky. Otherwise, insist
* that the index collation be "C". Note that here we are looking at the
* index's collation, not the expression's collation -- this test is *not*
* dependent on the LIKE/regex operator's collation.
*/
if (collation_aware)
{
if (!(pstatus == Pattern_Prefix_Exact ||
lc_collate_is_c(indexcollation)))
return NIL;
}
/*
* If necessary, coerce the prefix constant to the right type. The given
* prefix constant is either text or bytea type, therefore the only case
@ -409,8 +412,31 @@ match_pattern_prefix(Node *leftop,
}
/*
* Otherwise, we have a nonempty required prefix of the values.
*
* Otherwise, we have a nonempty required prefix of the values. Some
* opclasses support prefix checks directly, otherwise we'll try to
* generate a range constraint.
*/
if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
{
expr = make_opclause(preopr, BOOLOID, false,
(Expr *) leftop, (Expr *) prefix,
InvalidOid, indexcollation);
result = list_make1(expr);
return result;
}
/*
* Since we need a range constraint, it's only going to work reliably if
* the index is collation-insensitive or has "C" collation. Note that
* here we are looking at the index's collation, not the expression's
* collation -- this test is *not* dependent on the LIKE/regex operator's
* collation.
*/
if (collation_aware &&
!lc_collate_is_c(indexcollation))
return NIL;
/*
* We can always say "x >= prefix".
*/
if (!op_in_opfamily(geopr, opfamily))
@ -1165,7 +1191,6 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
case Pattern_Type_Prefix:
/* Prefix type work is trivial. */
result = Pattern_Prefix_Partial;
*rest_selec = 1.0; /* all */
*prefix = makeConst(patt->consttype,
patt->consttypmod,
patt->constcollid,
@ -1175,6 +1200,8 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
patt->constlen),
patt->constisnull,
patt->constbyval);
if (rest_selec != NULL)
*rest_selec = 1.0; /* all */
break;
default:
elog(ERROR, "unrecognized ptype: %d", (int) ptype);

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202111091
#define CATALOG_VERSION_NO 202111171
#endif

View File

@ -102,7 +102,7 @@
oprright => 'text', oprresult => 'bool', oprcom => '=(text,text)',
oprnegate => '<>(text,text)', oprcode => 'texteq', oprrest => 'eqsel',
oprjoin => 'eqjoinsel' },
{ oid => '3877', descr => 'starts with',
{ oid => '3877', oid_symbol => 'TextPrefixOperator', descr => 'starts with',
oprname => '^@', oprleft => 'text', oprright => 'text', oprresult => 'bool',
oprcode => 'starts_with', oprrest => 'prefixsel',
oprjoin => 'prefixjoinsel' },

View File

@ -167,8 +167,12 @@
proname => 'texteq', proleakproof => 't', prorettype => 'bool',
proargtypes => 'text text', prosrc => 'texteq' },
{ oid => '3696',
proname => 'starts_with', proleakproof => 't', prorettype => 'bool',
proargtypes => 'text text', prosrc => 'text_starts_with' },
proname => 'starts_with', prosupport => 'text_starts_with_support',
proleakproof => 't', prorettype => 'bool', proargtypes => 'text text',
prosrc => 'text_starts_with' },
{ oid => '8923', descr => 'planner support for text_starts_with',
proname => 'text_starts_with_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'text_starts_with_support' },
{ oid => '68',
proname => 'xideq', proleakproof => 't', prorettype => 'bool',
proargtypes => 'xid xid', prosrc => 'xideq' },

View File

@ -804,6 +804,22 @@ SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
2
(1 row)
EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
QUERY PLAN
------------------------------------------------------------
Aggregate
-> Index Only Scan using sp_radix_ind on radix_text_tbl
Index Cond: (t ^@ 'Worth'::text)
Filter: starts_with(t, 'Worth'::text)
(4 rows)
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
count
-------
2
(1 row)
-- Now check the results from bitmap indexscan
SET enable_seqscan = OFF;
SET enable_indexscan = OFF;
@ -1333,6 +1349,23 @@ SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
2
(1 row)
EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
QUERY PLAN
------------------------------------------------
Aggregate
-> Bitmap Heap Scan on radix_text_tbl
Filter: starts_with(t, 'Worth'::text)
-> Bitmap Index Scan on sp_radix_ind
Index Cond: (t ^@ 'Worth'::text)
(5 rows)
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
count
-------
2
(1 row)
RESET enable_seqscan;
RESET enable_indexscan;
RESET enable_bitmapscan;

View File

@ -295,6 +295,10 @@ EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
-- Now check the results from bitmap indexscan
SET enable_seqscan = OFF;
SET enable_indexscan = OFF;
@ -424,6 +428,10 @@ EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
EXPLAIN (COSTS OFF)
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
RESET enable_seqscan;
RESET enable_indexscan;
RESET enable_bitmapscan;