314 lines
9.0 KiB
C
314 lines
9.0 KiB
C
|
/*-------------------------------------------------------------------------
|
||
|
*
|
||
|
* like_support.c
|
||
|
* Planner support functions for LIKE, regex, and related operators.
|
||
|
*
|
||
|
* These routines handle special optimization of operators that can be
|
||
|
* used with index scans even though they are not known to the executor's
|
||
|
* indexscan machinery. The key idea is that these operators allow us
|
||
|
* to derive approximate indexscan qual clauses, such that any tuples
|
||
|
* that pass the operator clause itself must also satisfy the simpler
|
||
|
* indexscan condition(s). Then we can use the indexscan machinery
|
||
|
* to avoid scanning as much of the table as we'd otherwise have to,
|
||
|
* while applying the original operator as a qpqual condition to ensure
|
||
|
* we deliver only the tuples we want. (In essence, we're using a regular
|
||
|
* index as if it were a lossy index.)
|
||
|
*
|
||
|
* An example of what we're doing is
|
||
|
* textfield LIKE 'abc%def'
|
||
|
* from which we can generate the indexscanable conditions
|
||
|
* textfield >= 'abc' AND textfield < 'abd'
|
||
|
* which allow efficient scanning of an index on textfield.
|
||
|
* (In reality, character set and collation issues make the transformation
|
||
|
* from LIKE to indexscan limits rather harder than one might think ...
|
||
|
* but that's the basic idea.)
|
||
|
*
|
||
|
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
|
||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||
|
*
|
||
|
*
|
||
|
* IDENTIFICATION
|
||
|
* src/backend/utils/adt/like_support.c
|
||
|
*
|
||
|
*-------------------------------------------------------------------------
|
||
|
*/
|
||
|
#include "postgres.h"
|
||
|
|
||
|
#include "access/stratnum.h"
|
||
|
#include "catalog/pg_opfamily.h"
|
||
|
#include "catalog/pg_type.h"
|
||
|
#include "nodes/makefuncs.h"
|
||
|
#include "nodes/nodeFuncs.h"
|
||
|
#include "nodes/supportnodes.h"
|
||
|
#include "utils/builtins.h"
|
||
|
#include "utils/lsyscache.h"
|
||
|
#include "utils/pg_locale.h"
|
||
|
#include "utils/selfuncs.h"
|
||
|
|
||
|
|
||
|
static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
|
||
|
static List *match_pattern_prefix(Node *leftop,
|
||
|
Node *rightop,
|
||
|
Pattern_Type ptype,
|
||
|
Oid expr_coll,
|
||
|
Oid opfamily,
|
||
|
Oid indexcollation);
|
||
|
|
||
|
|
||
|
/*
|
||
|
* Planner support functions for LIKE, regex, and related operators
|
||
|
*/
|
||
|
Datum
|
||
|
textlike_support(PG_FUNCTION_ARGS)
|
||
|
{
|
||
|
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
|
||
|
|
||
|
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));
|
||
|
}
|
||
|
|
||
|
Datum
|
||
|
texticlike_support(PG_FUNCTION_ARGS)
|
||
|
{
|
||
|
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
|
||
|
|
||
|
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));
|
||
|
}
|
||
|
|
||
|
Datum
|
||
|
textregexeq_support(PG_FUNCTION_ARGS)
|
||
|
{
|
||
|
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
|
||
|
|
||
|
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));
|
||
|
}
|
||
|
|
||
|
Datum
|
||
|
texticregexeq_support(PG_FUNCTION_ARGS)
|
||
|
{
|
||
|
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
|
||
|
|
||
|
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
|
||
|
}
|
||
|
|
||
|
/* Common code for the above */
|
||
|
static Node *
|
||
|
like_regex_support(Node *rawreq, Pattern_Type ptype)
|
||
|
{
|
||
|
Node *ret = NULL;
|
||
|
|
||
|
if (IsA(rawreq, SupportRequestIndexCondition))
|
||
|
{
|
||
|
/* Try to convert operator/function call to index conditions */
|
||
|
SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
|
||
|
|
||
|
/*
|
||
|
* Currently we have no "reverse" match operators with the pattern on
|
||
|
* the left, so we only need consider cases with the indexkey on the
|
||
|
* left.
|
||
|
*/
|
||
|
if (req->indexarg != 0)
|
||
|
return NULL;
|
||
|
|
||
|
if (is_opclause(req->node))
|
||
|
{
|
||
|
OpExpr *clause = (OpExpr *) req->node;
|
||
|
|
||
|
Assert(list_length(clause->args) == 2);
|
||
|
ret = (Node *)
|
||
|
match_pattern_prefix((Node *) linitial(clause->args),
|
||
|
(Node *) lsecond(clause->args),
|
||
|
ptype,
|
||
|
clause->inputcollid,
|
||
|
req->opfamily,
|
||
|
req->indexcollation);
|
||
|
}
|
||
|
else if (is_funcclause(req->node)) /* be paranoid */
|
||
|
{
|
||
|
FuncExpr *clause = (FuncExpr *) req->node;
|
||
|
|
||
|
Assert(list_length(clause->args) == 2);
|
||
|
ret = (Node *)
|
||
|
match_pattern_prefix((Node *) linitial(clause->args),
|
||
|
(Node *) lsecond(clause->args),
|
||
|
ptype,
|
||
|
clause->inputcollid,
|
||
|
req->opfamily,
|
||
|
req->indexcollation);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* match_pattern_prefix
|
||
|
* Try to generate an indexqual for a LIKE or regex operator.
|
||
|
*/
|
||
|
static List *
|
||
|
match_pattern_prefix(Node *leftop,
|
||
|
Node *rightop,
|
||
|
Pattern_Type ptype,
|
||
|
Oid expr_coll,
|
||
|
Oid opfamily,
|
||
|
Oid indexcollation)
|
||
|
{
|
||
|
List *result;
|
||
|
Const *patt;
|
||
|
Const *prefix;
|
||
|
Pattern_Prefix_Status pstatus;
|
||
|
Oid ldatatype;
|
||
|
Oid rdatatype;
|
||
|
Oid oproid;
|
||
|
Expr *expr;
|
||
|
FmgrInfo ltproc;
|
||
|
Const *greaterstr;
|
||
|
|
||
|
/*
|
||
|
* Can't do anything with a non-constant or NULL pattern argument.
|
||
|
*
|
||
|
* Note that since we restrict ourselves to cases with a hard constant on
|
||
|
* the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
|
||
|
* about verifying that.
|
||
|
*/
|
||
|
if (!IsA(rightop, Const) ||
|
||
|
((Const *) rightop)->constisnull)
|
||
|
return NIL;
|
||
|
patt = (Const *) rightop;
|
||
|
|
||
|
/*
|
||
|
* Try to extract a fixed prefix from the pattern.
|
||
|
*/
|
||
|
pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
|
||
|
&prefix, NULL);
|
||
|
|
||
|
/* fail if no fixed prefix */
|
||
|
if (pstatus == Pattern_Prefix_None)
|
||
|
return NIL;
|
||
|
|
||
|
/*
|
||
|
* Must also check that index's opfamily supports the operators we will
|
||
|
* want to apply. (A hash index, for example, will not support ">=".)
|
||
|
* Currently, only btree and spgist support the operators we need.
|
||
|
*
|
||
|
* Note: actually, in the Pattern_Prefix_Exact case, we only need "=" so a
|
||
|
* hash index would work. Currently it doesn't seem worth checking for
|
||
|
* that, however.
|
||
|
*
|
||
|
* We insist on the opfamily being one of the specific ones we expect,
|
||
|
* else we'd do the wrong thing if someone were to make a reverse-sort
|
||
|
* opfamily with the same operators.
|
||
|
*
|
||
|
* The non-pattern opclasses will not sort the way we need in most non-C
|
||
|
* locales. We can use such an index anyway for an exact match (simple
|
||
|
* equality), but not for prefix-match cases. Note that here we are
|
||
|
* looking at the index's collation, not the expression's collation --
|
||
|
* this test is *not* dependent on the LIKE/regex operator's collation.
|
||
|
*
|
||
|
* While we're at it, identify the type the comparison constant(s) should
|
||
|
* have, based on the opfamily.
|
||
|
*/
|
||
|
switch (opfamily)
|
||
|
{
|
||
|
case TEXT_BTREE_FAM_OID:
|
||
|
if (!(pstatus == Pattern_Prefix_Exact ||
|
||
|
lc_collate_is_c(indexcollation)))
|
||
|
return NIL;
|
||
|
rdatatype = TEXTOID;
|
||
|
break;
|
||
|
|
||
|
case TEXT_PATTERN_BTREE_FAM_OID:
|
||
|
case TEXT_SPGIST_FAM_OID:
|
||
|
rdatatype = TEXTOID;
|
||
|
break;
|
||
|
|
||
|
case BPCHAR_BTREE_FAM_OID:
|
||
|
if (!(pstatus == Pattern_Prefix_Exact ||
|
||
|
lc_collate_is_c(indexcollation)))
|
||
|
return NIL;
|
||
|
rdatatype = BPCHAROID;
|
||
|
break;
|
||
|
|
||
|
case BPCHAR_PATTERN_BTREE_FAM_OID:
|
||
|
rdatatype = BPCHAROID;
|
||
|
break;
|
||
|
|
||
|
case BYTEA_BTREE_FAM_OID:
|
||
|
rdatatype = BYTEAOID;
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
return NIL;
|
||
|
}
|
||
|
|
||
|
/* OK, prepare to create the indexqual(s) */
|
||
|
ldatatype = exprType(leftop);
|
||
|
|
||
|
/*
|
||
|
* If necessary, coerce the prefix constant to the right type. The given
|
||
|
* prefix constant is either text or bytea type, therefore the only case
|
||
|
* where we need to do anything is when converting text to bpchar. Those
|
||
|
* two types are binary-compatible, so relabeling the Const node is
|
||
|
* sufficient.
|
||
|
*/
|
||
|
if (prefix->consttype != rdatatype)
|
||
|
{
|
||
|
Assert(prefix->consttype == TEXTOID &&
|
||
|
rdatatype == BPCHAROID);
|
||
|
prefix->consttype = rdatatype;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* If we found an exact-match pattern, generate an "=" indexqual.
|
||
|
*/
|
||
|
if (pstatus == Pattern_Prefix_Exact)
|
||
|
{
|
||
|
oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
|
||
|
BTEqualStrategyNumber);
|
||
|
if (oproid == InvalidOid)
|
||
|
elog(ERROR, "no = operator for opfamily %u", opfamily);
|
||
|
expr = make_opclause(oproid, BOOLOID, false,
|
||
|
(Expr *) leftop, (Expr *) prefix,
|
||
|
InvalidOid, indexcollation);
|
||
|
result = list_make1(expr);
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Otherwise, we have a nonempty required prefix of the values.
|
||
|
*
|
||
|
* We can always say "x >= prefix".
|
||
|
*/
|
||
|
oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
|
||
|
BTGreaterEqualStrategyNumber);
|
||
|
if (oproid == InvalidOid)
|
||
|
elog(ERROR, "no >= operator for opfamily %u", opfamily);
|
||
|
expr = make_opclause(oproid, BOOLOID, false,
|
||
|
(Expr *) leftop, (Expr *) prefix,
|
||
|
InvalidOid, indexcollation);
|
||
|
result = list_make1(expr);
|
||
|
|
||
|
/*-------
|
||
|
* If we can create a string larger than the prefix, we can say
|
||
|
* "x < greaterstr". NB: we rely on make_greater_string() to generate
|
||
|
* a guaranteed-greater string, not just a probably-greater string.
|
||
|
* In general this is only guaranteed in C locale, so we'd better be
|
||
|
* using a C-locale index collation.
|
||
|
*-------
|
||
|
*/
|
||
|
oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
|
||
|
BTLessStrategyNumber);
|
||
|
if (oproid == InvalidOid)
|
||
|
elog(ERROR, "no < operator for opfamily %u", opfamily);
|
||
|
fmgr_info(get_opcode(oproid), <proc);
|
||
|
greaterstr = make_greater_string(prefix, <proc, indexcollation);
|
||
|
if (greaterstr)
|
||
|
{
|
||
|
expr = make_opclause(oproid, BOOLOID, false,
|
||
|
(Expr *) leftop, (Expr *) greaterstr,
|
||
|
InvalidOid, indexcollation);
|
||
|
result = lappend(result, expr);
|
||
|
}
|
||
|
|
||
|
return result;
|
||
|
}
|