postgresql/contrib/ltree/ltree_op.c

591 lines
11 KiB
C
Raw Normal View History

/*
2002-09-04 22:31:48 +02:00
* op function for ltree
* Teodor Sigaev <teodor@stack.net>
2010-09-20 22:08:53 +02:00
* contrib/ltree/ltree_op.c
*/
#include "postgres.h"
#include <ctype.h>
#include "access/htup_details.h"
#include "catalog/pg_statistic.h"
#include "ltree.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
PG_MODULE_MAGIC;
/* compare functions */
PG_FUNCTION_INFO_V1(ltree_cmp);
PG_FUNCTION_INFO_V1(ltree_lt);
PG_FUNCTION_INFO_V1(ltree_le);
PG_FUNCTION_INFO_V1(ltree_eq);
PG_FUNCTION_INFO_V1(ltree_ne);
PG_FUNCTION_INFO_V1(ltree_ge);
PG_FUNCTION_INFO_V1(ltree_gt);
PG_FUNCTION_INFO_V1(nlevel);
PG_FUNCTION_INFO_V1(ltree_isparent);
PG_FUNCTION_INFO_V1(ltree_risparent);
PG_FUNCTION_INFO_V1(subltree);
PG_FUNCTION_INFO_V1(subpath);
PG_FUNCTION_INFO_V1(ltree_index);
PG_FUNCTION_INFO_V1(ltree_addltree);
PG_FUNCTION_INFO_V1(ltree_addtext);
PG_FUNCTION_INFO_V1(ltree_textadd);
PG_FUNCTION_INFO_V1(lca);
PG_FUNCTION_INFO_V1(ltree2text);
PG_FUNCTION_INFO_V1(text2ltree);
PG_FUNCTION_INFO_V1(ltreeparentsel);
int
ltree_compare(const ltree *a, const ltree *b)
2002-09-04 22:31:48 +02:00
{
ltree_level *al = LTREE_FIRST(a);
ltree_level *bl = LTREE_FIRST(b);
2002-09-04 22:31:48 +02:00
int an = a->numlevel;
int bn = b->numlevel;
while (an > 0 && bn > 0)
{
int res;
if ((res = memcmp(al->name, bl->name, Min(al->len, bl->len))) == 0)
2002-09-04 22:31:48 +02:00
{
if (al->len != bl->len)
return (al->len - bl->len) * 10 * (an + 1);
}
else
{
if (res < 0)
res = -1;
else
res = 1;
2002-09-04 22:31:48 +02:00
return res * 10 * (an + 1);
}
2002-09-04 22:31:48 +02:00
an--;
bn--;
al = LEVEL_NEXT(al);
bl = LEVEL_NEXT(bl);
}
2002-09-04 22:31:48 +02:00
return (a->numlevel - b->numlevel) * 10 * (an + 1);
}
2002-09-04 22:31:48 +02:00
#define RUNCMP \
ltree *a = PG_GETARG_LTREE_P(0); \
ltree *b = PG_GETARG_LTREE_P(1); \
int res = ltree_compare(a,b); \
PG_FREE_IF_COPY(a,0); \
PG_FREE_IF_COPY(b,1)
Datum
2002-09-04 22:31:48 +02:00
ltree_cmp(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_INT32(res);
}
Datum
2002-09-04 22:31:48 +02:00
ltree_lt(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_BOOL((res < 0) ? true : false);
}
Datum
2002-09-04 22:31:48 +02:00
ltree_le(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_BOOL((res <= 0) ? true : false);
}
Datum
2002-09-04 22:31:48 +02:00
ltree_eq(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_BOOL((res == 0) ? true : false);
}
Datum
2002-09-04 22:31:48 +02:00
ltree_ge(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_BOOL((res >= 0) ? true : false);
}
Datum
2002-09-04 22:31:48 +02:00
ltree_gt(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_BOOL((res > 0) ? true : false);
}
Datum
2002-09-04 22:31:48 +02:00
ltree_ne(PG_FUNCTION_ARGS)
{
RUNCMP;
PG_RETURN_BOOL((res != 0) ? true : false);
}
Datum
2002-09-04 22:31:48 +02:00
nlevel(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE_P(0);
2002-09-04 22:31:48 +02:00
int res = a->numlevel;
PG_FREE_IF_COPY(a, 0);
PG_RETURN_INT32(res);
}
bool
inner_isparent(const ltree *c, const ltree *p)
2002-09-04 22:31:48 +02:00
{
ltree_level *cl = LTREE_FIRST(c);
ltree_level *pl = LTREE_FIRST(p);
2002-09-04 22:31:48 +02:00
int pn = p->numlevel;
2002-09-04 22:31:48 +02:00
if (pn > c->numlevel)
return false;
2002-09-04 22:31:48 +02:00
while (pn > 0)
{
if (cl->len != pl->len)
return false;
if (memcmp(cl->name, pl->name, cl->len) != 0)
return false;
pn--;
2002-09-04 22:31:48 +02:00
cl = LEVEL_NEXT(cl);
pl = LEVEL_NEXT(pl);
}
return true;
}
2002-09-04 22:31:48 +02:00
Datum
ltree_isparent(PG_FUNCTION_ARGS)
{
ltree *c = PG_GETARG_LTREE_P(1);
ltree *p = PG_GETARG_LTREE_P(0);
2002-09-04 22:31:48 +02:00
bool res = inner_isparent(c, p);
PG_FREE_IF_COPY(c, 1);
PG_FREE_IF_COPY(p, 0);
PG_RETURN_BOOL(res);
}
2002-09-04 22:31:48 +02:00
Datum
ltree_risparent(PG_FUNCTION_ARGS)
{
ltree *c = PG_GETARG_LTREE_P(0);
ltree *p = PG_GETARG_LTREE_P(1);
2002-09-04 22:31:48 +02:00
bool res = inner_isparent(c, p);
PG_FREE_IF_COPY(c, 0);
PG_FREE_IF_COPY(p, 1);
PG_RETURN_BOOL(res);
}
2002-09-04 22:31:48 +02:00
static ltree *
inner_subltree(ltree *t, int32 startpos, int32 endpos)
2002-09-04 22:31:48 +02:00
{
char *start = NULL,
*end = NULL;
ltree_level *ptr = LTREE_FIRST(t);
2002-09-04 22:31:48 +02:00
ltree *res;
int i;
if (startpos < 0 || endpos < 0 || startpos >= t->numlevel || startpos > endpos)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid positions")));
2002-09-04 22:31:48 +02:00
if (endpos > t->numlevel)
endpos = t->numlevel;
start = end = (char *) ptr;
2002-09-04 22:31:48 +02:00
for (i = 0; i < endpos; i++)
{
if (i == startpos)
start = (char *) ptr;
if (i == endpos - 1)
{
end = (char *) LEVEL_NEXT(ptr);
break;
}
2002-09-04 22:31:48 +02:00
ptr = LEVEL_NEXT(ptr);
}
res = (ltree *) palloc0(LTREE_HDRSIZE + (end - start));
SET_VARSIZE(res, LTREE_HDRSIZE + (end - start));
2002-09-04 22:31:48 +02:00
res->numlevel = endpos - startpos;
memcpy(LTREE_FIRST(res), start, end - start);
return res;
}
Datum
2002-09-04 22:31:48 +02:00
subltree(PG_FUNCTION_ARGS)
{
ltree *t = PG_GETARG_LTREE_P(0);
2002-09-04 22:31:48 +02:00
ltree *res = inner_subltree(t, PG_GETARG_INT32(1), PG_GETARG_INT32(2));
2002-09-04 22:31:48 +02:00
PG_FREE_IF_COPY(t, 0);
PG_RETURN_POINTER(res);
}
2002-09-04 22:31:48 +02:00
Datum
subpath(PG_FUNCTION_ARGS)
{
ltree *t = PG_GETARG_LTREE_P(0);
int32 start = PG_GETARG_INT32(1);
int32 len = (fcinfo->nargs == 3) ? PG_GETARG_INT32(2) : 0;
int32 end;
2002-09-04 22:31:48 +02:00
ltree *res;
end = start + len;
if (start < 0)
{
start = t->numlevel + start;
2002-09-04 22:31:48 +02:00
end = start + len;
}
2002-09-04 22:31:48 +02:00
if (start < 0)
{ /* start > t->numlevel */
start = t->numlevel + start;
2002-09-04 22:31:48 +02:00
end = start + len;
}
2002-09-04 22:31:48 +02:00
if (len < 0)
end = t->numlevel + len;
2002-09-04 22:31:48 +02:00
else if (len == 0)
end = (fcinfo->nargs == 3) ? start : 0xffff;
2002-09-04 22:31:48 +02:00
res = inner_subltree(t, start, end);
2002-09-04 22:31:48 +02:00
PG_FREE_IF_COPY(t, 0);
PG_RETURN_POINTER(res);
}
2002-09-04 22:31:48 +02:00
static ltree *
ltree_concat(ltree *a, ltree *b)
2002-09-04 22:31:48 +02:00
{
ltree *r;
int numlevel = (int) a->numlevel + b->numlevel;
if (numlevel > LTREE_MAX_LEVELS)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("number of ltree levels (%d) exceeds the maximum allowed (%d)",
numlevel, LTREE_MAX_LEVELS)));
2002-09-04 22:31:48 +02:00
r = (ltree *) palloc0(VARSIZE(a) + VARSIZE(b) - LTREE_HDRSIZE);
SET_VARSIZE(r, VARSIZE(a) + VARSIZE(b) - LTREE_HDRSIZE);
r->numlevel = (uint16) numlevel;
2002-09-04 22:31:48 +02:00
memcpy(LTREE_FIRST(r), LTREE_FIRST(a), VARSIZE(a) - LTREE_HDRSIZE);
memcpy(((char *) LTREE_FIRST(r)) + VARSIZE(a) - LTREE_HDRSIZE,
LTREE_FIRST(b),
VARSIZE(b) - LTREE_HDRSIZE);
2002-09-04 22:31:48 +02:00
return r;
}
Datum
ltree_addltree(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE_P(0);
ltree *b = PG_GETARG_LTREE_P(1);
2002-09-04 22:31:48 +02:00
ltree *r;
r = ltree_concat(a, b);
2002-09-04 22:31:48 +02:00
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
PG_RETURN_POINTER(r);
}
Datum
2002-09-04 22:31:48 +02:00
ltree_addtext(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE_P(0);
text *b = PG_GETARG_TEXT_PP(1);
2002-09-04 22:31:48 +02:00
char *s;
ltree *r,
*tmp;
s = text_to_cstring(b);
tmp = (ltree *) DatumGetPointer(DirectFunctionCall1(ltree_in,
PointerGetDatum(s)));
pfree(s);
2002-09-04 22:31:48 +02:00
r = ltree_concat(a, tmp);
2002-09-04 22:31:48 +02:00
pfree(tmp);
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
PG_RETURN_POINTER(r);
}
Datum
ltree_index(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE_P(0);
ltree *b = PG_GETARG_LTREE_P(1);
2003-08-04 02:43:34 +02:00
int start = (fcinfo->nargs == 3) ? PG_GETARG_INT32(2) : 0;
int i,
j;
ltree_level *startptr,
*aptr,
*bptr;
bool found = false;
if (start < 0)
{
if (-start >= a->numlevel)
start = 0;
else
start = (int) (a->numlevel) + start;
}
2003-08-04 02:43:34 +02:00
if (a->numlevel - start < b->numlevel || a->numlevel == 0 || b->numlevel == 0)
{
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
PG_RETURN_INT32(-1);
}
2003-08-04 02:43:34 +02:00
startptr = LTREE_FIRST(a);
for (i = 0; i <= a->numlevel - b->numlevel; i++)
{
if (i >= start)
{
aptr = startptr;
bptr = LTREE_FIRST(b);
for (j = 0; j < b->numlevel; j++)
{
if (!(aptr->len == bptr->len && memcmp(aptr->name, bptr->name, aptr->len) == 0))
2003-08-04 02:43:34 +02:00
break;
aptr = LEVEL_NEXT(aptr);
bptr = LEVEL_NEXT(bptr);
}
2003-08-04 02:43:34 +02:00
if (j == b->numlevel)
{
found = true;
break;
}
}
2003-08-04 02:43:34 +02:00
startptr = LEVEL_NEXT(startptr);
}
2003-08-04 02:43:34 +02:00
if (!found)
i = -1;
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
PG_RETURN_INT32(i);
}
Datum
2002-09-04 22:31:48 +02:00
ltree_textadd(PG_FUNCTION_ARGS)
{
ltree *a = PG_GETARG_LTREE_P(1);
text *b = PG_GETARG_TEXT_PP(0);
2002-09-04 22:31:48 +02:00
char *s;
ltree *r,
*tmp;
s = text_to_cstring(b);
2002-09-04 22:31:48 +02:00
tmp = (ltree *) DatumGetPointer(DirectFunctionCall1(ltree_in,
PointerGetDatum(s)));
2002-09-04 22:31:48 +02:00
pfree(s);
r = ltree_concat(tmp, a);
pfree(tmp);
PG_FREE_IF_COPY(a, 1);
PG_FREE_IF_COPY(b, 0);
PG_RETURN_POINTER(r);
}
/*
* Common code for variants of lca(), find longest common ancestor of inputs
*
* Returns NULL if there is no common ancestor, ie, the longest common
* prefix is empty.
*/
2002-09-04 22:31:48 +02:00
ltree *
lca_inner(ltree **a, int len)
2002-09-04 22:31:48 +02:00
{
int tmp,
num,
i,
reslen;
ltree **ptr;
2002-09-04 22:31:48 +02:00
ltree_level *l1,
*l2;
ltree *res;
if (len <= 0)
return NULL; /* no inputs? */
2002-09-04 22:31:48 +02:00
if ((*a)->numlevel == 0)
return NULL; /* any empty input means NULL result */
/* num is the length of the longest common ancestor so far */
num = (*a)->numlevel - 1;
/* Compare each additional input to *a */
ptr = a + 1;
2002-09-04 22:31:48 +02:00
while (ptr - a < len)
{
if ((*ptr)->numlevel == 0)
return NULL;
2002-09-04 22:31:48 +02:00
else if ((*ptr)->numlevel == 1)
num = 0;
else
{
l1 = LTREE_FIRST(*a);
l2 = LTREE_FIRST(*ptr);
tmp = Min(num, (*ptr)->numlevel - 1);
2002-09-04 22:31:48 +02:00
num = 0;
for (i = 0; i < tmp; i++)
2002-09-04 22:31:48 +02:00
{
if (l1->len == l2->len &&
memcmp(l1->name, l2->name, l1->len) == 0)
2002-09-04 22:31:48 +02:00
num = i + 1;
else
break;
2002-09-04 22:31:48 +02:00
l1 = LEVEL_NEXT(l1);
l2 = LEVEL_NEXT(l2);
}
}
ptr++;
}
/* Now compute size of result ... */
reslen = LTREE_HDRSIZE;
l1 = LTREE_FIRST(*a);
2002-09-04 22:31:48 +02:00
for (i = 0; i < num; i++)
{
reslen += MAXALIGN(l1->len + LEVEL_HDRSIZE);
2002-09-04 22:31:48 +02:00
l1 = LEVEL_NEXT(l1);
}
/* ... and construct it by copying from *a */
res = (ltree *) palloc0(reslen);
SET_VARSIZE(res, reslen);
res->numlevel = num;
l1 = LTREE_FIRST(*a);
l2 = LTREE_FIRST(res);
2002-09-04 22:31:48 +02:00
for (i = 0; i < num; i++)
{
memcpy(l2, l1, MAXALIGN(l1->len + LEVEL_HDRSIZE));
l1 = LEVEL_NEXT(l1);
l2 = LEVEL_NEXT(l2);
}
return res;
}
Datum
2002-09-04 22:31:48 +02:00
lca(PG_FUNCTION_ARGS)
{
int i;
ltree **a,
*res;
a = (ltree **) palloc(sizeof(ltree *) * fcinfo->nargs);
for (i = 0; i < fcinfo->nargs; i++)
a[i] = PG_GETARG_LTREE_P(i);
2002-09-04 22:31:48 +02:00
res = lca_inner(a, (int) fcinfo->nargs);
for (i = 0; i < fcinfo->nargs; i++)
PG_FREE_IF_COPY(a[i], i);
pfree(a);
2002-09-04 22:31:48 +02:00
if (res)
PG_RETURN_POINTER(res);
else
PG_RETURN_NULL();
}
Datum
text2ltree(PG_FUNCTION_ARGS)
{
text *in = PG_GETARG_TEXT_PP(0);
char *s;
2003-08-04 02:43:34 +02:00
ltree *out;
s = text_to_cstring(in);
out = (ltree *) DatumGetPointer(DirectFunctionCall1(ltree_in,
PointerGetDatum(s)));
pfree(s);
2003-08-04 02:43:34 +02:00
PG_FREE_IF_COPY(in, 0);
PG_RETURN_POINTER(out);
}
Datum
ltree2text(PG_FUNCTION_ARGS)
{
ltree *in = PG_GETARG_LTREE_P(0);
2003-08-04 02:43:34 +02:00
char *ptr;
int i;
ltree_level *curlevel;
2003-08-04 02:43:34 +02:00
text *out;
out = (text *) palloc(VARSIZE(in) + VARHDRSZ);
2003-08-04 02:43:34 +02:00
ptr = VARDATA(out);
curlevel = LTREE_FIRST(in);
2003-08-04 02:43:34 +02:00
for (i = 0; i < in->numlevel; i++)
{
if (i != 0)
{
*ptr = '.';
ptr++;
}
memcpy(ptr, curlevel->name, curlevel->len);
ptr += curlevel->len;
curlevel = LEVEL_NEXT(curlevel);
}
2003-08-04 02:43:34 +02:00
SET_VARSIZE(out, ptr - ((char *) out));
PG_FREE_IF_COPY(in, 0);
2003-08-04 02:43:34 +02:00
PG_RETURN_POINTER(out);
}
/*
* ltreeparentsel - Selectivity of parent relationship for ltree data types.
Improve selectivity estimation for assorted match-style operators. Quite a few matching operators such as JSONB's @> used "contsel" and "contjoinsel" as their selectivity estimators. That was a bad idea, because (a) contsel is only a stub, yielding a fixed default estimate, and (b) that default is 0.001, meaning we estimate these operators as five times more selective than equality, which is surely pretty silly. There's a good model for improving this in ltree's ltreeparentsel(): for any "var OP constant" query, we can try applying the operator to all of the column's MCV and histogram values, taking the latter as being a random sample of the non-MCV values. That code is actually 100% generic, except for the question of exactly what default selectivity ought to be plugged in when we don't have stats. Hence, migrate the guts of ltreeparentsel() into the core code, provide wrappers "matchingsel" and "matchingjoinsel" with a more-appropriate default estimate, and use those for the non-geometric operators that formerly used contsel (mostly JSONB containment operators and tsquery matching). Also apply this code to some match-like operators in hstore, ltree, and pg_trgm, including the former users of ltreeparentsel as well as ones that improperly used contsel. Since commit 911e70207 just created new versions of those extensions that we haven't released yet, we can sneak this change into those new versions instead of having to create an additional generation of update scripts. Patch by me, reviewed by Alexey Bashtanov Discussion: https://postgr.es/m/12237.1582833074@sss.pgh.pa.us
2020-04-01 16:32:33 +02:00
*
* This function is not used anymore, if the ltree extension has been
* updated to 1.2 or later.
*/
Datum
ltreeparentsel(PG_FUNCTION_ARGS)
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
double selec;
Improve selectivity estimation for assorted match-style operators. Quite a few matching operators such as JSONB's @> used "contsel" and "contjoinsel" as their selectivity estimators. That was a bad idea, because (a) contsel is only a stub, yielding a fixed default estimate, and (b) that default is 0.001, meaning we estimate these operators as five times more selective than equality, which is surely pretty silly. There's a good model for improving this in ltree's ltreeparentsel(): for any "var OP constant" query, we can try applying the operator to all of the column's MCV and histogram values, taking the latter as being a random sample of the non-MCV values. That code is actually 100% generic, except for the question of exactly what default selectivity ought to be plugged in when we don't have stats. Hence, migrate the guts of ltreeparentsel() into the core code, provide wrappers "matchingsel" and "matchingjoinsel" with a more-appropriate default estimate, and use those for the non-geometric operators that formerly used contsel (mostly JSONB containment operators and tsquery matching). Also apply this code to some match-like operators in hstore, ltree, and pg_trgm, including the former users of ltreeparentsel as well as ones that improperly used contsel. Since commit 911e70207 just created new versions of those extensions that we haven't released yet, we can sneak this change into those new versions instead of having to create an additional generation of update scripts. Patch by me, reviewed by Alexey Bashtanov Discussion: https://postgr.es/m/12237.1582833074@sss.pgh.pa.us
2020-04-01 16:32:33 +02:00
/* Use generic restriction selectivity logic, with default 0.001. */
selec = generic_restriction_selectivity(root, operator,
args, varRelid,
0.001);
PG_RETURN_FLOAT8((float8) selec);
}