From 59d61409cd72a2e6688f588d17a73c1bc665df26 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 26 Apr 2006 22:33:36 +0000 Subject: [PATCH] Move ltree parentsel() selectivity function into /contrib/ltree. --- contrib/ltree/expected/ltree.out | 12 +- contrib/ltree/ltree.sql.in | 5 + contrib/ltree/ltree_op.c | 188 +++++++++++++++++++++++- src/backend/utils/adt/geo_selfuncs.c | 4 +- src/backend/utils/adt/selfuncs.c | 206 +-------------------------- src/include/catalog/catversion.h | 4 +- src/include/catalog/pg_proc.h | 4 +- src/include/utils/selfuncs.h | 28 +++- 8 files changed, 230 insertions(+), 221 deletions(-) diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out index 3ce1ecf293..2546490d22 100644 --- a/contrib/ltree/expected/ltree.out +++ b/contrib/ltree/expected/ltree.out @@ -2,15 +2,15 @@ psql:ltree.sql:7: NOTICE: type "ltree" is not yet defined DETAIL: Creating a shell type definition. psql:ltree.sql:12: NOTICE: argument type ltree is only a shell -psql:ltree.sql:299: NOTICE: type "lquery" is not yet defined +psql:ltree.sql:304: NOTICE: type "lquery" is not yet defined DETAIL: Creating a shell type definition. -psql:ltree.sql:304: NOTICE: argument type lquery is only a shell -psql:ltree.sql:410: NOTICE: type "ltxtquery" is not yet defined +psql:ltree.sql:309: NOTICE: argument type lquery is only a shell +psql:ltree.sql:415: NOTICE: type "ltxtquery" is not yet defined DETAIL: Creating a shell type definition. -psql:ltree.sql:415: NOTICE: argument type ltxtquery is only a shell -psql:ltree.sql:477: NOTICE: type "ltree_gist" is not yet defined +psql:ltree.sql:420: NOTICE: argument type ltxtquery is only a shell +psql:ltree.sql:482: NOTICE: type "ltree_gist" is not yet defined DETAIL: Creating a shell type definition. 
-psql:ltree.sql:482: NOTICE: argument type ltree_gist is only a shell +psql:ltree.sql:487: NOTICE: argument type ltree_gist is only a shell SELECT ''::ltree; ltree ------- diff --git a/contrib/ltree/ltree.sql.in b/contrib/ltree/ltree.sql.in index bdb8bdf52c..df32c0c501 100644 --- a/contrib/ltree/ltree.sql.in +++ b/contrib/ltree/ltree.sql.in @@ -225,6 +225,11 @@ RETURNS ltree AS 'MODULE_PATHNAME' LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE; +CREATE FUNCTION ltreeparentsel(internal, oid, internal, integer) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE; + CREATE OPERATOR @> ( LEFTARG = ltree, RIGHTARG = ltree, diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c index ec2d0fbf5d..6b894861dd 100644 --- a/contrib/ltree/ltree_op.c +++ b/contrib/ltree/ltree_op.c @@ -1,12 +1,19 @@ /* * op function for ltree * Teodor Sigaev - * $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.9 2006/03/11 04:38:29 momjian Exp $ + * $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.10 2006/04/26 22:32:36 momjian Exp $ */ #include "ltree.h" #include <ctype.h> +#include "access/heapam.h" +#include "catalog/pg_statistic.h" +#include "nodes/relation.h" +#include "utils/lsyscache.h" +#include "utils/selfuncs.h" +#include "utils/syscache.h" + /* compare functions */ PG_FUNCTION_INFO_V1(ltree_cmp); PG_FUNCTION_INFO_V1(ltree_lt); @@ -44,6 +51,7 @@ Datum ltree_textadd(PG_FUNCTION_ARGS); Datum lca(PG_FUNCTION_ARGS); Datum ltree2text(PG_FUNCTION_ARGS); Datum text2ltree(PG_FUNCTION_ARGS); +Datum ltreeparentsel(PG_FUNCTION_ARGS); int ltree_compare(const ltree * a, const ltree * b) @@ -551,3 +559,181 @@ ltree2text(PG_FUNCTION_ARGS) PG_RETURN_POINTER(out); } + + +#define DEFAULT_PARENT_SEL 0.001 + +/* + * ltreeparentsel - Selectivity of parent relationship for ltree data types. 
+ */ +Datum +ltreeparentsel(PG_FUNCTION_ARGS) +{ + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + VariableStatData vardata; + Node *other; + bool varonleft; + Datum *values; + int nvalues; + float4 *numbers; + int nnumbers; + double selec = 0.0; + + /* + * If expression is not variable <@ something or something <@ variable, + * then punt and return a default estimate. + */ + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL); + + /* + * If the something is a NULL constant, assume operator is strict and + * return zero, ie, operator will never return TRUE. + */ + if (IsA(other, Const) && + ((Const *) other)->constisnull) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(0.0); + } + + if (HeapTupleIsValid(vardata.statsTuple)) + { + Form_pg_statistic stats; + double mcvsum = 0.0; + double mcvsel = 0.0; + double hissel = 0.0; + + stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); + + if (IsA(other, Const)) + { + /* Variable is being compared to a known non-null constant */ + Datum constval = ((Const *) other)->constvalue; + bool match = false; + int i; + + /* + * Is the constant "<@" to any of the column's most common values? 
+ */ + if (get_attstatsslot(vardata.statsTuple, + vardata.atttype, vardata.atttypmod, + STATISTIC_KIND_MCV, InvalidOid, + &values, &nvalues, + &numbers, &nnumbers)) + { + FmgrInfo contproc; + + fmgr_info(get_opcode(operator), &contproc); + + for (i = 0; i < nvalues; i++) + { + /* be careful to apply operator right way 'round */ + if (varonleft) + match = DatumGetBool(FunctionCall2(&contproc, + values[i], + constval)); + else + match = DatumGetBool(FunctionCall2(&contproc, + constval, + values[i])); + + /* calculate total selectivity of all most-common-values */ + mcvsum += numbers[i]; + + /* calculate selectivity of matching most-common-values */ + if (match) + mcvsel += numbers[i]; + } + } + else + { + /* no most-common-values info available */ + values = NULL; + numbers = NULL; + i = nvalues = nnumbers = 0; + } + + free_attstatsslot(vardata.atttype, values, nvalues, NULL, 0); + + /* + * Is the constant "<@" to any of the column's histogram values? + */ + if (get_attstatsslot(vardata.statsTuple, + vardata.atttype, vardata.atttypmod, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + &values, &nvalues, + NULL, NULL)) + { + FmgrInfo contproc; + + fmgr_info(get_opcode(operator), &contproc); + + for (i = 0; i < nvalues; i++) + { + /* be careful to apply operator right way 'round */ + if (varonleft) + match = DatumGetBool(FunctionCall2(&contproc, + values[i], + constval)); + else + match = DatumGetBool(FunctionCall2(&contproc, + constval, + values[i])); + /* count matching histogram values */ + if (match) + hissel++; + } + + if (hissel > 0.0) + { + /* + * some matching values found inside histogram, divide + * matching entries number by total histogram entries to + * get the histogram related selectivity + */ + hissel /= nvalues; + } + } + else + { + /* no histogram info available */ + values = NULL; + i = nvalues = 0; + } + + free_attstatsslot(vardata.atttype, values, nvalues, + NULL, 0); + + + /* + * calculate selectivity based on MCV and histogram result + * histogram 
selectivity needs to be scaled down if there are any + * most-common-values + */ + selec = mcvsel + hissel * (1.0 - mcvsum); + + /* + * don't return 0.0 selectivity unless all table values are inside + * mcv + */ + if (selec == 0.0 && mcvsum != 1.0) + selec = DEFAULT_PARENT_SEL; + } + else + selec = DEFAULT_PARENT_SEL; + } + else + selec = DEFAULT_PARENT_SEL; + + ReleaseVariableStats(vardata); + + /* result should be in range, but make sure... */ + CLAMP_PROBABILITY(selec); + + PG_RETURN_FLOAT8((float8) selec); +} diff --git a/src/backend/utils/adt/geo_selfuncs.c b/src/backend/utils/adt/geo_selfuncs.c index 1bc7f0679d..9c450c9e44 100644 --- a/src/backend/utils/adt/geo_selfuncs.c +++ b/src/backend/utils/adt/geo_selfuncs.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/geo_selfuncs.c,v 1.28 2006/04/26 18:28:29 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/geo_selfuncs.c,v 1.29 2006/04/26 22:32:52 momjian Exp $ * * XXX These are totally bogus. Perhaps someone will make them do * something reasonable, someday. @@ -20,6 +20,7 @@ #include "utils/geo_decls.h" + /* * Selectivity functions for geometric operators. 
These are bogus -- unless * we know the actual key distribution in the index, we can't make a good @@ -92,4 +93,3 @@ contjoinsel(PG_FUNCTION_ARGS) { PG_RETURN_FLOAT8(0.001); } - diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 810a8f6db1..1b37f38e64 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.200 2006/04/26 18:28:29 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.201 2006/04/26 22:32:56 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -111,26 +111,6 @@ #include "utils/syscache.h" -/* Return data from examine_variable and friends */ -typedef struct -{ - Node *var; /* the Var or expression tree */ - RelOptInfo *rel; /* Relation, or NULL if not identifiable */ - HeapTuple statsTuple; /* pg_statistic tuple, or NULL if none */ - /* NB: if statsTuple!=NULL, it must be freed when caller is done */ - Oid vartype; /* exposed type of expression */ - Oid atttype; /* type to pass to get_attstatsslot */ - int32 atttypmod; /* typmod to pass to get_attstatsslot */ - bool isunique; /* true if matched to a unique index */ -} VariableStatData; - -#define ReleaseVariableStats(vardata) \ - do { \ - if (HeapTupleIsValid((vardata).statsTuple)) \ - ReleaseSysCache((vardata).statsTuple); \ - } while(0) - - static double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Datum constval, double *sumcommonp); static double ineq_histogram_selectivity(VariableStatData *vardata, @@ -158,9 +138,6 @@ static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen, int rangelo, int rangehi); static char *convert_string_datum(Datum value, Oid typid); static double convert_timevalue_to_scalar(Datum value, Oid typid); -static bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, - VariableStatData *vardata, 
Node **other, - bool *varonleft); static void get_join_variables(PlannerInfo *root, List *args, VariableStatData *vardata1, VariableStatData *vardata2); @@ -3172,7 +3149,7 @@ convert_timevalue_to_scalar(Datum value, Oid typid) * Note: if there are Vars on both sides of the clause, we must fail, because * callers are expecting that the other side will act like a pseudoconstant. */ -static bool +bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft) @@ -4852,182 +4829,3 @@ gistcostestimate(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } - - -#define DEFAULT_PARENT_SEL 0.001 - -/* - * parentsel - Selectivity of parent relationship for ltree data types. - */ -Datum -parentsel(PG_FUNCTION_ARGS) -{ - PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); - Oid operator = PG_GETARG_OID(1); - List *args = (List *) PG_GETARG_POINTER(2); - int varRelid = PG_GETARG_INT32(3); - VariableStatData vardata; - Node *other; - bool varonleft; - Datum *values; - int nvalues; - float4 *numbers; - int nnumbers; - double selec = 0.0; - - /* - * If expression is not variable <@ something or something <@ variable, - * then punt and return a default estimate. - */ - if (!get_restriction_variable(root, args, varRelid, - &vardata, &other, &varonleft)) - PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL); - - /* - * If the something is a NULL constant, assume operator is strict and - * return zero, ie, operator will never return TRUE. 
- */ - if (IsA(other, Const) && - ((Const *) other)->constisnull) - { - ReleaseVariableStats(vardata); - PG_RETURN_FLOAT8(0.0); - } - - if (HeapTupleIsValid(vardata.statsTuple)) - { - Form_pg_statistic stats; - double mcvsum = 0.0; - double mcvsel = 0.0; - double hissel = 0.0; - - stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); - - if (IsA(other, Const)) - { - /* Variable is being compared to a known non-null constant */ - Datum constval = ((Const *) other)->constvalue; - bool match = false; - int i; - - /* - * Is the constant "<@" to any of the column's most common values? - */ - if (get_attstatsslot(vardata.statsTuple, - vardata.atttype, vardata.atttypmod, - STATISTIC_KIND_MCV, InvalidOid, - &values, &nvalues, - &numbers, &nnumbers)) - { - FmgrInfo contproc; - - fmgr_info(get_opcode(operator), &contproc); - - for (i = 0; i < nvalues; i++) - { - /* be careful to apply operator right way 'round */ - if (varonleft) - match = DatumGetBool(FunctionCall2(&contproc, - values[i], - constval)); - else - match = DatumGetBool(FunctionCall2(&contproc, - constval, - values[i])); - - /* calculate total selectivity of all most-common-values */ - mcvsum += numbers[i]; - - /* calculate selectivity of matching most-common-values */ - if (match) - mcvsel += numbers[i]; - } - } - else - { - /* no most-common-values info available */ - values = NULL; - numbers = NULL; - i = nvalues = nnumbers = 0; - } - - free_attstatsslot(vardata.atttype, values, nvalues, NULL, 0); - - /* - * Is the constant "<@" to any of the column's histogram values? 
- */ - if (get_attstatsslot(vardata.statsTuple, - vardata.atttype, vardata.atttypmod, - STATISTIC_KIND_HISTOGRAM, InvalidOid, - &values, &nvalues, - NULL, NULL)) - { - FmgrInfo contproc; - - fmgr_info(get_opcode(operator), &contproc); - - for (i = 0; i < nvalues; i++) - { - /* be careful to apply operator right way 'round */ - if (varonleft) - match = DatumGetBool(FunctionCall2(&contproc, - values[i], - constval)); - else - match = DatumGetBool(FunctionCall2(&contproc, - constval, - values[i])); - /* count matching histogram values */ - if (match) - hissel++; - } - - if (hissel > 0.0) - { - /* - * some matching values found inside histogram, divide - * matching entries number by total histogram entries to - * get the histogram related selectivity - */ - hissel /= nvalues; - } - } - else - { - /* no histogram info available */ - values = NULL; - i = nvalues = 0; - } - - free_attstatsslot(vardata.atttype, values, nvalues, - NULL, 0); - - - /* - * calculate selectivity based on MCV and histogram result - * histogram selectivity needs to be scaled down if there are any - * most-common-values - */ - selec = mcvsel + hissel * (1.0 - mcvsum); - - /* - * don't return 0.0 selectivity unless all table values are inside - * mcv - */ - if (selec == 0.0 && mcvsum != 1.0) - selec = DEFAULT_PARENT_SEL; - } - else - selec = DEFAULT_PARENT_SEL; - } - else - selec = DEFAULT_PARENT_SEL; - - ReleaseVariableStats(vardata); - - /* result should be in range, but make sure... 
*/ - CLAMP_PROBABILITY(selec); - - PG_RETURN_FLOAT8((float8) selec); -} - diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index b92cc0ab05..ff44afe909 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.325 2006/04/26 18:30:10 momjian Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.326 2006/04/26 22:33:13 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200604261 +#define CATALOG_VERSION_NO 200604262 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 53bcf3f4e3..43c87f3647 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.407 2006/04/26 18:28:30 momjian Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.408 2006/04/26 22:33:17 momjian Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -3812,8 +3812,6 @@ DATA(insert OID = 2591 ( gist_circle_consistent PGNSP PGUID 12 f f t f i 3 16 " DESCR("GiST support"); DATA(insert OID = 2592 ( gist_circle_compress PGNSP PGUID 12 f f t f i 1 2281 "2281" _null_ _null_ _null_ gist_circle_compress - _null_ )); DESCR("GiST support"); -DATA(insert OID = 2599 ( parentsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ parentsel - _null_ )); -DESCR("enhanced restriction selectivity for ltree isparent comparison operators"); /* diff --git a/src/include/utils/selfuncs.h 
b/src/include/utils/selfuncs.h index df28d73843..ad484d8b49 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.29 2006/04/26 18:28:34 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.30 2006/04/26 22:33:36 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,7 @@ #define SELFUNCS_H #include "fmgr.h" +#include "access/htup.h" #include "nodes/relation.h" @@ -62,6 +63,26 @@ } while (0) +/* Return data from examine_variable and friends */ +typedef struct +{ + Node *var; /* the Var or expression tree */ + RelOptInfo *rel; /* Relation, or NULL if not identifiable */ + HeapTuple statsTuple; /* pg_statistic tuple, or NULL if none */ + /* NB: if statsTuple!=NULL, it must be freed when caller is done */ + Oid vartype; /* exposed type of expression */ + Oid atttype; /* type to pass to get_attstatsslot */ + int32 atttypmod; /* typmod to pass to get_attstatsslot */ + bool isunique; /* true if matched to a unique index */ +} VariableStatData; + +#define ReleaseVariableStats(vardata) \ + do { \ + if (HeapTupleIsValid((vardata).statsTuple)) \ + ReleaseSysCache((vardata).statsTuple); \ + } while(0) + + typedef enum { Pattern_Type_Like, Pattern_Type_Like_IC, @@ -133,7 +154,8 @@ extern Selectivity estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, extern Datum btcostestimate(PG_FUNCTION_ARGS); extern Datum hashcostestimate(PG_FUNCTION_ARGS); extern Datum gistcostestimate(PG_FUNCTION_ARGS); - -extern Datum parentsel(PG_FUNCTION_ARGS); +extern bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, + VariableStatData *vardata, Node **other, + bool *varonleft); #endif /* SELFUNCS_H */