GUC variable pg_trgm.similarity_threshold instead of set_limit()

Use the GUC variable pg_trgm.similarity_threshold instead of
set_limit()/show_limit(), which were introduced at a time when modules
could not yet define GUC variables.

Author: Artur Zakirov
This commit is contained in:
Teodor Sigaev 2016-03-16 17:44:58 +03:00
parent f9e5ed61ed
commit 5871b88487
6 changed files with 78 additions and 15 deletions

View File

@ -3,11 +3,13 @@
-- complain if script is sourced in psql, rather than via CREATE EXTENSION -- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION pg_trgm" to load this file. \quit \echo Use "CREATE EXTENSION pg_trgm" to load this file. \quit
-- Deprecated function
CREATE FUNCTION set_limit(float4) CREATE FUNCTION set_limit(float4)
RETURNS float4 RETURNS float4
AS 'MODULE_PATHNAME' AS 'MODULE_PATHNAME'
LANGUAGE C STRICT VOLATILE; LANGUAGE C STRICT VOLATILE;
-- Deprecated function
CREATE FUNCTION show_limit() CREATE FUNCTION show_limit()
RETURNS float4 RETURNS float4
AS 'MODULE_PATHNAME' AS 'MODULE_PATHNAME'
@ -26,7 +28,7 @@ LANGUAGE C STRICT IMMUTABLE;
CREATE FUNCTION similarity_op(text,text) CREATE FUNCTION similarity_op(text,text)
RETURNS bool RETURNS bool
AS 'MODULE_PATHNAME' AS 'MODULE_PATHNAME'
LANGUAGE C STRICT STABLE; -- stable because depends on trgm_limit LANGUAGE C STRICT STABLE; -- stable because depends on pg_trgm.similarity_threshold
CREATE OPERATOR % ( CREATE OPERATOR % (
LEFTARG = text, LEFTARG = text,

View File

@ -105,7 +105,7 @@ typedef char *BITVECP;
typedef struct TrgmPackedGraph TrgmPackedGraph; typedef struct TrgmPackedGraph TrgmPackedGraph;
extern float4 trgm_limit; extern double similarity_threshold;
extern uint32 trgm2int(trgm *ptr); extern uint32 trgm2int(trgm *ptr);
extern void compact_trigram(trgm *tptr, char *str, int bytelen); extern void compact_trigram(trgm *tptr, char *str, int bytelen);

View File

@ -206,7 +206,9 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
* similarity is just c / len1. * similarity is just c / len1.
* So, independly on DIVUNION the upper bound formula is the same. * So, independly on DIVUNION the upper bound formula is the same.
*/ */
res = (nkeys == 0) ? false : ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_limit) ? true : false); res = (nkeys == 0) ? false :
((((((float4) ntrue) / ((float4) nkeys))) >= similarity_threshold)
? true : false);
break; break;
case ILikeStrategyNumber: case ILikeStrategyNumber:
#ifndef IGNORECASE #ifndef IGNORECASE
@ -283,7 +285,9 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
/* /*
* See comment in gin_trgm_consistent() about * upper bound formula * See comment in gin_trgm_consistent() about * upper bound formula
*/ */
res = (nkeys == 0) ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE); res = (nkeys == 0) ? GIN_FALSE :
(((((float4) ntrue) / ((float4) nkeys)) >= similarity_threshold)
? GIN_MAYBE : GIN_FALSE);
break; break;
case ILikeStrategyNumber: case ILikeStrategyNumber:
#ifndef IGNORECASE #ifndef IGNORECASE

View File

@ -294,7 +294,8 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
float4 tmpsml = cnt_sml(key, qtrg); float4 tmpsml = cnt_sml(key, qtrg);
/* strange bug at freebsd 5.2.1 and gcc 3.3.3 */ /* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
res = (*(int *) &tmpsml == *(int *) &trgm_limit || tmpsml > trgm_limit) ? true : false; res = (*(int *) &tmpsml == *(int *) &similarity_threshold
|| tmpsml > similarity_threshold) ? true : false;
} }
else if (ISALLTRUE(key)) else if (ISALLTRUE(key))
{ /* non-leaf contains signature */ { /* non-leaf contains signature */
@ -308,7 +309,8 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
if (len == 0) if (len == 0)
res = false; res = false;
else else
res = (((((float8) count) / ((float8) len))) >= trgm_limit) ? true : false; res = (((((float8) count) / ((float8) len))) >= similarity_threshold)
? true : false;
} }
break; break;
case ILikeStrategyNumber: case ILikeStrategyNumber:

View File

@ -14,7 +14,10 @@
PG_MODULE_MAGIC; PG_MODULE_MAGIC;
float4 trgm_limit = 0.3f; /* GUC variables */
double similarity_threshold = 0.3f;
void _PG_init(void);
PG_FUNCTION_INFO_V1(set_limit); PG_FUNCTION_INFO_V1(set_limit);
PG_FUNCTION_INFO_V1(show_limit); PG_FUNCTION_INFO_V1(show_limit);
@ -23,22 +26,52 @@ PG_FUNCTION_INFO_V1(similarity);
PG_FUNCTION_INFO_V1(similarity_dist); PG_FUNCTION_INFO_V1(similarity_dist);
PG_FUNCTION_INFO_V1(similarity_op); PG_FUNCTION_INFO_V1(similarity_op);
/*
 * Module load callback.
 *
 * Registers the custom GUC variable "pg_trgm.similarity_threshold", which is
 * stored in similarity_threshold.  The value defaults to 0.3 and is limited
 * to the range 0.0 .. 1.0.
 */
void
_PG_init(void)
{
	/*
	 * Define custom GUC variables.
	 *
	 * The description strings are displayed as-is rather than being used as
	 * printf-style formats, so the operator name is written with a single
	 * percent sign.
	 */
	DefineCustomRealVariable("pg_trgm.similarity_threshold",
							 "Sets the threshold used by the % operator.",
							 "Valid range is 0.0 .. 1.0.",
							 &similarity_threshold,
							 0.3,	/* default */
							 0.0,	/* minimum */
							 1.0,	/* maximum */
							 PGC_USERSET,
							 0,
							 NULL,	/* check hook: none */
							 NULL,	/* assign hook: none */
							 NULL);	/* show hook: none */
}
/*
* Deprecated function.
* Use "pg_trgm.similarity_threshold" GUC variable instead of this function
*/
Datum Datum
set_limit(PG_FUNCTION_ARGS) set_limit(PG_FUNCTION_ARGS)
{ {
float4 nlimit = PG_GETARG_FLOAT4(0); float4 nlimit = PG_GETARG_FLOAT4(0);
if (nlimit < 0 || nlimit > 1.0) if (nlimit < 0 || nlimit > 1.0)
elog(ERROR, "wrong limit, should be between 0 and 1"); ereport(ERROR,
trgm_limit = nlimit; (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
PG_RETURN_FLOAT4(trgm_limit); errmsg("wrong limit, should be between 0 and 1")));
similarity_threshold = nlimit;
PG_RETURN_FLOAT4(similarity_threshold);
} }
/*
* Deprecated function.
* Use "pg_trgm.similarity_threshold" GUC variable instead of this function
*/
Datum Datum
show_limit(PG_FUNCTION_ARGS) show_limit(PG_FUNCTION_ARGS)
{ {
PG_RETURN_FLOAT4(trgm_limit); PG_RETURN_FLOAT4(similarity_threshold);
} }
static int static int
@ -720,5 +753,5 @@ similarity_op(PG_FUNCTION_ARGS)
PG_GETARG_DATUM(0), PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1))); PG_GETARG_DATUM(1)));
PG_RETURN_BOOL(res >= trgm_limit); PG_RETURN_BOOL(res >= similarity_threshold);
} }

View File

@ -99,7 +99,8 @@
Returns the current similarity threshold used by the <literal>%</> Returns the current similarity threshold used by the <literal>%</>
operator. This sets the minimum similarity between operator. This sets the minimum similarity between
two words for them to be considered similar enough to two words for them to be considered similar enough to
be misspellings of each other, for example. be misspellings of each other, for example
(<emphasis>deprecated</emphasis>).
</entry> </entry>
</row> </row>
<row> <row>
@ -108,7 +109,7 @@
<entry> <entry>
Sets the current similarity threshold that is used by the <literal>%</> Sets the current similarity threshold that is used by the <literal>%</>
operator. The threshold must be between 0 and 1 (default is 0.3). operator. The threshold must be between 0 and 1 (default is 0.3).
Returns the same value passed in. Returns the same value passed in (<emphasis>deprecated</emphasis>).
</entry> </entry>
</row> </row>
</tbody> </tbody>
@ -133,7 +134,7 @@
<entry> <entry>
Returns <literal>true</> if its arguments have a similarity that is Returns <literal>true</> if its arguments have a similarity that is
greater than the current similarity threshold set by greater than the current similarity threshold set by
<function>set_limit</>. <varname>pg_trgm.similarity_threshold</>.
</entry> </entry>
</row> </row>
<row> <row>
@ -149,6 +150,27 @@
</table> </table>
</sect2> </sect2>
<sect2>
<title>GUC Parameters</title>
<variablelist>
<varlistentry id="guc-pgtrgm-similarity-threshold" xreflabel="pg_trgm.similarity_threshold">
<term>
<varname>pg_trgm.similarity_threshold</> (<type>real</type>)
<indexterm>
<primary><varname>pg_trgm.similarity_threshold</> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Sets the current similarity threshold that is used by the <literal>%</>
operator. The threshold must be between 0 and 1 (default is 0.3).
</para>
</listitem>
</varlistentry>
</variablelist>
</sect2>
<sect2> <sect2>
<title>Index Support</title> <title>Index Support</title>