Improve the gin index scan performance in pg_trgm.

Previous coding assumes too pessimistic upper bound of similarity
in consistent method of GIN.

Author: Fornaroli Christophe with comments by me.
This commit is contained in:
Teodor Sigaev 2015-12-25 13:05:13 +03:00
parent a9246fbf66
commit 25bfa7efd0
1 changed files with 20 additions and 8 deletions

View File

@ -190,11 +190,23 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
if (check[i])
ntrue++;
}
#ifdef DIVUNION
res = (nkeys == ntrue) ? true : ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >= trgm_limit) ? true : false);
#else
/*--------------------
* If DIVUNION is defined then similarity formula is:
* c / (len1 + len2 - c)
* where c is number of common trigrams and it stands as ntrue in
* this code. Here we don't know value of len2 but we can assume
* that c (ntrue) is a lower bound of len2, so upper bound of
* similarity is:
* c / (len1 + c - c) => c / len1
* If DIVUNION is not defined then similarity formula is:
* c / max(len1, len2)
* And again, c (ntrue) is a lower bound of len2, but c <= len1
* just by definition and, consequently, upper bound of
* similarity is just c / len1.
* So, independly on DIVUNION the upper bound formula is the same.
*/
res = (nkeys == 0) ? false : ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_limit) ? true : false);
#endif
break;
case ILikeStrategyNumber:
#ifndef IGNORECASE
@ -267,11 +279,11 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
if (check[i] != GIN_FALSE)
ntrue++;
}
#ifdef DIVUNION
res = (nkeys == ntrue) ? GIN_MAYBE : (((((float4) ntrue) / ((float4) (nkeys - ntrue))) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE);
#else
/*
* See comment in gin_trgm_consistent() about * upper bound formula
*/
res = (nkeys == 0) ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE);
#endif
break;
case ILikeStrategyNumber:
#ifndef IGNORECASE