diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml index 31753791cd..9366fdd240 100644 --- a/doc/src/sgml/textsearch.sgml +++ b/doc/src/sgml/textsearch.sgml @@ -1,4 +1,4 @@ - + Full Text Search @@ -940,6 +940,7 @@ SELECT plainto_tsquery('english', 'The Fat & Rats:C'); 4 divides the rank by the mean harmonic distance between extents + (this is implemented only by ts_rank_cd) @@ -953,17 +954,24 @@ SELECT plainto_tsquery('english', 'The Fat & Rats:C'); of unique words in document + + + 32 divides the rank by (itself + 1) + + + If more than one flag bit is specified, the transformations are + applied in the order listed. It is important to note that the ranking functions do not use any global - information so it is impossible to produce a fair normalization to 1% or - 100%, as sometimes desired. However, a simple technique like - rank/(rank+1) can be applied. Of course, this is just - a cosmetic change, i.e., the ordering of the search results will not - change. + information, so it is impossible to produce a fair normalization to 1% or + 100% as sometimes desired. Normalization option 32 + (rank/(rank+1)) can be applied to scale all ranks + into the range zero to one, but of course this is just a cosmetic change; + it will not affect the ordering of the search results. 
@@ -991,7 +999,7 @@ ORDER BY rank DESC LIMIT 10; This is the same example using normalized ranking: -SELECT title, ts_rank_cd(textsearch, query)/(ts_rank_cd(textsearch, query) + 1) AS rank +SELECT title, ts_rank_cd(textsearch, query, 32 /* rank/(rank+1) */ ) AS rank FROM apod, to_tsquery('neutrino|(dark & matter)') query WHERE query @@ textsearch ORDER BY rank DESC LIMIT 10; diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c index bf0016d76b..297724710f 100644 --- a/src/backend/utils/adt/tsrank.c +++ b/src/backend/utils/adt/tsrank.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.8 2007/09/20 18:10:57 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.9 2007/11/14 23:43:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,13 +25,14 @@ static float weights[] = {0.1f, 0.2f, 0.4f, 1.0f}; #define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] ) -#define RANK_NO_NORM 0x00 +#define RANK_NO_NORM 0x00 #define RANK_NORM_LOGLENGTH 0x01 -#define RANK_NORM_LENGTH 0x02 -#define RANK_NORM_EXTDIST 0x04 -#define RANK_NORM_UNIQ 0x08 -#define RANK_NORM_LOGUNIQ 0x10 -#define DEF_NORM_METHOD RANK_NO_NORM +#define RANK_NORM_LENGTH 0x02 +#define RANK_NORM_EXTDIST 0x04 +#define RANK_NORM_UNIQ 0x08 +#define RANK_NORM_LOGUNIQ 0x10 +#define RANK_NORM_RDIVRPLUS1 0x20 +#define DEF_NORM_METHOD RANK_NO_NORM static float calc_rank_or(float *w, TSVector t, TSQuery q); static float calc_rank_and(float *w, TSVector t, TSQuery q); @@ -348,12 +349,17 @@ calc_rank(float *w, TSVector t, TSQuery q, int4 method) res /= (float) len; } + /* RANK_NORM_EXTDIST not applicable */ + if ((method & RANK_NORM_UNIQ) && t->size > 0) res /= (float) (t->size); if ((method & RANK_NORM_LOGUNIQ) && t->size > 0) res /= log((double) (t->size + 1)) / log(2.0); + if (method & RANK_NORM_RDIVRPLUS1) + res /= (res + 1); + return res; } @@ -762,7 +768,7 @@ calc_rank_cd(float4 *arrdata, TSVector txt, 
TSQuery query, int method) Wdoc /= (double) len; } - if ((method & RANK_NORM_EXTDIST) && SumDist > 0) + if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0) Wdoc /= ((double) NExtent) / SumDist; if ((method & RANK_NORM_UNIQ) && txt->size > 0) @@ -771,6 +777,9 @@ calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method) if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0) Wdoc /= log((double) (txt->size + 1)) / log(2.0); + if (method & RANK_NORM_RDIVRPLUS1) + Wdoc /= (Wdoc + 1); + pfree(doc); pfree( qr.operandexist );