|
|
|
--Test text search dictionaries and configurations
|
|
|
|
-- Test ISpell dictionary with ispell affix file
|
|
|
|
CREATE TEXT SEARCH DICTIONARY ispell (
|
|
|
|
Template=ispell,
|
|
|
|
DictFile=ispell_sample,
|
|
|
|
AffFile=ispell_sample
|
|
|
|
);
|
|
|
|
SELECT ts_lexize('ispell', 'skies');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{sky}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'bookings');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'booking');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'foot');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{foot}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'foots');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{foot}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'rebookings');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'rebooking');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'rebook');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'unbookings');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'unbooking');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'unbook');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'footklubber');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{foot,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'footballklubber');
|
|
|
|
ts_lexize
|
|
|
|
------------------------------------------------------
|
|
|
|
{footballklubber,foot,ball,klubber,football,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'ballyklubber');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{ball,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('ispell', 'footballyklubber');
|
|
|
|
ts_lexize
|
|
|
|
---------------------
|
|
|
|
{foot,ball,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- Test ISpell dictionary with hunspell affix file
|
|
|
|
CREATE TEXT SEARCH DICTIONARY hunspell (
|
|
|
|
Template=ispell,
|
|
|
|
DictFile=ispell_sample,
|
|
|
|
AffFile=hunspell_sample
|
|
|
|
);
|
|
|
|
SELECT ts_lexize('hunspell', 'skies');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{sky}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'bookings');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'booking');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'foot');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{foot}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'foots');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{foot}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'rebookings');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'rebooking');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'rebook');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'unbookings');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'unbooking');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'unbook');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'footklubber');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{foot,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'footballklubber');
|
|
|
|
ts_lexize
|
|
|
|
------------------------------------------------------
|
|
|
|
{footballklubber,foot,ball,klubber,football,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'ballyklubber');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{ball,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell', 'footballyklubber');
|
|
|
|
ts_lexize
|
|
|
|
---------------------
|
|
|
|
{foot,ball,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
-- Test ISpell dictionary with hunspell affix file with FLAG long parameter
|
|
|
|
CREATE TEXT SEARCH DICTIONARY hunspell_long (
|
|
|
|
Template=ispell,
|
|
|
|
DictFile=hunspell_sample_long,
|
|
|
|
AffFile=hunspell_sample_long
|
|
|
|
);
|
|
|
|
SELECT ts_lexize('hunspell_long', 'skies');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{sky}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_long', 'bookings');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_long', 'booking');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_long', 'foot');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{foot}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_long', 'foots');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{foot}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_long', 'rebookings');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_long', 'rebooking');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_long', 'rebook');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_long', 'unbookings');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_long', 'unbooking');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_long', 'unbook');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
2018-04-13 19:49:52 +02:00
|
|
|
SELECT ts_lexize('hunspell_long', 'booked');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
SELECT ts_lexize('hunspell_long', 'footklubber');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{foot,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_long', 'footballklubber');
|
|
|
|
ts_lexize
|
|
|
|
------------------------------------------------------
|
|
|
|
{footballklubber,foot,ball,klubber,football,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_long', 'ballyklubber');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{ball,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
2018-04-13 19:49:52 +02:00
|
|
|
SELECT ts_lexize('hunspell_long', 'ballsklubber');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{ball,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
SELECT ts_lexize('hunspell_long', 'footballyklubber');
|
|
|
|
ts_lexize
|
|
|
|
---------------------
|
|
|
|
{foot,ball,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
2018-04-13 19:49:52 +02:00
|
|
|
SELECT ts_lexize('hunspell_long', 'ex-machina');
|
|
|
|
ts_lexize
|
|
|
|
---------------
|
|
|
|
{ex-,machina}
|
|
|
|
(1 row)
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
-- Test ISpell dictionary with hunspell affix file with FLAG num parameter
|
|
|
|
CREATE TEXT SEARCH DICTIONARY hunspell_num (
|
|
|
|
Template=ispell,
|
|
|
|
DictFile=hunspell_sample_num,
|
|
|
|
AffFile=hunspell_sample_num
|
|
|
|
);
|
|
|
|
SELECT ts_lexize('hunspell_num', 'skies');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{sky}
|
|
|
|
(1 row)
|
|
|
|
|
2018-04-13 19:49:52 +02:00
|
|
|
SELECT ts_lexize('hunspell_num', 'sk');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{sky}
|
|
|
|
(1 row)
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
SELECT ts_lexize('hunspell_num', 'bookings');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_num', 'booking');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_num', 'foot');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{foot}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_num', 'foots');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{foot}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_num', 'rebookings');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_num', 'rebooking');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{booking,book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_num', 'rebook');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_num', 'unbookings');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_num', 'unbooking');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_num', 'unbook');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
2018-04-13 19:49:52 +02:00
|
|
|
SELECT ts_lexize('hunspell_num', 'booked');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{book}
|
|
|
|
(1 row)
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
SELECT ts_lexize('hunspell_num', 'footklubber');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{foot,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_num', 'footballklubber');
|
|
|
|
ts_lexize
|
|
|
|
------------------------------------------------------
|
|
|
|
{footballklubber,foot,ball,klubber,football,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_num', 'ballyklubber');
|
|
|
|
ts_lexize
|
|
|
|
----------------
|
|
|
|
{ball,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('hunspell_num', 'footballyklubber');
|
|
|
|
ts_lexize
|
|
|
|
---------------------
|
|
|
|
{foot,ball,klubber}
|
|
|
|
(1 row)
|
|
|
|
|
2019-11-02 21:45:32 +01:00
|
|
|
-- Test suitability of affix and dict files
|
|
|
|
CREATE TEXT SEARCH DICTIONARY hunspell_err (
|
|
|
|
Template=ispell,
|
|
|
|
DictFile=ispell_sample,
|
|
|
|
AffFile=hunspell_sample_long
|
|
|
|
);
|
|
|
|
ERROR: invalid affix alias "GJUS"
|
|
|
|
CREATE TEXT SEARCH DICTIONARY hunspell_err (
|
|
|
|
Template=ispell,
|
|
|
|
DictFile=ispell_sample,
|
|
|
|
AffFile=hunspell_sample_num
|
|
|
|
);
|
|
|
|
ERROR: invalid affix flag "SZ\"
|
|
|
|
CREATE TEXT SEARCH DICTIONARY hunspell_invalid_1 (
|
|
|
|
Template=ispell,
|
|
|
|
DictFile=hunspell_sample_long,
|
|
|
|
AffFile=ispell_sample
|
|
|
|
);
|
|
|
|
CREATE TEXT SEARCH DICTIONARY hunspell_invalid_2 (
|
|
|
|
Template=ispell,
|
|
|
|
DictFile=hunspell_sample_long,
|
|
|
|
AffFile=hunspell_sample_num
|
|
|
|
);
|
|
|
|
CREATE TEXT SEARCH DICTIONARY hunspell_invalid_3 (
|
|
|
|
Template=ispell,
|
|
|
|
DictFile=hunspell_sample_num,
|
|
|
|
AffFile=ispell_sample
|
|
|
|
);
|
|
|
|
CREATE TEXT SEARCH DICTIONARY hunspell_err (
|
|
|
|
Template=ispell,
|
|
|
|
DictFile=hunspell_sample_num,
|
|
|
|
AffFile=hunspell_sample_long
|
|
|
|
);
|
|
|
|
ERROR: invalid affix alias "302,301,202,303"
|
2017-02-06 10:33:58 +01:00
|
|
|
-- Synonym dictionary
|
2007-09-11 13:54:42 +02:00
|
|
|
CREATE TEXT SEARCH DICTIONARY synonym (
|
2010-11-23 21:27:50 +01:00
|
|
|
Template=synonym,
|
2007-09-11 13:54:42 +02:00
|
|
|
Synonyms=synonym_sample
|
|
|
|
);
|
|
|
|
SELECT ts_lexize('synonym', 'PoStGrEs');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{pgsql}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT ts_lexize('synonym', 'Gogle');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{googl}
|
|
|
|
(1 row)
|
|
|
|
|
2009-08-14 16:53:20 +02:00
|
|
|
SELECT ts_lexize('synonym', 'indices');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{index}
|
|
|
|
(1 row)
|
|
|
|
|
2020-03-10 17:29:59 +01:00
|
|
|
-- test altering boolean parameters
|
|
|
|
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
|
|
|
|
dictinitoption
|
|
|
|
-----------------------------
|
|
|
|
synonyms = 'synonym_sample'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 1);
|
|
|
|
SELECT ts_lexize('synonym', 'PoStGrEs');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
|
|
|
|
dictinitoption
|
|
|
|
------------------------------------------------
|
|
|
|
synonyms = 'synonym_sample', casesensitive = 1
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 2); -- fail
|
|
|
|
ERROR: casesensitive requires a Boolean value
|
|
|
|
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = off);
|
|
|
|
SELECT ts_lexize('synonym', 'PoStGrEs');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{pgsql}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
|
|
|
|
dictinitoption
|
|
|
|
----------------------------------------------------
|
|
|
|
synonyms = 'synonym_sample', casesensitive = 'off'
|
|
|
|
(1 row)
|
|
|
|
|
2007-09-11 13:54:42 +02:00
|
|
|
-- Create and simple test thesaurus dictionary
|
2007-10-23 22:46:12 +02:00
|
|
|
-- More tests in configuration checks because ts_lexize()
|
|
|
|
-- cannot pass more than one word to thesaurus.
|
2007-09-11 13:54:42 +02:00
|
|
|
CREATE TEXT SEARCH DICTIONARY thesaurus (
|
|
|
|
Template=thesaurus,
|
2010-11-23 21:27:50 +01:00
|
|
|
DictFile=thesaurus_sample,
|
2007-09-11 13:54:42 +02:00
|
|
|
Dictionary=english_stem
|
|
|
|
);
|
|
|
|
SELECT ts_lexize('thesaurus', 'one');
|
|
|
|
ts_lexize
|
|
|
|
-----------
|
|
|
|
{1}
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- Test ispell dictionary in configuration
|
|
|
|
CREATE TEXT SEARCH CONFIGURATION ispell_tst (
|
|
|
|
COPY=english
|
|
|
|
);
|
|
|
|
ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR
|
2007-10-23 22:46:12 +02:00
|
|
|
word, numword, asciiword, hword, numhword, asciihword, hword_part, hword_numpart, hword_asciipart
|
2007-09-11 13:54:42 +02:00
|
|
|
WITH ispell, english_stem;
|
|
|
|
SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
|
|
|
|
to_tsvector
|
|
|
|
----------------------------------------------------------------------------------------------------
|
2008-05-16 18:31:02 +02:00
|
|
|
'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
|
2007-09-11 13:54:42 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('ispell_tst', 'footballklubber');
|
2016-04-07 17:44:18 +02:00
|
|
|
to_tsquery
|
|
|
|
--------------------------------------------------------------------------
|
|
|
|
'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
|
2007-09-11 13:54:42 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky');
|
|
|
|
to_tsquery
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- Test ispell dictionary with hunspell affix in configuration
|
|
|
|
CREATE TEXT SEARCH CONFIGURATION hunspell_tst (
|
|
|
|
COPY=ispell_tst
|
|
|
|
);
|
|
|
|
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
|
|
|
|
REPLACE ispell WITH hunspell;
|
|
|
|
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
|
|
|
|
to_tsvector
|
|
|
|
----------------------------------------------------------------------------------------------------
|
2016-03-04 18:08:10 +01:00
|
|
|
'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('hunspell_tst', 'footballklubber');
|
2016-04-07 17:44:18 +02:00
|
|
|
to_tsquery
|
|
|
|
--------------------------------------------------------------------------
|
|
|
|
'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
|
2016-03-04 18:08:10 +01:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
|
|
|
|
to_tsquery
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
|
|
|
|
(1 row)
|
|
|
|
|
2016-04-07 17:44:18 +02:00
|
|
|
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
|
|
|
|
to_tsquery
|
|
|
|
-------------------------------------------------
|
|
|
|
( 'foot':B & 'ball':B & 'klubber':B ) <-> 'sky'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
|
|
|
|
phraseto_tsquery
|
|
|
|
-------------------------------------------
|
|
|
|
( 'foot' & 'ball' & 'klubber' ) <-> 'sky'
|
2016-04-07 17:44:18 +02:00
|
|
|
(1 row)
|
|
|
|
|
2016-03-04 18:08:10 +01:00
|
|
|
-- Test ispell dictionary with hunspell affix with FLAG long in configuration
|
|
|
|
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
|
|
|
|
REPLACE hunspell WITH hunspell_long;
|
|
|
|
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
|
|
|
|
to_tsvector
|
|
|
|
----------------------------------------------------------------------------------------------------
|
|
|
|
'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('hunspell_tst', 'footballklubber');
|
2016-04-07 17:44:18 +02:00
|
|
|
to_tsquery
|
|
|
|
--------------------------------------------------------------------------
|
|
|
|
'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
|
2016-03-04 18:08:10 +01:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
|
|
|
|
to_tsquery
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- Test ispell dictionary with hunspell affix with FLAG num in configuration
|
|
|
|
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
|
|
|
|
REPLACE hunspell_long WITH hunspell_num;
|
|
|
|
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
|
|
|
|
to_tsvector
|
|
|
|
----------------------------------------------------------------------------------------------------
|
2008-05-16 18:31:02 +02:00
|
|
|
'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
|
2007-09-11 13:54:42 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('hunspell_tst', 'footballklubber');
|
2016-04-07 17:44:18 +02:00
|
|
|
to_tsquery
|
|
|
|
--------------------------------------------------------------------------
|
|
|
|
'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
|
2007-09-11 13:54:42 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
|
|
|
|
to_tsquery
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- Test synonym dictionary in configuration
|
|
|
|
CREATE TEXT SEARCH CONFIGURATION synonym_tst (
|
|
|
|
COPY=english
|
|
|
|
);
|
2010-11-23 21:27:50 +01:00
|
|
|
ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR
|
|
|
|
asciiword, hword_asciipart, asciihword
|
2007-09-11 13:54:42 +02:00
|
|
|
WITH synonym, english_stem;
|
|
|
|
SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and pronounced as postgre');
|
|
|
|
to_tsvector
|
|
|
|
---------------------------------------------------
|
|
|
|
'call':4 'often':3 'pgsql':1,6,8,12 'pronounc':10
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google');
|
|
|
|
to_tsvector
|
|
|
|
----------------------------------------------------------
|
2008-05-16 18:31:02 +02:00
|
|
|
'common':2 'googl':7,10 'instead':8 'mistak':3 'write':6
|
2007-09-11 13:54:42 +02:00
|
|
|
(1 row)
|
|
|
|
|
2009-08-14 16:53:20 +02:00
|
|
|
SELECT to_tsvector('synonym_tst', 'Indexes or indices - Which is right plural form of index?');
|
|
|
|
to_tsvector
|
|
|
|
----------------------------------------------
|
|
|
|
'form':8 'index':1,3,10 'plural':7 'right':6
|
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsquery('synonym_tst', 'Index & indices');
|
|
|
|
to_tsquery
|
|
|
|
---------------------
|
|
|
|
'index' & 'index':*
|
|
|
|
(1 row)
|
|
|
|
|
2007-09-11 13:54:42 +02:00
|
|
|
-- test thesaurus in configuration
|
|
|
|
-- see thesaurus_sample.ths to understand 'odd' resulting tsvector
|
|
|
|
CREATE TEXT SEARCH CONFIGURATION thesaurus_tst (
|
|
|
|
COPY=synonym_tst
|
|
|
|
);
|
2010-11-23 21:27:50 +01:00
|
|
|
ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR
|
|
|
|
asciiword, hword_asciipart, asciihword
|
2007-09-11 13:54:42 +02:00
|
|
|
WITH synonym, thesaurus, english_stem;
|
|
|
|
SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one');
|
|
|
|
to_tsvector
|
|
|
|
----------------------------------
|
|
|
|
'1':1,5 '12':3 '123':4 'pgsql':2
|
|
|
|
(1 row)
|
|
|
|
|
2017-03-14 17:57:10 +01:00
|
|
|
SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbreviation SN)');
|
|
|
|
to_tsvector
|
|
|
|
--------------------------------------------------------------
|
|
|
|
'abbrevi':10 'call':8 'new':4 'sn':1,9,11 'star':5 'usual':7
|
2007-09-11 13:54:42 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets');
|
2007-11-10 16:39:34 +01:00
|
|
|
to_tsvector
|
|
|
|
-------------------------------------------------------
|
2008-05-16 18:31:02 +02:00
|
|
|
'card':3,10 'invit':2,9 'like':6 'look':5 'order':1,8
|
2007-09-11 13:54:42 +02:00
|
|
|
(1 row)
|
|
|
|
|
|
|
|
-- invalid: non-lowercase quoted identifiers
|
|
|
|
CREATE TEXT SEARCH DICTIONARY tsdict_case
|
|
|
|
(
|
|
|
|
Template = ispell,
|
|
|
|
"DictFile" = ispell_sample,
|
|
|
|
"AffFile" = ispell_sample
|
|
|
|
);
|
|
|
|
ERROR: unrecognized Ispell parameter: "DictFile"
|
2024-01-31 05:15:21 +01:00
|
|
|
-- Test grammar for configurations
|
|
|
|
CREATE TEXT SEARCH CONFIGURATION dummy_tst (COPY=english);
|
2024-04-18 21:28:07 +02:00
|
|
|
-- Overridden mapping change with duplicated tokens.
|
2024-01-31 05:15:21 +01:00
|
|
|
ALTER TEXT SEARCH CONFIGURATION dummy_tst
|
|
|
|
ALTER MAPPING FOR word, word WITH ispell;
|
|
|
|
-- Not a token supported by the configuration's parser, fails.
|
|
|
|
ALTER TEXT SEARCH CONFIGURATION dummy_tst
|
|
|
|
DROP MAPPING FOR not_a_token, not_a_token;
|
|
|
|
ERROR: token type "not_a_token" does not exist
|
|
|
|
-- Not a token supported by the configuration's parser, fails even
|
|
|
|
-- with IF EXISTS.
|
|
|
|
ALTER TEXT SEARCH CONFIGURATION dummy_tst
|
|
|
|
DROP MAPPING IF EXISTS FOR not_a_token, not_a_token;
|
|
|
|
ERROR: token type "not_a_token" does not exist
|
|
|
|
-- Token supported by the configuration's parser, succeeds.
|
|
|
|
ALTER TEXT SEARCH CONFIGURATION dummy_tst
|
|
|
|
DROP MAPPING FOR word, word;
|
|
|
|
-- No mapping for token supported by the configuration's parser, fails.
|
|
|
|
ALTER TEXT SEARCH CONFIGURATION dummy_tst
|
|
|
|
DROP MAPPING FOR word;
|
|
|
|
ERROR: mapping for token type "word" does not exist
|
|
|
|
-- Token supported by the configuration's parser, cannot be found,
|
|
|
|
-- succeeds with IF EXISTS.
|
|
|
|
ALTER TEXT SEARCH CONFIGURATION dummy_tst
|
|
|
|
DROP MAPPING IF EXISTS FOR word, word;
|
|
|
|
NOTICE: mapping for token type "word" does not exist, skipping
|
|
|
|
-- Re-add mapping, with duplicated tokens supported by the parser.
|
|
|
|
ALTER TEXT SEARCH CONFIGURATION dummy_tst
|
|
|
|
ADD MAPPING FOR word, word WITH ispell;
|
|
|
|
-- Not a token supported by the configuration's parser, fails.
|
|
|
|
ALTER TEXT SEARCH CONFIGURATION dummy_tst
|
|
|
|
ADD MAPPING FOR not_a_token WITH ispell;
|
|
|
|
ERROR: token type "not_a_token" does not exist
|
|
|
|
DROP TEXT SEARCH CONFIGURATION dummy_tst;
|