postgresql/src/test/regress/expected/tsdicts.out

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

724 lines
20 KiB
Plaintext
Raw Normal View History

--Test text search dictionaries and configurations
-- Test ISpell dictionary with ispell affix file
CREATE TEXT SEARCH DICTIONARY ispell (
Template=ispell,
DictFile=ispell_sample,
AffFile=ispell_sample
);
SELECT ts_lexize('ispell', 'skies');
ts_lexize
-----------
{sky}
(1 row)
SELECT ts_lexize('ispell', 'bookings');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('ispell', 'booking');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('ispell', 'foot');
ts_lexize
-----------
{foot}
(1 row)
SELECT ts_lexize('ispell', 'foots');
ts_lexize
-----------
{foot}
(1 row)
SELECT ts_lexize('ispell', 'rebookings');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('ispell', 'rebooking');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('ispell', 'rebook');
ts_lexize
-----------
(1 row)
SELECT ts_lexize('ispell', 'unbookings');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('ispell', 'unbooking');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('ispell', 'unbook');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('ispell', 'footklubber');
ts_lexize
----------------
{foot,klubber}
(1 row)
SELECT ts_lexize('ispell', 'footballklubber');
ts_lexize
------------------------------------------------------
{footballklubber,foot,ball,klubber,football,klubber}
(1 row)
SELECT ts_lexize('ispell', 'ballyklubber');
ts_lexize
----------------
{ball,klubber}
(1 row)
SELECT ts_lexize('ispell', 'footballyklubber');
ts_lexize
---------------------
{foot,ball,klubber}
(1 row)
-- Test ISpell dictionary with hunspell affix file
CREATE TEXT SEARCH DICTIONARY hunspell (
Template=ispell,
DictFile=ispell_sample,
AffFile=hunspell_sample
);
SELECT ts_lexize('hunspell', 'skies');
ts_lexize
-----------
{sky}
(1 row)
SELECT ts_lexize('hunspell', 'bookings');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell', 'booking');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell', 'foot');
ts_lexize
-----------
{foot}
(1 row)
SELECT ts_lexize('hunspell', 'foots');
ts_lexize
-----------
{foot}
(1 row)
SELECT ts_lexize('hunspell', 'rebookings');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell', 'rebooking');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell', 'rebook');
ts_lexize
-----------
(1 row)
SELECT ts_lexize('hunspell', 'unbookings');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell', 'unbooking');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell', 'unbook');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell', 'footklubber');
ts_lexize
----------------
{foot,klubber}
(1 row)
SELECT ts_lexize('hunspell', 'footballklubber');
ts_lexize
------------------------------------------------------
{footballklubber,foot,ball,klubber,football,klubber}
(1 row)
SELECT ts_lexize('hunspell', 'ballyklubber');
ts_lexize
----------------
{ball,klubber}
(1 row)
SELECT ts_lexize('hunspell', 'footballyklubber');
ts_lexize
---------------------
{foot,ball,klubber}
(1 row)
-- Test ISpell dictionary with hunspell affix file with FLAG long parameter
CREATE TEXT SEARCH DICTIONARY hunspell_long (
Template=ispell,
DictFile=hunspell_sample_long,
AffFile=hunspell_sample_long
);
SELECT ts_lexize('hunspell_long', 'skies');
ts_lexize
-----------
{sky}
(1 row)
SELECT ts_lexize('hunspell_long', 'bookings');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell_long', 'booking');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell_long', 'foot');
ts_lexize
-----------
{foot}
(1 row)
SELECT ts_lexize('hunspell_long', 'foots');
ts_lexize
-----------
{foot}
(1 row)
SELECT ts_lexize('hunspell_long', 'rebookings');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell_long', 'rebooking');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell_long', 'rebook');
ts_lexize
-----------
(1 row)
SELECT ts_lexize('hunspell_long', 'unbookings');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell_long', 'unbooking');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell_long', 'unbook');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell_long', 'booked');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell_long', 'footklubber');
ts_lexize
----------------
{foot,klubber}
(1 row)
SELECT ts_lexize('hunspell_long', 'footballklubber');
ts_lexize
------------------------------------------------------
{footballklubber,foot,ball,klubber,football,klubber}
(1 row)
SELECT ts_lexize('hunspell_long', 'ballyklubber');
ts_lexize
----------------
{ball,klubber}
(1 row)
SELECT ts_lexize('hunspell_long', 'ballsklubber');
ts_lexize
----------------
{ball,klubber}
(1 row)
SELECT ts_lexize('hunspell_long', 'footballyklubber');
ts_lexize
---------------------
{foot,ball,klubber}
(1 row)
SELECT ts_lexize('hunspell_long', 'ex-machina');
ts_lexize
---------------
{ex-,machina}
(1 row)
-- Test ISpell dictionary with hunspell affix file with FLAG num parameter
CREATE TEXT SEARCH DICTIONARY hunspell_num (
Template=ispell,
DictFile=hunspell_sample_num,
AffFile=hunspell_sample_num
);
SELECT ts_lexize('hunspell_num', 'skies');
ts_lexize
-----------
{sky}
(1 row)
SELECT ts_lexize('hunspell_num', 'sk');
ts_lexize
-----------
{sky}
(1 row)
SELECT ts_lexize('hunspell_num', 'bookings');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell_num', 'booking');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell_num', 'foot');
ts_lexize
-----------
{foot}
(1 row)
SELECT ts_lexize('hunspell_num', 'foots');
ts_lexize
-----------
{foot}
(1 row)
SELECT ts_lexize('hunspell_num', 'rebookings');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell_num', 'rebooking');
ts_lexize
----------------
{booking,book}
(1 row)
SELECT ts_lexize('hunspell_num', 'rebook');
ts_lexize
-----------
(1 row)
SELECT ts_lexize('hunspell_num', 'unbookings');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell_num', 'unbooking');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell_num', 'unbook');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell_num', 'booked');
ts_lexize
-----------
{book}
(1 row)
SELECT ts_lexize('hunspell_num', 'footklubber');
ts_lexize
----------------
{foot,klubber}
(1 row)
SELECT ts_lexize('hunspell_num', 'footballklubber');
ts_lexize
------------------------------------------------------
{footballklubber,foot,ball,klubber,football,klubber}
(1 row)
SELECT ts_lexize('hunspell_num', 'ballyklubber');
ts_lexize
----------------
{ball,klubber}
(1 row)
SELECT ts_lexize('hunspell_num', 'footballyklubber');
ts_lexize
---------------------
{foot,ball,klubber}
(1 row)
-- Test suitability of affix and dict files
CREATE TEXT SEARCH DICTIONARY hunspell_err (
Template=ispell,
DictFile=ispell_sample,
AffFile=hunspell_sample_long
);
ERROR: invalid affix alias "GJUS"
CREATE TEXT SEARCH DICTIONARY hunspell_err (
Template=ispell,
DictFile=ispell_sample,
AffFile=hunspell_sample_num
);
ERROR: invalid affix flag "SZ\"
CREATE TEXT SEARCH DICTIONARY hunspell_invalid_1 (
Template=ispell,
DictFile=hunspell_sample_long,
AffFile=ispell_sample
);
CREATE TEXT SEARCH DICTIONARY hunspell_invalid_2 (
Template=ispell,
DictFile=hunspell_sample_long,
AffFile=hunspell_sample_num
);
CREATE TEXT SEARCH DICTIONARY hunspell_invalid_3 (
Template=ispell,
DictFile=hunspell_sample_num,
AffFile=ispell_sample
);
CREATE TEXT SEARCH DICTIONARY hunspell_err (
Template=ispell,
DictFile=hunspell_sample_num,
AffFile=hunspell_sample_long
);
ERROR: invalid affix alias "302,301,202,303"
-- Synonym dictionary
CREATE TEXT SEARCH DICTIONARY synonym (
Template=synonym,
Synonyms=synonym_sample
);
SELECT ts_lexize('synonym', 'PoStGrEs');
ts_lexize
-----------
{pgsql}
(1 row)
SELECT ts_lexize('synonym', 'Gogle');
ts_lexize
-----------
{googl}
(1 row)
SELECT ts_lexize('synonym', 'indices');
ts_lexize
-----------
{index}
(1 row)
-- test altering boolean parameters
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
dictinitoption
-----------------------------
synonyms = 'synonym_sample'
(1 row)
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 1);
SELECT ts_lexize('synonym', 'PoStGrEs');
ts_lexize
-----------
(1 row)
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
dictinitoption
------------------------------------------------
synonyms = 'synonym_sample', casesensitive = 1
(1 row)
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 2); -- fail
ERROR: casesensitive requires a Boolean value
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = off);
SELECT ts_lexize('synonym', 'PoStGrEs');
ts_lexize
-----------
{pgsql}
(1 row)
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
dictinitoption
----------------------------------------------------
synonyms = 'synonym_sample', casesensitive = 'off'
(1 row)
-- Create and simple test thesaurus dictionary
-- More tests in configuration checks because ts_lexize()
-- cannot pass more than one word to thesaurus.
CREATE TEXT SEARCH DICTIONARY thesaurus (
Template=thesaurus,
DictFile=thesaurus_sample,
Dictionary=english_stem
);
SELECT ts_lexize('thesaurus', 'one');
ts_lexize
-----------
{1}
(1 row)
-- Test ispell dictionary in configuration
CREATE TEXT SEARCH CONFIGURATION ispell_tst (
COPY=english
);
ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR
word, numword, asciiword, hword, numhword, asciihword, hword_part, hword_numpart, hword_asciipart
WITH ispell, english_stem;
SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
to_tsvector
----------------------------------------------------------------------------------------------------
'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
(1 row)
SELECT to_tsquery('ispell_tst', 'footballklubber');
to_tsquery
--------------------------------------------------------------------------
'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
(1 row)
SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky');
to_tsquery
------------------------------------------------------------------------
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
(1 row)
-- Test ispell dictionary with hunspell affix in configuration
CREATE TEXT SEARCH CONFIGURATION hunspell_tst (
COPY=ispell_tst
);
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
REPLACE ispell WITH hunspell;
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
to_tsvector
----------------------------------------------------------------------------------------------------
'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballklubber');
to_tsquery
--------------------------------------------------------------------------
'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
to_tsquery
------------------------------------------------------------------------
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
to_tsquery
-------------------------------------------------
( 'foot':B & 'ball':B & 'klubber':B ) <-> 'sky'
(1 row)
SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
Fix strange behavior (and possible crashes) in full text phrase search. In an attempt to simplify the tsquery matching engine, the original phrase search patch invented rewrite rules that would rearrange a tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator. But this approach had numerous problems. The rearrangement step was missed by ts_rewrite (and perhaps other places), allowing tsqueries to be created that would cause Assert failures or perhaps crashes at execution, as reported by Andreas Seltenreich. The rewrite rules effectively defined semantics for operators underneath PHRASE that were buggy, or at least unintuitive. And because rewriting was done in tsqueryin() rather than at execution, the rearrangement was user-visible, which is not very desirable --- for example, it might cause unexpected matches or failures to match in ts_rewrite. As a somewhat independent problem, the behavior of nested PHRASE operators was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not behave intuitively at all. To fix, get rid of the rewrite logic altogether, and instead teach the tsquery execution engine to manage AND/OR/NOT below a PHRASE operator by explicitly computing the match location(s) and match widths for these operators. This requires introducing some additional fields into the publicly visible ExecPhraseData struct; but since there's no way for third-party code to pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem as long as we don't move the offsets of the existing fields. Another related problem was that index searches supposed that "!x <-> y" could be lossily approximated as "!x & y", which isn't correct because the latter will reject, say, "x q y" which the query itself accepts. This required some tweaking in TS_execute_ternary along with the main tsquery engine. Back-patch to 9.6 where phrase operators were introduced. While this could be argued to change behavior more than we'd like in a stable branch, we have to do something about the crash hazards and index-vs-seqscan inconsistency, and it doesn't seem desirable to let the unintuitive behaviors induced by the rewriting implementation stand as precedent. Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
2016-12-21 21:18:25 +01:00
phraseto_tsquery
-------------------------------------------
( 'foot' & 'ball' & 'klubber' ) <-> 'sky'
(1 row)
-- Test ispell dictionary with hunspell affix with FLAG long in configuration
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
REPLACE hunspell WITH hunspell_long;
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
to_tsvector
----------------------------------------------------------------------------------------------------
'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballklubber');
to_tsquery
--------------------------------------------------------------------------
'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
to_tsquery
------------------------------------------------------------------------
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
(1 row)
-- Test ispell dictionary with hunspell affix with FLAG num in configuration
ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
REPLACE hunspell_long WITH hunspell_num;
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
to_tsvector
----------------------------------------------------------------------------------------------------
'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballklubber');
to_tsquery
--------------------------------------------------------------------------
'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
to_tsquery
------------------------------------------------------------------------
'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
(1 row)
-- Test synonym dictionary in configuration
CREATE TEXT SEARCH CONFIGURATION synonym_tst (
COPY=english
);
ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR
asciiword, hword_asciipart, asciihword
WITH synonym, english_stem;
SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgsql and pronounced as postgre');
to_tsvector
---------------------------------------------------
'call':4 'often':3 'pgsql':1,6,8,12 'pronounc':10
(1 row)
SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google');
to_tsvector
----------------------------------------------------------
'common':2 'googl':7,10 'instead':8 'mistak':3 'write':6
(1 row)
SELECT to_tsvector('synonym_tst', 'Indexes or indices - Which is right plural form of index?');
to_tsvector
----------------------------------------------
'form':8 'index':1,3,10 'plural':7 'right':6
(1 row)
SELECT to_tsquery('synonym_tst', 'Index & indices');
to_tsquery
---------------------
'index' & 'index':*
(1 row)
-- test thesaurus in configuration
-- see thesaurus_sample.ths to understand 'odd' resulting tsvector
CREATE TEXT SEARCH CONFIGURATION thesaurus_tst (
COPY=synonym_tst
);
ALTER TEXT SEARCH CONFIGURATION thesaurus_tst ALTER MAPPING FOR
asciiword, hword_asciipart, asciihword
WITH synonym, thesaurus, english_stem;
SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one');
to_tsvector
----------------------------------
'1':1,5 '12':3 '123':4 'pgsql':2
(1 row)
SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbreviation SN)');
to_tsvector
--------------------------------------------------------------
'abbrevi':10 'call':8 'new':4 'sn':1,9,11 'star':5 'usual':7
(1 row)
SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets');
to_tsvector
-------------------------------------------------------
'card':3,10 'invit':2,9 'like':6 'look':5 'order':1,8
(1 row)
Avoid unnecessary use of pg_strcasecmp for already-downcased identifiers. We have a lot of code in which option names, which from the user's viewpoint are logically keywords, are passed through the grammar as plain identifiers, and then matched to string literals during command execution. This approach avoids making words into lexer keywords unnecessarily. Some places matched these strings using plain strcmp, some using pg_strcasecmp. But the latter should be unnecessary since identifiers would have been downcased on their way through the parser. Aside from any efficiency concerns (probably not a big factor), the lack of consistency in this area creates a hazard of subtle bugs due to different places coming to different conclusions about whether two option names are the same or different. Hence, standardize on using strcmp() to match any option names that are expected to have been fed through the parser. This does create a user-visible behavioral change, which is that while formerly all of these would work: alter table foo set (fillfactor = 50); alter table foo set (FillFactor = 50); alter table foo set ("fillfactor" = 50); alter table foo set ("FillFactor" = 50); now the last case will fail because that double-quoted identifier is different from the others. However, none of our documentation says that you can use a quoted identifier in such contexts at all, and we should discourage doing so since it would break if we ever decide to parse such constructs as true lexer keywords rather than poor man's substitutes. So this shouldn't create a significant compatibility issue for users. Daniel Gustafsson, reviewed by Michael Paquier, small changes by me Discussion: https://postgr.es/m/29405B24-564E-476B-98C0-677A29805B84@yesql.se
2018-01-27 00:25:02 +01:00
-- invalid: non-lowercase quoted identifiers
CREATE TEXT SEARCH DICTIONARY tsdict_case
(
Template = ispell,
"DictFile" = ispell_sample,
"AffFile" = ispell_sample
);
ERROR: unrecognized Ispell parameter: "DictFile"
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
-- Test grammar for configurations
CREATE TEXT SEARCH CONFIGURATION dummy_tst (COPY=english);
-- Overridden mapping change with duplicated tokens.
Fix various issues with ALTER TEXT SEARCH CONFIGURATION This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names: - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping. - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed. - DROP MAPPING failed with "tuple already updated by self", like previously, but in a different code path. The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates mess up with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs. Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss. These issues exist since at least their introduction in core with 140d4ebcb46e, so backpatch all the way down. Reported-by: Alexander Lakhin Author: Tender Wang, Michael Paquier Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org Backpatch-through: 12
2024-01-31 05:15:21 +01:00
ALTER TEXT SEARCH CONFIGURATION dummy_tst
ALTER MAPPING FOR word, word WITH ispell;
-- Not a token supported by the configuration's parser, fails.
ALTER TEXT SEARCH CONFIGURATION dummy_tst
DROP MAPPING FOR not_a_token, not_a_token;
ERROR: token type "not_a_token" does not exist
-- Not a token supported by the configuration's parser, fails even
-- with IF EXISTS.
ALTER TEXT SEARCH CONFIGURATION dummy_tst
DROP MAPPING IF EXISTS FOR not_a_token, not_a_token;
ERROR: token type "not_a_token" does not exist
-- Token supported by the configuration's parser, succeeds.
ALTER TEXT SEARCH CONFIGURATION dummy_tst
DROP MAPPING FOR word, word;
-- No mapping for token supported by the configuration's parser, fails.
ALTER TEXT SEARCH CONFIGURATION dummy_tst
DROP MAPPING FOR word;
ERROR: mapping for token type "word" does not exist
-- Token supported by the configuration's parser, cannot be found,
-- succeeds with IF EXISTS.
ALTER TEXT SEARCH CONFIGURATION dummy_tst
DROP MAPPING IF EXISTS FOR word, word;
NOTICE: mapping for token type "word" does not exist, skipping
-- Re-add mapping, with duplicated tokens supported by the parser.
ALTER TEXT SEARCH CONFIGURATION dummy_tst
ADD MAPPING FOR word, word WITH ispell;
-- Not a token supported by the configuration's parser, fails.
ALTER TEXT SEARCH CONFIGURATION dummy_tst
ADD MAPPING FOR not_a_token WITH ispell;
ERROR: token type "not_a_token" does not exist
DROP TEXT SEARCH CONFIGURATION dummy_tst;