mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-10-01 15:11:18 +02:00
27b62377b4
If the ICU locale is not specified, initialize the default collator and retrieve the locale name from that. Discussion: https://postgr.es/m/510d284759f6e943ce15096167760b2edcb2e700.camel@j-davis.com Reviewed-by: Peter Eisentraut
153 lines
2.2 KiB
Plaintext
153 lines
2.2 KiB
Plaintext
-- unaccent is broken if the default collation is provided by ICU and
|
||
-- LC_CTYPE=C
|
||
SELECT current_setting('lc_ctype') = 'C' AND
|
||
(SELECT datlocprovider='i' FROM pg_database
|
||
WHERE datname=current_database())
|
||
AS skip_test \gset
|
||
\if :skip_test
|
||
\quit
|
||
\endif
|
||
CREATE EXTENSION unaccent;
|
||
-- must have a UTF8 database
|
||
SELECT getdatabaseencoding();
|
||
getdatabaseencoding
|
||
---------------------
|
||
UTF8
|
||
(1 row)
|
||
|
||
SET client_encoding TO 'UTF8';
|
||
SELECT unaccent('foobar');
|
||
unaccent
|
||
----------
|
||
foobar
|
||
(1 row)
|
||
|
||
SELECT unaccent('ёлка');
|
||
unaccent
|
||
----------
|
||
елка
|
||
(1 row)
|
||
|
||
SELECT unaccent('ЁЖИК');
|
||
unaccent
|
||
----------
|
||
ЕЖИК
|
||
(1 row)
|
||
|
||
SELECT unaccent('˃˖˗˜');
|
||
unaccent
|
||
----------
|
||
>+-~
|
||
(1 row)
|
||
|
||
SELECT unaccent('À'); -- Remove combining diacritical 0x0300
|
||
unaccent
|
||
----------
|
||
A
|
||
(1 row)
|
||
|
||
SELECT unaccent('℃℉'); -- degree signs
|
||
unaccent
|
||
----------
|
||
°C°F
|
||
(1 row)
|
||
|
||
SELECT unaccent('℗'); -- sound recording copyright
|
||
unaccent
|
||
----------
|
||
(P)
|
||
(1 row)
|
||
|
||
SELECT unaccent('unaccent', 'foobar');
|
||
unaccent
|
||
----------
|
||
foobar
|
||
(1 row)
|
||
|
||
SELECT unaccent('unaccent', 'ёлка');
|
||
unaccent
|
||
----------
|
||
елка
|
||
(1 row)
|
||
|
||
SELECT unaccent('unaccent', 'ЁЖИК');
|
||
unaccent
|
||
----------
|
||
ЕЖИК
|
||
(1 row)
|
||
|
||
SELECT unaccent('unaccent', '˃˖˗˜');
|
||
unaccent
|
||
----------
|
||
>+-~
|
||
(1 row)
|
||
|
||
SELECT unaccent('unaccent', 'À');
|
||
unaccent
|
||
----------
|
||
A
|
||
(1 row)
|
||
|
||
SELECT unaccent('unaccent', '℃℉');
|
||
unaccent
|
||
----------
|
||
°C°F
|
||
(1 row)
|
||
|
||
SELECT unaccent('unaccent', '℗');
|
||
unaccent
|
||
----------
|
||
(P)
|
||
(1 row)
|
||
|
||
SELECT ts_lexize('unaccent', 'foobar');
|
||
ts_lexize
|
||
-----------
|
||
|
||
(1 row)
|
||
|
||
SELECT ts_lexize('unaccent', 'ёлка');
|
||
ts_lexize
|
||
-----------
|
||
{елка}
|
||
(1 row)
|
||
|
||
SELECT ts_lexize('unaccent', 'ЁЖИК');
|
||
ts_lexize
|
||
-----------
|
||
{ЕЖИК}
|
||
(1 row)
|
||
|
||
SELECT ts_lexize('unaccent', '˃˖˗˜');
|
||
ts_lexize
|
||
-----------
|
||
{>+-~}
|
||
(1 row)
|
||
|
||
SELECT ts_lexize('unaccent', 'À');
|
||
ts_lexize
|
||
-----------
|
||
{A}
|
||
(1 row)
|
||
|
||
SELECT ts_lexize('unaccent', '℃℉');
|
||
ts_lexize
|
||
-----------
|
||
{°C°F}
|
||
(1 row)
|
||
|
||
SELECT ts_lexize('unaccent', '℗');
|
||
ts_lexize
|
||
-----------
|
||
{(P)}
|
||
(1 row)
|
||
|
||
-- Controversial case. Black-Letter Capital H (U+210C) is translated by
|
||
-- Latin-ASCII.xml as 'x', but it should be 'H'.
|
||
SELECT unaccent('ℌ');
|
||
unaccent
|
||
----------
|
||
x
|
||
(1 row)
|
||
|