postgresql/contrib/unaccent/expected/unaccent.out
Jeff Davis 27b62377b4 Use ICU by default at initdb time.
If the ICU locale is not specified, initialize the default collator
and retrieve the locale name from that.

Discussion: https://postgr.es/m/510d284759f6e943ce15096167760b2edcb2e700.camel@j-davis.com
Reviewed-by: Peter Eisentraut
2023-03-09 10:52:41 -08:00

153 lines
2.2 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- unaccent is broken if the default collation is provided by ICU and
-- LC_CTYPE=C
SELECT current_setting('lc_ctype') = 'C' AND
(SELECT datlocprovider='i' FROM pg_database
WHERE datname=current_database())
AS skip_test \gset
\if :skip_test
\quit
\endif
CREATE EXTENSION unaccent;
-- must have a UTF8 database
SELECT getdatabaseencoding();
getdatabaseencoding
---------------------
UTF8
(1 row)
SET client_encoding TO 'UTF8';
SELECT unaccent('foobar');
unaccent
----------
foobar
(1 row)
SELECT unaccent('ёлка');
unaccent
----------
елка
(1 row)
SELECT unaccent('ЁЖИК');
unaccent
----------
ЕЖИК
(1 row)
SELECT unaccent('˃˖˗˜');
unaccent
----------
>+-~
(1 row)
SELECT unaccent('À'); -- Remove combining diacritical 0x0300
unaccent
----------
A
(1 row)
SELECT unaccent('℃℉'); -- degree signs
unaccent
----------
°C°F
(1 row)
SELECT unaccent('℗'); -- sound recording copyright
unaccent
----------
(P)
(1 row)
SELECT unaccent('unaccent', 'foobar');
unaccent
----------
foobar
(1 row)
SELECT unaccent('unaccent', 'ёлка');
unaccent
----------
елка
(1 row)
SELECT unaccent('unaccent', 'ЁЖИК');
unaccent
----------
ЕЖИК
(1 row)
SELECT unaccent('unaccent', '˃˖˗˜');
unaccent
----------
>+-~
(1 row)
SELECT unaccent('unaccent', 'À');
unaccent
----------
A
(1 row)
SELECT unaccent('unaccent', '℃℉');
unaccent
----------
°C°F
(1 row)
SELECT unaccent('unaccent', '℗');
unaccent
----------
(P)
(1 row)
SELECT ts_lexize('unaccent', 'foobar');
ts_lexize
-----------
(1 row)
SELECT ts_lexize('unaccent', 'ёлка');
ts_lexize
-----------
{елка}
(1 row)
SELECT ts_lexize('unaccent', 'ЁЖИК');
ts_lexize
-----------
{ЕЖИК}
(1 row)
SELECT ts_lexize('unaccent', '˃˖˗˜');
ts_lexize
-----------
{>+-~}
(1 row)
SELECT ts_lexize('unaccent', 'À');
ts_lexize
-----------
{A}
(1 row)
SELECT ts_lexize('unaccent', '℃℉');
ts_lexize
-----------
{°C°F}
(1 row)
SELECT ts_lexize('unaccent', '℗');
ts_lexize
-----------
{(P)}
(1 row)
-- Controversial case. Black-Letter Capital H (U+210C) is translated by
-- Latin-ASCII.xml as 'x', but it should be 'H'.
SELECT unaccent('ℌ');
unaccent
----------
x
(1 row)