diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index ed84465996..22721b105f 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -377,10 +377,13 @@ initdb --locale-provider=icu --icu-locale=en variants and customization options. + ICU Locales + ICU Locale Names + The ICU format for the locale name is a Language Tag. @@ -412,16 +415,19 @@ NOTICE: using standard form "de-DE" for locale "de_DE.utf8" linkend="icu-language-tag">language tag instead of relying on the transformation. + A locale with no language name, or the special language name root, is transformed to have the language und ("undefined"). + ICU can transform most libc locale names, as well as some other formats, into language tags for easier transition to ICU. If a libc locale name is used in ICU, it may not have precisely the same behavior as in libc. + If there is a problem interpreting the locale name, or if the locale name represents a language or region that ICU does not recognize, you will see @@ -442,10 +448,12 @@ CREATE COLLATION Language Tag + A language tag, defined in BCP 47, is a standardized identifier used to identify languages, regions, and other information about a locale. + Basic language tags are simply language-region; @@ -457,6 +465,7 @@ CREATE COLLATION ja-JP, de, or fr-CA. + Collation settings may be included in the language tag to customize collation behavior. ICU allows extensive customization, such as @@ -464,6 +473,7 @@ CREATE COLLATION treatment of digits within text; and many other options to satisfy a variety of uses. + To include this additional collation information in a language tag, append -u, which indicates there are additional @@ -477,6 +487,7 @@ CREATE COLLATION -value, which implies a value of true. 
+ For example, the language tag en-US-u-kn-ks-level2 means the locale with the English language in the US region, with @@ -500,6 +511,7 @@ SELECT 'N-45' < 'N-123' COLLATE mycollation5 as result; (1 row) + See for details and additional examples of using language tags with custom collation information for the @@ -507,6 +519,7 @@ SELECT 'N-45' < 'N-123' COLLATE mycollation5 as result; + Problems @@ -1100,6 +1113,7 @@ CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-tr + ICU Custom Collations @@ -1129,8 +1143,10 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true linkend="icu-collation-settings"/>, or see for more details. + ICU Comparison Levels + Comparison of two strings (collation) in ICU is determined by a multi-level process, where textual features are grouped into @@ -1138,6 +1154,7 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true linkend="icu-collation-settings-table">collation settings. Higher levels correspond to finer textual features. + shows which textual feature differences are considered significant when determining equality at the @@ -1145,7 +1162,7 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true invisible separator, and as seen in the table, is ignored at all levels of comparison less than identic. - + ICU Collation Levels @@ -1157,6 +1174,7 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true + Level @@ -1169,6 +1187,7 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true 'y' = 'z' + level1 @@ -1224,6 +1243,7 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
+ At every level, even with full normalization off, basic normalization is performed. For example, 'á' may be composed of the code points U&'\0061\0301' or the single code @@ -1233,9 +1253,9 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true created with deterministic set to true. + Collation Level Examples - CREATE COLLATION level3 (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-level3'); @@ -1251,18 +1271,18 @@ SELECT 'x-y' = 'x_y' COLLATE level3; -- true SELECT 'x-y' = 'x_y' COLLATE level4; -- false -
Collation Settings for an ICU Locale + shows the available collation settings, which can be used as part of a language tag to customize a collation. - + ICU Collation Settings @@ -1270,6 +1290,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false + Key @@ -1278,6 +1299,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false Description + co @@ -1287,6 +1309,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false Collation type. See for additional options and details. + ka noignore, shifted @@ -1299,6 +1322,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false character classes are ignored. + kb true, false @@ -1309,6 +1333,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false before 'aé'. + kc true, false @@ -1325,6 +1350,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false + kf @@ -1339,6 +1365,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false the rules of the locale. + kn true, false @@ -1350,6 +1377,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false 'id-123'. + kk true, false @@ -1373,6 +1401,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false + kr @@ -1398,6 +1427,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false + ks level1, level2, level3, level4, identic @@ -1409,6 +1439,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false for details. + kv @@ -1429,10 +1460,13 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
- Defaults may depend on locale. The above table is not meant to be - complete. See for additional - options and details. + + + Defaults may depend on locale. The above table is not meant to be + complete. See for additional + options and details. + For many collation settings, you must create the collation with @@ -1448,7 +1482,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false Examples - + CREATE COLLATION "de-u-co-phonebk-x-icu" (provider = icu, locale = 'de-u-co-phonebk'); @@ -1494,22 +1528,21 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false - External References for ICU + This section () is only a brief overview of ICU behavior and language tags. Refer to the following documents for technical details, additional options, and new behavior: + - Unicode - Technical Standard #35 + Unicode Technical Standard #35 @@ -1519,8 +1552,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false - CLDR - repository + CLDR repository