From f4a9422c0c37ba638adbab853b8badb98a53ce04 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Thu, 25 May 2023 17:05:50 -0700 Subject: [PATCH] Doc fixes for commit 1e16af8ab5. Discussion: https://postgr.es/m/275c47ea-e7f3-e654-c99a-63bc116997d7@enterprisedb.com --- doc/src/sgml/charset.sgml | 132 ++++++++++++++++++++------------------ 1 file changed, 71 insertions(+), 61 deletions(-) diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index 51746e83ae..ed84465996 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -386,11 +386,12 @@ initdb --locale-provider=icu --icu-locale=en linkend="icu-language-tag">Language Tag. -CREATE COLLATION mycollation1 (PROVIDER = icu, LOCALE = 'ja-JP'); -CREATE COLLATION mycollation2 (PROVIDER = icu, LOCALE = 'fr'); +CREATE COLLATION mycollation1 (provider = icu, locale = 'ja-JP'); +CREATE COLLATION mycollation2 (provider = icu, locale = 'fr'); + Locale Canonicalization and Validation @@ -399,14 +400,14 @@ CREATE COLLATION mycollation2 (PROVIDER = icu, LOCALE = 'fr'); language tag if not already in that form. For instance, -CREATE COLLATION mycollation3 (PROVIDER = icu, LOCALE = 'en-US-u-kn-true'); +CREATE COLLATION mycollation3 (provider = icu, locale = 'en-US-u-kn-true'); NOTICE: using standard form "en-US-u-kn" for locale "en-US-u-kn-true" -CREATE COLLATION mycollation4 (PROVIDER = icu, LOCALE = 'de_DE.utf8'); +CREATE COLLATION mycollation4 (provider = icu, locale = 'de_DE.utf8'); NOTICE: using standard form "de-DE" for locale "de_DE.utf8" - If you see this notice, ensure that the PROVIDER and - LOCALE are the expected result. For consistent results + If you see this notice, ensure that the provider and + locale are the expected result. For consistent results when using the ICU provider, specify the canonical language tag instead of relying on the transformation. 
@@ -427,7 +428,7 @@ NOTICE: using standard form "de-DE" for locale "de_DE.utf8" the following warning: -CREATE COLLATION nonsense (PROVIDER = icu, LOCALE = 'nonsense'); +CREATE COLLATION nonsense (provider = icu, locale = 'nonsense'); WARNING: ICU locale "nonsense" has unknown language "nonsense" HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED. CREATE COLLATION @@ -438,6 +439,7 @@ CREATE COLLATION still be created, but the behavior may not be what the user intended. + Language Tag @@ -484,7 +486,7 @@ CREATE COLLATION of digits as a single number: -CREATE COLLATION mycollation5 (PROVIDER = icu, DETERMINISTIC = false, LOCALE = 'en-US-u-kn-ks-level2'); +CREATE COLLATION mycollation5 (provider = icu, deterministic = false, locale = 'en-US-u-kn-ks-level2'); SELECT 'aB' = 'Ab' COLLATE mycollation5 as result; result -------- @@ -1109,16 +1111,16 @@ CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-tr -- ignore differences in accents and case -CREATE COLLATION ignore_accent_case (PROVIDER = icu, DETERMINISTIC = false, LOCALE = 'und-u-ks-level1'); +CREATE COLLATION ignore_accent_case (provider = icu, deterministic = false, locale = 'und-u-ks-level1'); SELECT 'Å' = 'A' COLLATE ignore_accent_case; -- true SELECT 'z' = 'Z' COLLATE ignore_accent_case; -- true -- upper case letters sort before lower case. 
-CREATE COLLATION upper_first (PROVIDER=icu, LOCALE = 'und-u-kf-upper'); +CREATE COLLATION upper_first (provider = icu, locale = 'und-u-kf-upper'); SELECT 'B' < 'b' COLLATE upper_first; -- true -- treat digits numerically and ignore punctuation -CREATE COLLATION num_ignore_punct (PROVIDER = icu, DETERMINISTIC = false, LOCALE = 'und-u-ka-shifted-kn'); +CREATE COLLATION num_ignore_punct (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-kn'); SELECT 'id-45' < 'id-123' COLLATE num_ignore_punct; -- true SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true @@ -1136,6 +1138,13 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true linkend="icu-collation-settings-table">collation settings. Higher levels correspond to finer textual features. + + shows which textual feature + differences are considered significant when determining equality at the + given level. The Unicode character U+2063 is an + invisible separator, and as seen in the table, is ignored at all + levels of comparison less than identic. + ICU Collation Levels @@ -1215,20 +1224,13 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
- The above table shows which textual feature differences are - considered significant when determining equality at the given level. The - unicode character U+2063 is an invisible separator, - and as seen in the table, is ignored for at all levels of comparison less - than identic. -
- At every level, even with full normalization off, basic normalization is performed. For example, 'á' may be composed of the code points U&'\0061\0301' or the single code point U&'\00E1', and those sequences will be considered equal even at the identic level. To treat any difference in code point representation as distinct, use a collation - created with DETERMINISTIC set to + created with deterministic set to true. @@ -1236,9 +1238,9 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true -CREATE COLLATION level3 (PROVIDER=icu, DETERMINISTIC=false, LOCALE='und-u-ka-shifted-ks-level3'); -CREATE COLLATION level4 (PROVIDER=icu, DETERMINISTIC=false, LOCALE='und-u-ka-shifted-ks-level4'); -CREATE COLLATION identic (PROVIDER=icu, DETERMINISTIC=false, LOCALE='und-u-ka-shifted-ks-identic'); +CREATE COLLATION level3 (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-level3'); +CREATE COLLATION level4 (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-level4'); +CREATE COLLATION identic (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-identic'); -- invisible separator ignored at all levels except identic SELECT 'ab' = U&'a\2063b' COLLATE level4; -- true @@ -1252,8 +1254,14 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
+ Collation Settings for an ICU Locale + + shows the available + collation settings, which can be used as part of a language tag to + customize a collation. + ICU Collation Settings @@ -1272,14 +1280,11 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false - ks - level1, level2, level3, level4, identic - level3 + co + emoji, phonebk, standard, ... + standard - Sensitivity (or "strength") when determining equality, with - level1 the least sensitive to differences and - identic the most sensitive to differences. See - for details. + Collation type. See for additional options and details. @@ -1304,29 +1309,6 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false before 'aé'. - - kk - true, false - false - - - Enable full normalization; may affect performance. Basic - normalization is performed even when set to - false. Locales for languages that require full - normalization typically enable it by default. - - - Full normalization is important in some cases, such as when - multiple accents are applied to a single character. For example, - the code point sequences U&'\0065\0323\0302' - and U&'\0065\0302\0323' represent - an e with circumflex and dot-below accents - applied in different orders. With full normalization - on, these code point sequences are treated as equal; otherwise they - are unequal. - - - kc true, false @@ -1368,6 +1350,29 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false 'id-123'. + + kk + true, false + false + + + Enable full normalization; may affect performance. Basic + normalization is performed even when set to + false. Locales for languages that require full + normalization typically enable it by default. + + + Full normalization is important in some cases, such as when + multiple accents are applied to a single character. For example, + the code point sequences U&'\0065\0323\0302' + and U&'\0065\0302\0323' represent + an e with circumflex and dot-below accents + applied in different orders. 
With full normalization + on, these code point sequences are treated as equal; otherwise they + are unequal. + + + kr @@ -1393,6 +1398,17 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false + + ks + level1, level2, level3, level4, identic + level3 + + Sensitivity (or "strength") when determining equality, with + level1 the least sensitive to differences and + identic the most sensitive to differences. See + for details. + + kv @@ -1410,14 +1426,6 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false to level3 or lower to take effect. - - co - emoji, phonebk, standard, ... - standard - - Collation type. See for additional options and details. - -
@@ -1428,7 +1436,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false For many collation settings, you must create the collation with - set to false for the + set to false for the setting to have the desired effect (see ). Additionally, some settings only take effect when the key ka is set to @@ -1437,6 +1445,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
+ Examples @@ -1487,6 +1496,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false + External References for ICU