Doc fixes for commit 1e16af8ab5.

Discussion: https://postgr.es/m/275c47ea-e7f3-e654-c99a-63bc116997d7@enterprisedb.com
This commit is contained in:
Jeff Davis 2023-05-25 17:05:50 -07:00
parent 5abff197cc
commit f4a9422c0c
1 changed files with 71 additions and 61 deletions

View File

@ -386,11 +386,12 @@ initdb --locale-provider=icu --icu-locale=en
linkend="icu-language-tag">Language Tag</link>. linkend="icu-language-tag">Language Tag</link>.
<programlisting> <programlisting>
CREATE COLLATION mycollation1 (PROVIDER = icu, LOCALE = 'ja-JP'); CREATE COLLATION mycollation1 (provider = icu, locale = 'ja-JP');
CREATE COLLATION mycollation2 (PROVIDER = icu, LOCALE = 'fr'); CREATE COLLATION mycollation2 (provider = icu, locale = 'fr');
</programlisting> </programlisting>
</para> </para>
</sect3> </sect3>
<sect3 id="icu-canonicalization"> <sect3 id="icu-canonicalization">
<title>Locale Canonicalization and Validation</title> <title>Locale Canonicalization and Validation</title>
<para> <para>
@ -399,14 +400,14 @@ CREATE COLLATION mycollation2 (PROVIDER = icu, LOCALE = 'fr');
language tag if not already in that form. For instance, language tag if not already in that form. For instance,
<screen> <screen>
CREATE COLLATION mycollation3 (PROVIDER = icu, LOCALE = 'en-US-u-kn-true'); CREATE COLLATION mycollation3 (provider = icu, locale = 'en-US-u-kn-true');
NOTICE: using standard form "en-US-u-kn" for locale "en-US-u-kn-true" NOTICE: using standard form "en-US-u-kn" for locale "en-US-u-kn-true"
CREATE COLLATION mycollation4 (PROVIDER = icu, LOCALE = 'de_DE.utf8'); CREATE COLLATION mycollation4 (provider = icu, locale = 'de_DE.utf8');
NOTICE: using standard form "de-DE" for locale "de_DE.utf8" NOTICE: using standard form "de-DE" for locale "de_DE.utf8"
</screen> </screen>
If you see this notice, ensure that the <symbol>PROVIDER</symbol> and If you see this notice, ensure that the <symbol>provider</symbol> and
<symbol>LOCALE</symbol> are the expected result. For consistent results <symbol>locale</symbol> are the expected result. For consistent results
when using the ICU provider, specify the canonical <link when using the ICU provider, specify the canonical <link
linkend="icu-language-tag">language tag</link> instead of relying on the linkend="icu-language-tag">language tag</link> instead of relying on the
transformation. transformation.
@ -427,7 +428,7 @@ NOTICE: using standard form "de-DE" for locale "de_DE.utf8"
the following warning: the following warning:
<screen> <screen>
CREATE COLLATION nonsense (PROVIDER = icu, LOCALE = 'nonsense'); CREATE COLLATION nonsense (provider = icu, locale = 'nonsense');
WARNING: ICU locale "nonsense" has unknown language "nonsense" WARNING: ICU locale "nonsense" has unknown language "nonsense"
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED. HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
CREATE COLLATION CREATE COLLATION
@ -438,6 +439,7 @@ CREATE COLLATION
still be created, but the behavior may not be what the user intended. still be created, but the behavior may not be what the user intended.
</para> </para>
</sect3> </sect3>
<sect3 id="icu-language-tag"> <sect3 id="icu-language-tag">
<title>Language Tag</title> <title>Language Tag</title>
<para> <para>
@ -484,7 +486,7 @@ CREATE COLLATION
of digits as a single number: of digits as a single number:
<screen> <screen>
CREATE COLLATION mycollation5 (PROVIDER = icu, DETERMINISTIC = false, LOCALE = 'en-US-u-kn-ks-level2'); CREATE COLLATION mycollation5 (provider = icu, deterministic = false, locale = 'en-US-u-kn-ks-level2');
SELECT 'aB' = 'Ab' COLLATE mycollation5 as result; SELECT 'aB' = 'Ab' COLLATE mycollation5 as result;
result result
-------- --------
@ -1109,16 +1111,16 @@ CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-tr
<programlisting> <programlisting>
-- ignore differences in accents and case -- ignore differences in accents and case
CREATE COLLATION ignore_accent_case (PROVIDER = icu, DETERMINISTIC = false, LOCALE = 'und-u-ks-level1'); CREATE COLLATION ignore_accent_case (provider = icu, deterministic = false, locale = 'und-u-ks-level1');
SELECT 'Å' = 'A' COLLATE ignore_accent_case; -- true SELECT 'Å' = 'A' COLLATE ignore_accent_case; -- true
SELECT 'z' = 'Z' COLLATE ignore_accent_case; -- true SELECT 'z' = 'Z' COLLATE ignore_accent_case; -- true
-- upper case letters sort before lower case. -- upper case letters sort before lower case.
CREATE COLLATION upper_first (PROVIDER=icu, LOCALE = 'und-u-kf-upper'); CREATE COLLATION upper_first (provider = icu, locale = 'und-u-kf-upper');
SELECT 'B' &lt; 'b' COLLATE upper_first; -- true SELECT 'B' &lt; 'b' COLLATE upper_first; -- true
-- treat digits numerically and ignore punctuation -- treat digits numerically and ignore punctuation
CREATE COLLATION num_ignore_punct (PROVIDER = icu, DETERMINISTIC = false, LOCALE = 'und-u-ka-shifted-kn'); CREATE COLLATION num_ignore_punct (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-kn');
SELECT 'id-45' &lt; 'id-123' COLLATE num_ignore_punct; -- true SELECT 'id-45' &lt; 'id-123' COLLATE num_ignore_punct; -- true
SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
</programlisting> </programlisting>
@ -1136,6 +1138,13 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
linkend="icu-collation-settings-table">collation settings</link>. Higher linkend="icu-collation-settings-table">collation settings</link>. Higher
levels correspond to finer textual features. levels correspond to finer textual features.
</para> </para>
<para>
<xref linkend="icu-collation-levels"/> shows which textual feature
differences are considered significant when determining equality at the
given level. The Unicode character <literal>U+2063</literal> is an
invisible separator, and as seen in the table, is ignored at all
levels of comparison less than <literal>identic</literal>.
</para>
<para> <para>
<table id="icu-collation-levels"> <table id="icu-collation-levels">
<title>ICU Collation Levels</title> <title>ICU Collation Levels</title>
@ -1215,20 +1224,13 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
</tgroup> </tgroup>
</table> </table>
The above table shows which textual feature differences are
considered significant when determining equality at the given level. The
Unicode character <literal>U+2063</literal> is an invisible separator,
and as seen in the table, is ignored at all levels of comparison less
than <literal>identic</literal>.
</para>
<para>
At every level, even with full normalization off, basic normalization is At every level, even with full normalization off, basic normalization is
performed. For example, <literal>'á'</literal> may be composed of the performed. For example, <literal>'á'</literal> may be composed of the
code points <literal>U&amp;'\0061\0301'</literal> or the single code code points <literal>U&amp;'\0061\0301'</literal> or the single code
point <literal>U&amp;'\00E1'</literal>, and those sequences will be point <literal>U&amp;'\00E1'</literal>, and those sequences will be
considered equal even at the <literal>identic</literal> level. To treat considered equal even at the <literal>identic</literal> level. To treat
any difference in code point representation as distinct, use a collation any difference in code point representation as distinct, use a collation
created with <symbol>DETERMINISTIC</symbol> set to created with <symbol>deterministic</symbol> set to
<literal>true</literal>. <literal>true</literal>.
</para> </para>
<sect4 id="icu-collation-level-examples"> <sect4 id="icu-collation-level-examples">
@ -1236,9 +1238,9 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
<para> <para>
<programlisting> <programlisting>
CREATE COLLATION level3 (PROVIDER=icu, DETERMINISTIC=false, LOCALE='und-u-ka-shifted-ks-level3'); CREATE COLLATION level3 (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-level3');
CREATE COLLATION level4 (PROVIDER=icu, DETERMINISTIC=false, LOCALE='und-u-ka-shifted-ks-level4'); CREATE COLLATION level4 (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-level4');
CREATE COLLATION identic (PROVIDER=icu, DETERMINISTIC=false, LOCALE='und-u-ka-shifted-ks-identic'); CREATE COLLATION identic (provider = icu, deterministic = false, locale = 'und-u-ka-shifted-ks-identic');
-- invisible separator ignored at all levels except identic -- invisible separator ignored at all levels except identic
SELECT 'ab' = U&amp;'a\2063b' COLLATE level4; -- true SELECT 'ab' = U&amp;'a\2063b' COLLATE level4; -- true
@ -1252,8 +1254,14 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
</para> </para>
</sect4> </sect4>
</sect3> </sect3>
<sect3 id="icu-collation-settings"> <sect3 id="icu-collation-settings">
<title>Collation Settings for an ICU Locale</title> <title>Collation Settings for an ICU Locale</title>
<para>
<xref linkend="icu-collation-settings-table"/> shows the available
collation settings, which can be used as part of a language tag to
customize a collation.
</para>
<para> <para>
<table id="icu-collation-settings-table"> <table id="icu-collation-settings-table">
<title>ICU Collation Settings</title> <title>ICU Collation Settings</title>
@ -1272,14 +1280,11 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
</thead> </thead>
<tbody> <tbody>
<row> <row>
<entry><literal>ks</literal></entry> <entry><literal>co</literal></entry>
<entry><literal>level1</literal>, <literal>level2</literal>, <literal>level3</literal>, <literal>level4</literal>, <literal>identic</literal></entry> <entry><literal>emoji</literal>, <literal>phonebk</literal>, <literal>standard</literal>, <replaceable>...</replaceable></entry>
<entry><literal>level3</literal></entry> <entry><literal>standard</literal></entry>
<entry> <entry>
Sensitivity (or "strength") when determining equality, with Collation type. See <xref linkend="icu-external-references"/> for additional options and details.
<literal>level1</literal> the least sensitive to differences and
<literal>identic</literal> the most sensitive to differences. See
<xref linkend="icu-collation-levels"/> for details.
</entry> </entry>
</row> </row>
<row> <row>
@ -1304,29 +1309,6 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
before <literal>'aé'</literal>. before <literal>'aé'</literal>.
</entry> </entry>
</row> </row>
<row>
<entry><literal>kk</literal></entry>
<entry><literal>true</literal>, <literal>false</literal></entry>
<entry><literal>false</literal></entry>
<entry>
<para>
Enable full normalization; may affect performance. Basic
normalization is performed even when set to
<literal>false</literal>. Locales for languages that require full
normalization typically enable it by default.
</para>
<para>
Full normalization is important in some cases, such as when
multiple accents are applied to a single character. For example,
the code point sequences <literal>U&amp;'\0065\0323\0302'</literal>
and <literal>U&amp;'\0065\0302\0323'</literal> represent
an <literal>e</literal> with circumflex and dot-below accents
applied in different orders. With full normalization
on, these code point sequences are treated as equal; otherwise they
are unequal.
</para>
</entry>
</row>
<row> <row>
<entry><literal>kc</literal></entry> <entry><literal>kc</literal></entry>
<entry><literal>true</literal>, <literal>false</literal></entry> <entry><literal>true</literal>, <literal>false</literal></entry>
@ -1368,6 +1350,29 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
<literal>'id-123'</literal>. <literal>'id-123'</literal>.
</entry> </entry>
</row> </row>
<row>
<entry><literal>kk</literal></entry>
<entry><literal>true</literal>, <literal>false</literal></entry>
<entry><literal>false</literal></entry>
<entry>
<para>
Enable full normalization; may affect performance. Basic
normalization is performed even when set to
<literal>false</literal>. Locales for languages that require full
normalization typically enable it by default.
</para>
<para>
Full normalization is important in some cases, such as when
multiple accents are applied to a single character. For example,
the code point sequences <literal>U&amp;'\0065\0323\0302'</literal>
and <literal>U&amp;'\0065\0302\0323'</literal> represent
an <literal>e</literal> with circumflex and dot-below accents
applied in different orders. With full normalization
on, these code point sequences are treated as equal; otherwise they
are unequal.
</para>
</entry>
</row>
<row> <row>
<entry><literal>kr</literal></entry> <entry><literal>kr</literal></entry>
<entry> <entry>
@ -1393,6 +1398,17 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
</para> </para>
</entry> </entry>
</row> </row>
<row>
<entry><literal>ks</literal></entry>
<entry><literal>level1</literal>, <literal>level2</literal>, <literal>level3</literal>, <literal>level4</literal>, <literal>identic</literal></entry>
<entry><literal>level3</literal></entry>
<entry>
Sensitivity (or "strength") when determining equality, with
<literal>level1</literal> the least sensitive to differences and
<literal>identic</literal> the most sensitive to differences. See
<xref linkend="icu-collation-levels"/> for details.
</entry>
</row>
<row> <row>
<entry><literal>kv</literal></entry> <entry><literal>kv</literal></entry>
<entry> <entry>
@ -1410,14 +1426,6 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
to <literal>level3</literal> or lower to take effect. to <literal>level3</literal> or lower to take effect.
</entry> </entry>
</row> </row>
<row>
<entry><literal>co</literal></entry>
<entry><literal>emoji</literal>, <literal>phonebk</literal>, <literal>standard</literal>, <replaceable>...</replaceable></entry>
<entry><literal>standard</literal></entry>
<entry>
Collation type. See <xref linkend="icu-external-references"/> for additional options and details.
</entry>
</row>
</tbody> </tbody>
</tgroup> </tgroup>
</table> </table>
@ -1428,7 +1436,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
<note> <note>
<para> <para>
For many collation settings, you must create the collation with For many collation settings, you must create the collation with
<option>DETERMINISTIC</option> set to <literal>false</literal> for the <option>deterministic</option> set to <literal>false</literal> for the
setting to have the desired effect (see <xref setting to have the desired effect (see <xref
linkend="collation-nondeterministic"/>). Additionally, some settings linkend="collation-nondeterministic"/>). Additionally, some settings
only take effect when the key <literal>ka</literal> is set to only take effect when the key <literal>ka</literal> is set to
@ -1437,6 +1445,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
</para> </para>
</note> </note>
</sect3> </sect3>
<sect3 id="icu-locale-examples"> <sect3 id="icu-locale-examples">
<title>Examples</title> <title>Examples</title>
<para> <para>
@ -1487,6 +1496,7 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
</variablelist> </variablelist>
</para> </para>
</sect3> </sect3>
<sect3 id="icu-external-references"> <sect3 id="icu-external-references">
<title>External References for ICU</title> <title>External References for ICU</title>
<para> <para>