diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c index 36ff2aab21..06bf921e45 100644 --- a/src/common/unicode_norm.c +++ b/src/common/unicode_norm.c @@ -439,6 +439,10 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input) decomp_chars[decomp_size] = '\0'; Assert(decomp_size == current_size); + /* Leave if there is nothing to decompose */ + if (decomp_size == 0) + return decomp_chars; + /* * Now apply canonical ordering. */ diff --git a/src/test/regress/expected/unicode.out b/src/test/regress/expected/unicode.out index 2a1e903696..f2713a2326 100644 --- a/src/test/regress/expected/unicode.out +++ b/src/test/regress/expected/unicode.out @@ -8,6 +8,12 @@ SELECT U&'\0061\0308bc' <> U&'\00E4bc' COLLATE "C" AS sanity_check; t (1 row) +SELECT normalize(''); + normalize +----------- + +(1 row) + SELECT normalize(U&'\0061\0308\24D1c') = U&'\00E4\24D1c' COLLATE "C" AS test_default; test_default -------------- @@ -67,7 +73,8 @@ FROM (VALUES (1, U&'\00E4bc'), (2, U&'\0061\0308bc'), (3, U&'\00E4\24D1c'), - (4, U&'\0061\0308\24D1c')) vals (num, val) + (4, U&'\0061\0308\24D1c'), + (5, '')) vals (num, val) ORDER BY num; num | val | nfc | nfd | nfkc | nfkd -----+-----+-----+-----+------+------ @@ -75,7 +82,8 @@ ORDER BY num; 2 | äbc | f | t | f | t 3 | äⓑc | t | f | f | f 4 | äⓑc | f | t | f | f -(4 rows) + 5 | | t | t | t | t +(5 rows) SELECT is_normalized('abc', 'def'); -- run-time error ERROR: invalid normalization form: def diff --git a/src/test/regress/sql/unicode.sql b/src/test/regress/sql/unicode.sql index ccfc6fa77a..63cd523f85 100644 --- a/src/test/regress/sql/unicode.sql +++ b/src/test/regress/sql/unicode.sql @@ -5,6 +5,7 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset SELECT U&'\0061\0308bc' <> U&'\00E4bc' COLLATE "C" AS sanity_check; +SELECT normalize(''); SELECT normalize(U&'\0061\0308\24D1c') = U&'\00E4\24D1c' COLLATE "C" AS test_default; SELECT normalize(U&'\0061\0308\24D1c', NFC) = U&'\00E4\24D1c' COLLATE "C" AS test_nfc; SELECT normalize(U&'\00E4bc', NFC) = U&'\00E4bc' COLLATE "C" AS test_nfc_idem; @@ -26,7 +27,8 @@ FROM (VALUES (1, U&'\00E4bc'), (2, U&'\0061\0308bc'), (3, U&'\00E4\24D1c'), - (4, U&'\0061\0308\24D1c')) vals (num, val) + (4, U&'\0061\0308\24D1c'), + (5, '')) vals (num, val) ORDER BY num; SELECT is_normalized('abc', 'def'); -- run-time error