From 3a5222a432beeb980208a2d9abeb517412f4d469 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 12 Jun 2023 09:14:13 +0900 Subject: [PATCH] hstore: Tighten key/value parsing check for whitespaces isspace() can be locale-sensitive depending on the platform, causing hstore to consider as whitespaces characters it should not see as such. For example, U+0105, being decoded as 0xC4 0x85 in UTF-8, would be discarded from the input given. This problem is similar to 9ae2661, though it was missed that hstore can also manipulate non-ASCII inputs, so replace the existing isspace() calls with scanner_isspace(). This problem exists for a long time, so backpatch all the way down. Author: Evan Jones Discussion: https://postgr.es/m/CA+HWA9awUW0+RV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig@mail.gmail.com Backpatch-through: 11 --- contrib/hstore/Makefile | 2 +- contrib/hstore/expected/hstore_utf8.out | 36 +++++++++++++++++++++++ contrib/hstore/expected/hstore_utf8_1.out | 8 +++++ contrib/hstore/hstore_io.c | 9 +++--- contrib/hstore/sql/hstore_utf8.sql | 19 ++++++++++++ 5 files changed, 69 insertions(+), 5 deletions(-) create mode 100644 contrib/hstore/expected/hstore_utf8.out create mode 100644 contrib/hstore/expected/hstore_utf8_1.out create mode 100644 contrib/hstore/sql/hstore_utf8.sql diff --git a/contrib/hstore/Makefile b/contrib/hstore/Makefile index c4e339b57c..48ee98f0d5 100644 --- a/contrib/hstore/Makefile +++ b/contrib/hstore/Makefile @@ -22,7 +22,7 @@ PGFILEDESC = "hstore - key/value pair data type" HEADERS = hstore.h -REGRESS = hstore +REGRESS = hstore hstore_utf8 ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/contrib/hstore/expected/hstore_utf8.out b/contrib/hstore/expected/hstore_utf8.out new file mode 100644 index 0000000000..4405824413 --- /dev/null +++ b/contrib/hstore/expected/hstore_utf8.out @@ -0,0 +1,36 @@ +/* + * This test must be run in a database with UTF-8 encoding, + * because other encodings don't support all the characters used. + */ +SELECT getdatabaseencoding() <> 'UTF8' + AS skip_test \gset +\if :skip_test +\quit +\endif +SET client_encoding = utf8; +-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes +-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace. +SELECT E'key\u0105=>value\u0105'::hstore; + hstore +------------------ + "keyą"=>"valueą" +(1 row) + +SELECT 'keyą=>valueą'::hstore; + hstore +------------------ + "keyą"=>"valueą" +(1 row) + +SELECT 'ą=>ą'::hstore; + hstore +---------- + "ą"=>"ą" +(1 row) + +SELECT 'keyąfoo=>valueą'::hstore; + hstore +--------------------- + "keyąfoo"=>"valueą" +(1 row) + diff --git a/contrib/hstore/expected/hstore_utf8_1.out b/contrib/hstore/expected/hstore_utf8_1.out new file mode 100644 index 0000000000..37aead89c0 --- /dev/null +++ b/contrib/hstore/expected/hstore_utf8_1.out @@ -0,0 +1,8 @@ +/* + * This test must be run in a database with UTF-8 encoding, + * because other encodings don't support all the characters used. + */ +SELECT getdatabaseencoding() <> 'UTF8' + AS skip_test \gset +\if :skip_test +\quit diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c index b3304ff844..03057f085d 100644 --- a/contrib/hstore/hstore_io.c +++ b/contrib/hstore/hstore_io.c @@ -12,6 +12,7 @@ #include "hstore.h" #include "lib/stringinfo.h" #include "libpq/pqformat.h" +#include "parser/scansup.h" #include "utils/builtins.h" #include "utils/json.h" #include "utils/jsonb.h" @@ -88,7 +89,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped) { st = GV_WAITESCIN; } - else if (!isspace((unsigned char) *(state->ptr))) + else if (!scanner_isspace((unsigned char) *(state->ptr))) { *(state->cur) = *(state->ptr); state->cur++; @@ -111,7 +112,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped) state->ptr--; return true; } - else if (isspace((unsigned char) *(state->ptr))) + else if (scanner_isspace((unsigned char) *(state->ptr))) { return true; } @@ -219,7 +220,7 @@ parse_hstore(HSParser *state) { elog(ERROR, "Unexpected end of string"); } - else if (!isspace((unsigned char) *(state->ptr))) + else if (!scanner_isspace((unsigned char) *(state->ptr))) { elog(ERROR, "Syntax error near \"%.*s\" at position %d", pg_mblen(state->ptr), state->ptr, @@ -271,7 +272,7 @@ parse_hstore(HSParser *state) { return; } - else if (!isspace((unsigned char) *(state->ptr))) + else if (!scanner_isspace((unsigned char) *(state->ptr))) { elog(ERROR, "Syntax error near \"%.*s\" at position %d", pg_mblen(state->ptr), state->ptr, diff --git a/contrib/hstore/sql/hstore_utf8.sql b/contrib/hstore/sql/hstore_utf8.sql new file mode 100644 index 0000000000..face878324 --- /dev/null +++ b/contrib/hstore/sql/hstore_utf8.sql @@ -0,0 +1,19 @@ +/* + * This test must be run in a database with UTF-8 encoding, + * because other encodings don't support all the characters used. + */ + +SELECT getdatabaseencoding() <> 'UTF8' + AS skip_test \gset +\if :skip_test +\quit +\endif + +SET client_encoding = utf8; + +-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes +-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace. +SELECT E'key\u0105=>value\u0105'::hstore; +SELECT 'keyą=>valueą'::hstore; +SELECT 'ą=>ą'::hstore; +SELECT 'keyąfoo=>valueą'::hstore;