From d522b05c8c1c2c430b2c8be795c609f6f4f2ce44 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 12 Jun 2023 09:14:03 +0900 Subject: [PATCH] hstore: Tighten key/value parsing check for whitespaces isspace() can be locale-sensitive depending on the platform, causing hstore to treat some bytes as whitespace when it should not. For example, U+0105, encoded as 0xC4 0x85 in UTF-8, would be discarded from the given input. This problem is similar to 9ae2661, though it was missed that hstore can also manipulate non-ASCII inputs, so replace the existing isspace() calls with scanner_isspace(). This problem has existed for a long time, so backpatch all the way down. Author: Evan Jones Discussion: https://postgr.es/m/CA+HWA9awUW0+RV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig@mail.gmail.com Backpatch-through: 11 --- contrib/hstore/Makefile | 2 +- contrib/hstore/expected/hstore_utf8.out | 36 +++++++++++++++++++++++ contrib/hstore/expected/hstore_utf8_1.out | 8 +++++ contrib/hstore/hstore_io.c | 9 +++--- contrib/hstore/meson.build | 1 + contrib/hstore/sql/hstore_utf8.sql | 19 ++++++++++++ 6 files changed, 70 insertions(+), 5 deletions(-) create mode 100644 contrib/hstore/expected/hstore_utf8.out create mode 100644 contrib/hstore/expected/hstore_utf8_1.out create mode 100644 contrib/hstore/sql/hstore_utf8.sql diff --git a/contrib/hstore/Makefile b/contrib/hstore/Makefile index c4e339b57c..48ee98f0d5 100644 --- a/contrib/hstore/Makefile +++ b/contrib/hstore/Makefile @@ -22,7 +22,7 @@ PGFILEDESC = "hstore - key/value pair data type" HEADERS = hstore.h -REGRESS = hstore +REGRESS = hstore hstore_utf8 ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/contrib/hstore/expected/hstore_utf8.out b/contrib/hstore/expected/hstore_utf8.out new file mode 100644 index 0000000000..4405824413 --- /dev/null +++ b/contrib/hstore/expected/hstore_utf8.out @@ -0,0 +1,36 @@ +/* + * This test must be run in a database with UTF-8 encoding, + * because other encodings don't support all the
characters used. + */ +SELECT getdatabaseencoding() <> 'UTF8' + AS skip_test \gset +\if :skip_test +\quit +\endif +SET client_encoding = utf8; +-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes +-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace. +SELECT E'key\u0105=>value\u0105'::hstore; + hstore +------------------ + "keyą"=>"valueą" +(1 row) + +SELECT 'keyą=>valueą'::hstore; + hstore +------------------ + "keyą"=>"valueą" +(1 row) + +SELECT 'ą=>ą'::hstore; + hstore +---------- + "ą"=>"ą" +(1 row) + +SELECT 'keyąfoo=>valueą'::hstore; + hstore +--------------------- + "keyąfoo"=>"valueą" +(1 row) + diff --git a/contrib/hstore/expected/hstore_utf8_1.out b/contrib/hstore/expected/hstore_utf8_1.out new file mode 100644 index 0000000000..37aead89c0 --- /dev/null +++ b/contrib/hstore/expected/hstore_utf8_1.out @@ -0,0 +1,8 @@ +/* + * This test must be run in a database with UTF-8 encoding, + * because other encodings don't support all the characters used. 
+ */ +SELECT getdatabaseencoding() <> 'UTF8' + AS skip_test \gset +\if :skip_test +\quit diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c index cec7df71a2..999ddad76d 100644 --- a/contrib/hstore/hstore_io.c +++ b/contrib/hstore/hstore_io.c @@ -13,6 +13,7 @@ #include "lib/stringinfo.h" #include "libpq/pqformat.h" #include "nodes/miscnodes.h" +#include "parser/scansup.h" #include "utils/builtins.h" #include "utils/json.h" #include "utils/jsonb.h" @@ -118,7 +119,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped) { st = GV_WAITESCIN; } - else if (!isspace((unsigned char) *(state->ptr))) + else if (!scanner_isspace((unsigned char) *(state->ptr))) { *(state->cur) = *(state->ptr); state->cur++; @@ -141,7 +142,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped) state->ptr--; return true; } - else if (isspace((unsigned char) *(state->ptr))) + else if (scanner_isspace((unsigned char) *(state->ptr))) { return true; } @@ -255,7 +256,7 @@ parse_hstore(HSParser *state) { PRSEOF; } - else if (!isspace((unsigned char) *(state->ptr))) + else if (!scanner_isspace((unsigned char) *(state->ptr))) { PRSSYNTAXERROR; } @@ -309,7 +310,7 @@ parse_hstore(HSParser *state) { return true; } - else if (!isspace((unsigned char) *(state->ptr))) + else if (!scanner_isspace((unsigned char) *(state->ptr))) { PRSSYNTAXERROR; } diff --git a/contrib/hstore/meson.build b/contrib/hstore/meson.build index 99c3a3160d..20acc45ad8 100644 --- a/contrib/hstore/meson.build +++ b/contrib/hstore/meson.build @@ -50,6 +50,7 @@ tests += { 'regress': { 'sql': [ 'hstore', + 'hstore_utf8', ], }, } diff --git a/contrib/hstore/sql/hstore_utf8.sql b/contrib/hstore/sql/hstore_utf8.sql new file mode 100644 index 0000000000..face878324 --- /dev/null +++ b/contrib/hstore/sql/hstore_utf8.sql @@ -0,0 +1,19 @@ +/* + * This test must be run in a database with UTF-8 encoding, + * because other encodings don't support all the characters used. 
+ */ + +SELECT getdatabaseencoding() <> 'UTF8' + AS skip_test \gset +\if :skip_test +\quit +\endif + +SET client_encoding = utf8; + +-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes +-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace. +SELECT E'key\u0105=>value\u0105'::hstore; +SELECT 'keyą=>valueą'::hstore; +SELECT 'ą=>ą'::hstore; +SELECT 'keyąfoo=>valueą'::hstore;