From ab40b0395a75e3faf43e5a7d4b521b3bc1130d3a Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 15 Jun 2023 13:45:44 +0900 Subject: [PATCH] intarray: Prevent out-of-bound memory reads with gist__int_ops As gist__int_ops stands in intarray, it is possible to store GiST entries for leaf pages that can cause corruptions when decompressed. Leaf nodes are stored as decompressed all the time by the compression method, and the decompression method should map with that, retrieving the contents of the page without doing any decompression. However, the code authorized the insertion of leaf page data with a higher number of array items than what can be supported, generating a NOTICE message to inform about this matter (199 for an 8k page, for reference). When calling the decompression method, a decompression would be attempted on this leaf node item but the contents should be retrieved as they are. The NOTICE message generated when dealing with the compression of a leaf page and too many elements in the input array for gist__int_ops has been introduced by 08ee64e, removing the marker stored in the array to track if this is actually a leaf node. However, it also missed the fact that the decompression path should do nothing for a leaf page. Hence, as the code stands, a too-large array would be stored as uncompressed but the decompression path would attempt a decompression rather than retrieving the contents as they are. This leads to various problems. First, even if 08ee64e tried to address that, it is possible to do out-of-bound chunk writes with a large input array, with the backend informing about that with WARNINGs. On decompression, retrieving the stored leaf data would lead to incorrect memory reads, leading to crashes or even worse. 
Perhaps somebody would be interested in expanding the number of array items that can be handled in a leaf page for this operator in the future, which would require revisiting the choice done in 08ee64e, but based on the lack of reports about this problem since 2005 it does not look so. For now, this commit prevents the insertion of data for leaf pages when using more array items than the code can handle on decompression, switching the NOTICE message to an ERROR. If one wishes to use more array items, gist__intbig_ops is an optional choice. While on it, use ERRCODE_PROGRAM_LIMIT_EXCEEDED as error code when a limit is reached, because that's what the module is facing in such cases. Author: Ankit Kumar Pandey, Alexander Lakhin Reviewed-by: Richard Guo, Michael Paquier Discussion: https://postgr.es/m/796b65c3-57b7-bddf-b0d5-a8afafb8b627@gmail.com Discussion: https://postgr.es/m/17888-f72930e6b5ce8c14@postgresql.org Backpatch-through: 11 --- contrib/intarray/_int_gist.c | 12 ++++++++---- contrib/intarray/expected/_int.out | 2 ++ contrib/intarray/sql/_int.sql | 2 ++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/contrib/intarray/_int_gist.c b/contrib/intarray/_int_gist.c index 5db04c75ed..87cd64a11c 100644 --- a/contrib/intarray/_int_gist.c +++ b/contrib/intarray/_int_gist.c @@ -172,8 +172,10 @@ g_int_compress(PG_FUNCTION_ARGS) PREPAREARR(r); if (ARRNELEMS(r) >= 2 * MAXNUMRANGE) - elog(NOTICE, "input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead", - 2 * MAXNUMRANGE - 1, ARRNELEMS(r)); + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("input array is too big (%d maximum allowed, %d current), use gist__intbig_ops opclass instead", + 2 * MAXNUMRANGE - 1, ARRNELEMS(r)))); retval = palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(r), @@ -258,7 +260,8 @@ g_int_compress(PG_FUNCTION_ARGS) lenr = internal_size(dr, len); if (lenr < 0 || lenr > MAXNUMELTS) ereport(ERROR, - (errmsg("data 
is too sparse, recreate index using gist__intbig_ops opclass instead"))); + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("data is too sparse, recreate index using gist__intbig_ops opclass instead"))); r = resize_intArrayType(r, len); retval = palloc(sizeof(GISTENTRY)); @@ -319,7 +322,8 @@ g_int_decompress(PG_FUNCTION_ARGS) lenr = internal_size(din, lenin); if (lenr < 0 || lenr > MAXNUMELTS) ereport(ERROR, - (errmsg("compressed array is too big, recreate index using gist__intbig_ops opclass instead"))); + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("compressed array is too big, recreate index using gist__intbig_ops opclass instead"))); r = new_intArrayType(lenr); dr = ARRPTR(r); diff --git a/contrib/intarray/expected/_int.out b/contrib/intarray/expected/_int.out index c92a56524a..d6218df47f 100644 --- a/contrib/intarray/expected/_int.out +++ b/contrib/intarray/expected/_int.out @@ -547,6 +547,8 @@ SELECT count(*) from test__int WHERE a @@ '!20 & !21'; 6343 (1 row) +INSERT INTO test__int SELECT array(SELECT x FROM generate_series(1, 1001) x); -- should fail +ERROR: input array is too big (199 maximum allowed, 1001 current), use gist__intbig_ops opclass instead DROP INDEX text_idx; CREATE INDEX text_idx on test__int using gist ( a gist__intbig_ops ); SELECT count(*) from test__int WHERE a && '{23,50}'; diff --git a/contrib/intarray/sql/_int.sql b/contrib/intarray/sql/_int.sql index 6ca7e3cca7..72a85c9638 100644 --- a/contrib/intarray/sql/_int.sql +++ b/contrib/intarray/sql/_int.sql @@ -110,6 +110,8 @@ SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)'; SELECT count(*) from test__int WHERE a @@ '20 | !21'; SELECT count(*) from test__int WHERE a @@ '!20 & !21'; +INSERT INTO test__int SELECT array(SELECT x FROM generate_series(1, 1001) x); -- should fail + DROP INDEX text_idx; CREATE INDEX text_idx on test__int using gist ( a gist__intbig_ops );