postgresql/contrib/intarray/_int_tool.c
Andrew Gierth 757c5182f2 Avoid crashes in contrib/intarray gist__int_ops (bug #15518)
1. Integer overflow in internal_size could result in memory corruption
in decompression since a zero-length array would be allocated and then
written to. This leads to crashes or corruption when traversing an
index which has been populated with sufficiently sparse values. Fix by
using int64 for computations and checking for overflow.

2. Integer overflow in g_int_compress could cause pessimal merge
choices, resulting in unnecessarily large ranges (which would in turn
trigger issue 1 above). Fix by using int64 again.

3. Even without overflow, array sizes could become large enough to
cause unexplained memory allocation errors. Fix by capping the sizes
to a safe limit and reporting actual errors that point at
gist__intbig_ops as needed.

4. Large inputs to the compression function always consist of large
runs of consecutive integers, and the compression loop was processing
these one at a time in an O(N^2) manner with a lot of overhead. The
expected runtime of this function could easily exceed 6 months for a
single call as a result. Fix by performing a linear-time first pass,
which reduces the worst case to something on the order of seconds.

Backpatch all the way, since this has been wrong forever.

Per bug #15518, reported by IRC user "dymk"; analysis and patch by
me.

Discussion: https://postgr.es/m/15518-799e426c3b4f8358@postgresql.org
2018-11-24 08:48:36 +00:00

422 lines
6.6 KiB
C

/*
* contrib/intarray/_int_tool.c
*/
#include "postgres.h"
#include <limits.h>
#include "catalog/pg_type.h"
#include "_int.h"
/*
 * Report whether every element of "b" also appears in "a".
 *
 * Both arguments are assumed sorted & unique-ified.
 */
bool
inner_int_contains(ArrayType *a, ArrayType *b)
{
	int			alen = ARRNELEMS(a);
	int			blen = ARRNELEMS(b);
	int		   *avals = ARRPTR(a);
	int		   *bvals = ARRPTR(b);
	int			apos = 0;
	int			matched = 0;	/* also the read position in b */

	while (apos < alen && matched < blen)
	{
		int			av = avals[apos];
		int			bv = bvals[matched];

		if (av > bv)
			break;				/* bv is not in a */
		if (av == bv)
			matched++;
		apos++;
	}

	/* b is contained iff each of its elements found a partner */
	return matched == blen;
}
/*
 * Report whether "a" and "b" have at least one element in common.
 *
 * Both arguments are assumed sorted.
 */
bool
inner_int_overlap(ArrayType *a, ArrayType *b)
{
	int			alen = ARRNELEMS(a);
	int			blen = ARRNELEMS(b);
	int		   *acur = ARRPTR(a);
	int		   *bcur = ARRPTR(b);
	int		   *aend = acur + alen;
	int		   *bend = bcur + blen;

	/* walk both arrays in step, always advancing the smaller head */
	while (acur < aend && bcur < bend)
	{
		if (*acur == *bcur)
			return true;
		if (*acur < *bcur)
			acur++;
		else
			bcur++;
	}
	return false;
}
/*
 * Build a new array holding the union of "a" and "b".
 *
 * If exactly one input is empty the result is a copy of the other one;
 * if both are empty a fresh empty array is returned.  Otherwise the two
 * inputs are merged in ascending order.  Any result with more than one
 * element is finally passed through _int_unique().
 */
ArrayType *
inner_int_union(ArrayType *a, ArrayType *b)
{
	ArrayType  *result;

	CHECKARRVALID(a);
	CHECKARRVALID(b);

	if (ARRISEMPTY(a))
	{
		if (ARRISEMPTY(b))
			return new_intArrayType(0);
		result = copy_intArrayType(b);
	}
	else if (ARRISEMPTY(b))
		result = copy_intArrayType(a);
	else
	{
		int			alen = ARRNELEMS(a);
		int			blen = ARRNELEMS(b);
		int		   *avals = ARRPTR(a);
		int		   *bvals = ARRPTR(b);
		int		   *out;
		int			ai = 0;
		int			bi = 0;
		int			nout = 0;

		/* worst case: no element is shared */
		result = new_intArrayType(alen + blen);
		out = ARRPTR(result);

		while (ai < alen && bi < blen)
		{
			if (avals[ai] < bvals[bi])
				out[nout++] = avals[ai++];
			else if (avals[ai] > bvals[bi])
				out[nout++] = bvals[bi++];
			else
			{
				/* same value on both sides: emit once, step both */
				out[nout++] = avals[ai++];
				bi++;
			}
		}

		/* at most one of the inputs still has elements left */
		for (; ai < alen; ai++)
			out[nout++] = avals[ai];
		for (; bi < blen; bi++)
			out[nout++] = bvals[bi];

		result = resize_intArrayType(result, nout);
	}

	if (ARRNELEMS(result) > 1)
		result = _int_unique(result);

	return result;
}
/*
 * Build a new array holding the intersection of "a" and "b".
 *
 * The merge loop relies on both inputs being in ascending order.  A value
 * is not emitted twice in a row, so the output contains no duplicates.
 * An empty result is always returned as a fresh empty array.
 */
ArrayType *
inner_int_inter(ArrayType *a, ArrayType *b)
{
	ArrayType  *result;
	int			alen;
	int			blen;
	int		   *avals;
	int		   *bvals;
	int		   *out;
	int			ai = 0;
	int			bi = 0;
	int			nout = 0;

	if (ARRISEMPTY(a) || ARRISEMPTY(b))
		return new_intArrayType(0);

	alen = ARRNELEMS(a);
	blen = ARRNELEMS(b);
	avals = ARRPTR(a);
	bvals = ARRPTR(b);

	/* the intersection cannot be larger than the smaller input */
	result = new_intArrayType((alen < blen) ? alen : blen);
	out = ARRPTR(result);

	while (ai < alen && bi < blen)
	{
		int			av = avals[ai];
		int			bv = bvals[bi];

		if (av < bv)
			ai++;
		else if (av > bv)
			bi++;
		else
		{
			/* common value; skip it if it was the last one emitted */
			if (nout == 0 || out[nout - 1] != bv)
				out[nout++] = bv;
			ai++;
			bi++;
		}
	}

	if (nout > 0)
		return resize_intArrayType(result, nout);

	/* nothing in common: discard the scratch array */
	pfree(result);
	return new_intArrayType(0);
}
/* Store the number of elements of "a", converted to float, into *size */
void
rt__int_size(ArrayType *a, float *size)
{
	int			nelems = ARRNELEMS(a);

	*size = (float) nelems;
}
/*
 * qsort_arg comparison function for isort().
 *
 * "arg" points to a bool that is set to true whenever two equal keys are
 * compared; it is never reset here.
 */
static int
isort_cmp(const void *a, const void *b, void *arg)
{
	const int32 *lhs = (const int32 *) a;
	const int32 *rhs = (const int32 *) b;

	if (*lhs != *rhs)
		return (*lhs < *rhs) ? -1 : 1;

	/*
	 * Equal keys: record that a duplicate exists.  qsort has to compare
	 * equal keys with each other at some point, otherwise it could not know
	 * their relative order, so no duplicate can go unnoticed.
	 */
	*((bool *) arg) = true;
	return 0;
}
/*
 * Sort the "len" values at "a" in ascending order (len >= 2).
 *
 * Returns true if the comparison function saw any pair of equal values,
 * i.e. if the data contains duplicates.
 */
bool
isort(int32 *a, int len)
{
	bool		found_dups = false;

	qsort_arg(a, len, sizeof(a[0]), isort_cmp, &found_dups);

	return found_dups;
}
/*
 * Create a new int array with room for "num" elements.
 *
 * The element area is zero-filled.  For num == 0 a zero-dimensional
 * empty array is returned instead.
 */
ArrayType *
new_intArrayType(int num)
{
	ArrayType  *result;
	int			nbytes;

	if (num <= 0)
	{
		/* negative counts are not expected */
		Assert(num == 0);
		return construct_empty_array(INT4OID);
	}

	/* header of a 1-D array without null bitmap, followed by the data */
	nbytes = ARR_OVERHEAD_NONULLS(1) + sizeof(int) * num;
	result = (ArrayType *) palloc0(nbytes);

	SET_VARSIZE(result, nbytes);
	ARR_NDIM(result) = 1;
	ARR_ELEMTYPE(result) = INT4OID;
	result->dataoffset = 0;		/* marker for no null bitmap */
	ARR_DIMS(result)[0] = num;
	ARR_LBOUND(result)[0] = 1;

	return result;
}
/*
 * Resize array "a" so that it holds exactly "num" elements.
 *
 * The returned pointer must be used in place of "a": the array may have
 * been moved by repalloc, or replaced by a freshly built empty array.
 *
 * For num == 0 the result is the same zero-dimensional empty array that
 * new_intArrayType(0) produces.  Merely setting ARR_NDIM(a) to zero would
 * leave the varlena size and the old dimension words untouched, giving an
 * "empty" array that differs from the representation used everywhere else
 * in this file.  The input array is not freed in that case.
 */
ArrayType *
resize_intArrayType(ArrayType *a, int num)
{
	int			nbytes;
	int			i;

	/* if no elements, return a standard zero-dimensional array */
	if (num <= 0)
	{
		Assert(num == 0);
		return construct_empty_array(INT4OID);
	}

	/* already the requested size: nothing to do */
	if (num == ARRNELEMS(a))
		return a;

	nbytes = ARR_DATA_OFFSET(a) + sizeof(int) * num;

	a = (ArrayType *) repalloc(a, nbytes);

	SET_VARSIZE(a, nbytes);

	/*
	 * Usually the array should be 1-D already, but just in case: put the
	 * whole element count into the first dimension and collapse any further
	 * dimensions to length 1.
	 */
	for (i = 0; i < ARR_NDIM(a); i++)
	{
		ARR_DIMS(a)[i] = num;
		num = 1;
	}
	return a;
}
/* Return a newly allocated array holding the same elements as "a" */
ArrayType *
copy_intArrayType(ArrayType *a)
{
	int			nelems = ARRNELEMS(a);
	ArrayType  *dup = new_intArrayType(nelems);

	memcpy(ARRPTR(dup), ARRPTR(a), sizeof(int32) * nelems);

	return dup;
}
/* num for compressed key */
int
internal_size(int *a, int len)
{
int i;
int64 size = 0;
for (i = 0; i < len; i += 2)
{
if (!i || a[i] != a[i - 1]) /* do not count repeated range */
size += (int64)(a[i + 1]) - (int64)(a[i]) + 1;
}
if (size > (int64)INT_MAX || size < (int64)INT_MIN)
return -1; /* overflow */
return (int) size;
}
/*
 * Remove adjacent duplicates from "r" in place and shrink it to fit.
 *
 * r must be sorted already, so that equal values are next to each other.
 * The returned pointer replaces r.
 */
ArrayType *
_int_unique(ArrayType *r)
{
	int			nelems = ARRNELEMS(r);
	int		   *vals;
	int			last;			/* index of the last value kept so far */
	int			scan;

	if (nelems < 2)
		return r;

	vals = ARRPTR(r);
	last = 0;

	for (scan = 1; scan < nelems; scan++)
	{
		/* keep a value only when it differs from the last one kept */
		if (vals[scan] != vals[last])
			vals[++last] = vals[scan];
	}

	return resize_intArrayType(r, last + 1);
}
/*
 * Apply HASH() to "sign" for each of the "len" values at "a".
 *
 * We assume that the sign vector is previously zeroed.
 */
void
gensign(BITVEC sign, int *a, int len)
{
	int			idx;

	for (idx = 0; idx < len; idx++)
	{
		HASH(sign, a[idx]);
	}
}
/*
 * Return the 1-based position of the first element of "a" equal to
 * "elem", or 0 if there is no such element.
 */
int32
intarray_match_first(ArrayType *a, int32 elem)
{
	int32	   *vals;
	int32		nelems;
	int32		pos;

	CHECKARRVALID(a);
	nelems = ARRNELEMS(a);
	vals = ARRPTR(a);

	for (pos = 1; pos <= nelems; pos++)
	{
		if (vals[pos - 1] == elem)
			return pos;
	}
	return 0;
}
/*
 * Return a new array consisting of the elements of "a" followed by
 * "elem".  The input array is left unmodified.
 */
ArrayType *
intarray_add_elem(ArrayType *a, int32 elem)
{
	ArrayType  *result;
	int32	   *out;
	int32		oldlen;

	CHECKARRVALID(a);
	oldlen = ARRNELEMS(a);

	result = new_intArrayType(oldlen + 1);
	out = ARRPTR(result);

	/* carry over the existing elements, if there are any */
	if (oldlen > 0)
		memcpy(out, ARRPTR(a), oldlen * sizeof(int32));

	out[oldlen] = elem;

	return result;
}
/*
 * Return a new array consisting of the elements of "a" followed by the
 * elements of "b".  Neither input is modified.
 */
ArrayType *
intarray_concat_arrays(ArrayType *a, ArrayType *b)
{
	int32		alen = ARRNELEMS(a);
	int32		blen = ARRNELEMS(b);
	ArrayType  *result;
	int32	   *out;

	CHECKARRVALID(a);
	CHECKARRVALID(b);

	result = new_intArrayType(alen + blen);
	out = ARRPTR(result);

	/* each input is copied only when it actually has elements */
	if (alen != 0)
		memcpy(out, ARRPTR(a), alen * sizeof(int32));
	if (blen != 0)
		memcpy(out + alen, ARRPTR(b), blen * sizeof(int32));

	return result;
}
/* Return a new one-element array whose only element is "n" */
ArrayType *
int_to_intset(int32 n)
{
	ArrayType  *result = new_intArrayType(1);
	int32	   *slot = ARRPTR(result);

	*slot = n;

	return result;
}
int
compASC(const void *a, const void *b)
{
if (*(const int32 *) a == *(const int32 *) b)
return 0;
return (*(const int32 *) a > *(const int32 *) b) ? 1 : -1;
}
int
compDESC(const void *a, const void *b)
{
if (*(const int32 *) a == *(const int32 *) b)
return 0;
return (*(const int32 *) a < *(const int32 *) b) ? 1 : -1;
}