Adjust bytea get_bit/set_bit to use int8 not int4 for bit numbering.

Since the existing bit number argument can't exceed INT32_MAX, it's
not possible for these functions to manipulate bits beyond the first
256MB of a bytea value.  Lift that restriction by redeclaring the
bit number arguments as int8 (which requires a catversion bump,
hence is not back-patchable).

The similarly-named functions for bit/varbit don't really have a
problem because we restrict those types to at most VARBITMAXLEN bits;
hence leave them alone.

While here, extend the encode/decode functions in utils/adt/encode.c
to allow dealing with values wider than 1GB.  This is not a live bug
or restriction in current usage, because no input could be more than
1GB, and since none of the encoders can expand a string more than 4X,
the result size couldn't overflow uint32.  But it might be desirable
to support more in future, so make the input length values size_t
and the potential-output-length values uint64.

Also add some test cases to improve the miserable code coverage
of these functions.

Patch by Movead Li, with some editorializing by me; also reviewed by
Ashutosh Bapat

Discussion: https://postgr.es/m/20200312115135445367128@highgo.ca
This commit is contained in:
Tom Lane 2020-04-07 15:57:58 -04:00
parent 9c74ceb20b
commit 26a944cf29
8 changed files with 217 additions and 71 deletions

View File

@ -2905,7 +2905,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
<indexterm>
<primary>get_bit</primary>
</indexterm>
<literal><function>get_bit(<parameter>bytes</parameter> <type>bytea</type>, <parameter>offset</parameter> <type>int</type>)</function></literal>
<literal><function>get_bit(<parameter>bytes</parameter> <type>bytea</type>, <parameter>offset</parameter> <type>bigint</type>)</function></literal>
</entry>
<entry><type>int</type></entry>
<entry>
@ -2990,7 +2990,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
<primary>set_bit</primary>
</indexterm>
<literal><function>set_bit(<parameter>bytes</parameter> <type>bytea</type>,
<parameter>offset</parameter> <type>int</type>,
<parameter>offset</parameter> <type>bigint</type>,
<parameter>newvalue</parameter> <type>int</type>)</function></literal>
</entry>
<entry><type>bytea</type></entry>

View File

@ -16,14 +16,24 @@
#include <ctype.h>
#include "utils/builtins.h"
#include "utils/memutils.h"
/*
* Encoding conversion API.
* encode_len() and decode_len() compute the amount of space needed, while
* encode() and decode() perform the actual conversions. It is okay for
* the _len functions to return an overestimate, but not an underestimate.
* (Having said that, large overestimates could cause unnecessary errors,
* so it's better to get it right.) The conversion routines write to the
* buffer at *res and return the true length of their output.
*/
struct pg_encoding
{
unsigned (*encode_len) (const char *data, unsigned dlen);
unsigned (*decode_len) (const char *data, unsigned dlen);
unsigned (*encode) (const char *data, unsigned dlen, char *res);
unsigned (*decode) (const char *data, unsigned dlen, char *res);
uint64 (*encode_len) (const char *data, size_t dlen);
uint64 (*decode_len) (const char *data, size_t dlen);
uint64 (*encode) (const char *data, size_t dlen, char *res);
uint64 (*decode) (const char *data, size_t dlen, char *res);
};
static const struct pg_encoding *pg_find_encoding(const char *name);
@ -39,13 +49,12 @@ binary_encode(PG_FUNCTION_ARGS)
Datum name = PG_GETARG_DATUM(1);
text *result;
char *namebuf;
int datalen,
resultlen,
res;
char *dataptr;
size_t datalen;
uint64 resultlen;
uint64 res;
const struct pg_encoding *enc;
datalen = VARSIZE_ANY_EXHDR(data);
namebuf = TextDatumGetCString(name);
enc = pg_find_encoding(namebuf);
@ -54,10 +63,23 @@ binary_encode(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized encoding: \"%s\"", namebuf)));
resultlen = enc->encode_len(VARDATA_ANY(data), datalen);
dataptr = VARDATA_ANY(data);
datalen = VARSIZE_ANY_EXHDR(data);
resultlen = enc->encode_len(dataptr, datalen);
/*
* resultlen possibly overflows uint32, therefore on 32-bit machines it's
* unsafe to rely on palloc's internal check.
*/
if (resultlen > MaxAllocSize - VARHDRSZ)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("result of encoding conversion is too large")));
result = palloc(VARHDRSZ + resultlen);
res = enc->encode(VARDATA_ANY(data), datalen, VARDATA(result));
res = enc->encode(dataptr, datalen, VARDATA(result));
/* Make this FATAL 'cause we've trodden on memory ... */
if (res > resultlen)
@ -75,13 +97,12 @@ binary_decode(PG_FUNCTION_ARGS)
Datum name = PG_GETARG_DATUM(1);
bytea *result;
char *namebuf;
int datalen,
resultlen,
res;
char *dataptr;
size_t datalen;
uint64 resultlen;
uint64 res;
const struct pg_encoding *enc;
datalen = VARSIZE_ANY_EXHDR(data);
namebuf = TextDatumGetCString(name);
enc = pg_find_encoding(namebuf);
@ -90,10 +111,23 @@ binary_decode(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized encoding: \"%s\"", namebuf)));
resultlen = enc->decode_len(VARDATA_ANY(data), datalen);
dataptr = VARDATA_ANY(data);
datalen = VARSIZE_ANY_EXHDR(data);
resultlen = enc->decode_len(dataptr, datalen);
/*
* resultlen possibly overflows uint32, therefore on 32-bit machines it's
* unsafe to rely on palloc's internal check.
*/
if (resultlen > MaxAllocSize - VARHDRSZ)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("result of decoding conversion is too large")));
result = palloc(VARHDRSZ + resultlen);
res = enc->decode(VARDATA_ANY(data), datalen, VARDATA(result));
res = enc->decode(dataptr, datalen, VARDATA(result));
/* Make this FATAL 'cause we've trodden on memory ... */
if (res > resultlen)
@ -122,8 +156,8 @@ static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
unsigned
hex_encode(const char *src, unsigned len, char *dst)
uint64
hex_encode(const char *src, size_t len, char *dst)
{
const char *end = src + len;
@ -133,7 +167,7 @@ hex_encode(const char *src, unsigned len, char *dst)
*dst++ = hextbl[*src & 0xF];
src++;
}
return len * 2;
return (uint64) len * 2;
}
static inline char
@ -152,8 +186,8 @@ get_hex(char c)
return (char) res;
}
unsigned
hex_decode(const char *src, unsigned len, char *dst)
uint64
hex_decode(const char *src, size_t len, char *dst)
{
const char *s,
*srcend;
@ -184,16 +218,16 @@ hex_decode(const char *src, unsigned len, char *dst)
return p - dst;
}
static unsigned
hex_enc_len(const char *src, unsigned srclen)
static uint64
hex_enc_len(const char *src, size_t srclen)
{
return srclen << 1;
return (uint64) srclen << 1;
}
static unsigned
hex_dec_len(const char *src, unsigned srclen)
static uint64
hex_dec_len(const char *src, size_t srclen)
{
return srclen >> 1;
return (uint64) srclen >> 1;
}
/*
@ -214,8 +248,8 @@ static const int8 b64lookup[128] = {
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
};
static unsigned
pg_base64_encode(const char *src, unsigned len, char *dst)
static uint64
pg_base64_encode(const char *src, size_t len, char *dst)
{
char *p,
*lend = dst + 76;
@ -261,8 +295,8 @@ pg_base64_encode(const char *src, unsigned len, char *dst)
return p - dst;
}
static unsigned
pg_base64_decode(const char *src, unsigned len, char *dst)
static uint64
pg_base64_decode(const char *src, size_t len, char *dst)
{
const char *srcend = src + len,
*s = src;
@ -331,17 +365,17 @@ pg_base64_decode(const char *src, unsigned len, char *dst)
}
static unsigned
pg_base64_enc_len(const char *src, unsigned srclen)
static uint64
pg_base64_enc_len(const char *src, size_t srclen)
{
/* 3 bytes will be converted to 4, linefeed after 76 chars */
return (srclen + 2) * 4 / 3 + srclen / (76 * 3 / 4);
return ((uint64) srclen + 2) * 4 / 3 + (uint64) srclen / (76 * 3 / 4);
}
static unsigned
pg_base64_dec_len(const char *src, unsigned srclen)
static uint64
pg_base64_dec_len(const char *src, size_t srclen)
{
return (srclen * 3) >> 2;
return ((uint64) srclen * 3) >> 2;
}
/*
@ -361,12 +395,12 @@ pg_base64_dec_len(const char *src, unsigned srclen)
#define VAL(CH) ((CH) - '0')
#define DIG(VAL) ((VAL) + '0')
static unsigned
esc_encode(const char *src, unsigned srclen, char *dst)
static uint64
esc_encode(const char *src, size_t srclen, char *dst)
{
const char *end = src + srclen;
char *rp = dst;
int len = 0;
uint64 len = 0;
while (src < end)
{
@ -400,12 +434,12 @@ esc_encode(const char *src, unsigned srclen, char *dst)
return len;
}
static unsigned
esc_decode(const char *src, unsigned srclen, char *dst)
static uint64
esc_decode(const char *src, size_t srclen, char *dst)
{
const char *end = src + srclen;
char *rp = dst;
int len = 0;
uint64 len = 0;
while (src < end)
{
@ -448,11 +482,11 @@ esc_decode(const char *src, unsigned srclen, char *dst)
return len;
}
static unsigned
esc_enc_len(const char *src, unsigned srclen)
static uint64
esc_enc_len(const char *src, size_t srclen)
{
const char *end = src + srclen;
int len = 0;
uint64 len = 0;
while (src < end)
{
@ -469,11 +503,11 @@ esc_enc_len(const char *src, unsigned srclen)
return len;
}
static unsigned
esc_dec_len(const char *src, unsigned srclen)
static uint64
esc_dec_len(const char *src, size_t srclen)
{
const char *end = src + srclen;
int len = 0;
uint64 len = 0;
while (src < end)
{

View File

@ -389,7 +389,7 @@ byteaout(PG_FUNCTION_ARGS)
{
/* Print traditional escaped format */
char *vp;
int len;
uint64 len;
int i;
len = 1; /* empty string has 1 char */
@ -403,7 +403,18 @@ byteaout(PG_FUNCTION_ARGS)
else
len++;
}
/*
* In principle len can't overflow uint32 if the input fit in 1GB, but
* for safety let's check rather than relying on palloc's internal
* check.
*/
if (len > MaxAllocSize)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg_internal("result of bytea output conversion is too large")));
rp = result = (char *) palloc(len);
vp = VARDATA_ANY(vlena);
for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
{
@ -3456,7 +3467,7 @@ Datum
byteaGetBit(PG_FUNCTION_ARGS)
{
bytea *v = PG_GETARG_BYTEA_PP(0);
int32 n = PG_GETARG_INT32(1);
int64 n = PG_GETARG_INT64(1);
int byteNo,
bitNo;
int len;
@ -3464,14 +3475,15 @@ byteaGetBit(PG_FUNCTION_ARGS)
len = VARSIZE_ANY_EXHDR(v);
if (n < 0 || n >= len * 8)
if (n < 0 || n >= (int64) len * 8)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("index %d out of valid range, 0..%d",
n, len * 8 - 1)));
errmsg("index %lld out of valid range, 0..%lld",
(long long) n, (long long) len * 8 - 1)));
byteNo = n / 8;
bitNo = n % 8;
/* n/8 is now known < len, so safe to cast to int */
byteNo = (int) (n / 8);
bitNo = (int) (n % 8);
byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
@ -3525,7 +3537,7 @@ Datum
byteaSetBit(PG_FUNCTION_ARGS)
{
bytea *res = PG_GETARG_BYTEA_P_COPY(0);
int32 n = PG_GETARG_INT32(1);
int64 n = PG_GETARG_INT64(1);
int32 newBit = PG_GETARG_INT32(2);
int len;
int oldByte,
@ -3535,14 +3547,15 @@ byteaSetBit(PG_FUNCTION_ARGS)
len = VARSIZE(res) - VARHDRSZ;
if (n < 0 || n >= len * 8)
if (n < 0 || n >= (int64) len * 8)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("index %d out of valid range, 0..%d",
n, len * 8 - 1)));
errmsg("index %lld out of valid range, 0..%lld",
(long long) n, (long long) len * 8 - 1)));
byteNo = n / 8;
bitNo = n % 8;
/* n/8 is now known < len, so safe to cast to int */
byteNo = (int) (n / 8);
bitNo = (int) (n % 8);
/*
* sanity check!

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202004062
#define CATALOG_VERSION_NO 202004071
#endif

View File

@ -1439,10 +1439,10 @@
proname => 'set_byte', prorettype => 'bytea',
proargtypes => 'bytea int4 int4', prosrc => 'byteaSetByte' },
{ oid => '723', descr => 'get bit',
proname => 'get_bit', prorettype => 'int4', proargtypes => 'bytea int4',
proname => 'get_bit', prorettype => 'int4', proargtypes => 'bytea int8',
prosrc => 'byteaGetBit' },
{ oid => '724', descr => 'set bit',
proname => 'set_bit', prorettype => 'bytea', proargtypes => 'bytea int4 int4',
proname => 'set_bit', prorettype => 'bytea', proargtypes => 'bytea int8 int4',
prosrc => 'byteaSetBit' },
{ oid => '749', descr => 'substitute portion of string',
proname => 'overlay', prorettype => 'bytea',

View File

@ -32,8 +32,8 @@ extern int errdatatype(Oid datatypeOid);
extern int errdomainconstraint(Oid datatypeOid, const char *conname);
/* encode.c */
extern unsigned hex_encode(const char *src, unsigned len, char *dst);
extern unsigned hex_decode(const char *src, unsigned len, char *dst);
extern uint64 hex_encode(const char *src, size_t len, char *dst);
extern uint64 hex_decode(const char *src, size_t len, char *dst);
/* int.c */
extern int2vector *buildint2vector(const int16 *int2s, int n);

View File

@ -1726,6 +1726,82 @@ SELECT sha512('The quick brown fox jumps over the lazy dog.');
\x91ea1245f20d46ae9a037a989f54f1f790f0a47607eeb8a14d12890cea77a1bbc6c7ed9cf205e67b7f2b8fd4c7dfd3a7a8617e45f3c463d481c7e586c39ac1ed
(1 row)
--
-- encode/decode
--
SELECT encode('\x1234567890abcdef00', 'hex');
encode
--------------------
1234567890abcdef00
(1 row)
SELECT decode('1234567890abcdef00', 'hex');
decode
----------------------
\x1234567890abcdef00
(1 row)
SELECT encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea, 'base64');
encode
------------------------------------------------------------------------------
EjRWeJCrze8AARI0VniQq83vAAESNFZ4kKvN7wABEjRWeJCrze8AARI0VniQq83vAAESNFZ4kKvN+
7wABEjRWeJCrze8AAQ==
(1 row)
SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea,
'base64'), 'base64');
decode
------------------------------------------------------------------------------------------------------------------------------------------------
\x1234567890abcdef00011234567890abcdef00011234567890abcdef00011234567890abcdef00011234567890abcdef00011234567890abcdef00011234567890abcdef0001
(1 row)
SELECT encode('\x1234567890abcdef00', 'escape');
encode
-----------------------------
\x124Vx\220\253\315\357\000
(1 row)
SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
decode
----------------------
\x1234567890abcdef00
(1 row)
--
-- get_bit/set_bit etc
--
SELECT get_bit('\x1234567890abcdef00'::bytea, 43);
get_bit
---------
1
(1 row)
SELECT get_bit('\x1234567890abcdef00'::bytea, 99); -- error
ERROR: index 99 out of valid range, 0..71
SELECT set_bit('\x1234567890abcdef00'::bytea, 43, 0);
set_bit
----------------------
\x1234567890a3cdef00
(1 row)
SELECT set_bit('\x1234567890abcdef00'::bytea, 99, 0); -- error
ERROR: index 99 out of valid range, 0..71
SELECT get_byte('\x1234567890abcdef00'::bytea, 3);
get_byte
----------
120
(1 row)
SELECT get_byte('\x1234567890abcdef00'::bytea, 99); -- error
ERROR: index 99 out of valid range, 0..8
SELECT set_byte('\x1234567890abcdef00'::bytea, 7, 11);
set_byte
----------------------
\x1234567890abcd0b00
(1 row)
SELECT set_byte('\x1234567890abcdef00'::bytea, 99, 11); -- error
ERROR: index 99 out of valid range, 0..8
--
-- test behavior of escape_string_warning and standard_conforming_strings options
--

View File

@ -597,6 +597,29 @@ SELECT sha384('The quick brown fox jumps over the lazy dog.');
SELECT sha512('');
SELECT sha512('The quick brown fox jumps over the lazy dog.');
--
-- encode/decode
--
SELECT encode('\x1234567890abcdef00', 'hex');
SELECT decode('1234567890abcdef00', 'hex');
SELECT encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea, 'base64');
SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea,
'base64'), 'base64');
SELECT encode('\x1234567890abcdef00', 'escape');
SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
--
-- get_bit/set_bit etc
--
SELECT get_bit('\x1234567890abcdef00'::bytea, 43);
SELECT get_bit('\x1234567890abcdef00'::bytea, 99); -- error
SELECT set_bit('\x1234567890abcdef00'::bytea, 43, 0);
SELECT set_bit('\x1234567890abcdef00'::bytea, 99, 0); -- error
SELECT get_byte('\x1234567890abcdef00'::bytea, 3);
SELECT get_byte('\x1234567890abcdef00'::bytea, 99); -- error
SELECT set_byte('\x1234567890abcdef00'::bytea, 7, 11);
SELECT set_byte('\x1234567890abcdef00'::bytea, 99, 11); -- error
--
-- test behavior of escape_string_warning and standard_conforming_strings options
--