Improve pglz_decompress() so that it cannot clobber memory beyond the

available output buffer when presented with corrupt input.  Some testing
suggests that this slows the decompression loop about 1%, which seems an
acceptable price to pay for more robustness.  (Curiously, the penalty
seems to be *less* on not-very-compressible data, which I didn't expect
since the overhead per output byte ought to be more in the literal-bytes
path.)

Patch from Zdenek Kotala.  I fixed a corner case and did some renaming
of variables to make the routine more readable.
This commit is contained in:
Tom Lane 2008-03-08 01:09:36 +00:00
parent ad434473eb
commit 9c767ad57b
1 changed files with 45 additions and 30 deletions

View File

@ -166,7 +166,7 @@
* *
* Copyright (c) 1999-2008, PostgreSQL Global Development Group * Copyright (c) 1999-2008, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/backend/utils/adt/pg_lzcompress.c,v 1.30 2008/03/07 23:20:21 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/pg_lzcompress.c,v 1.31 2008/03/08 01:09:36 tgl Exp $
* ---------- * ----------
*/ */
#include "postgres.h" #include "postgres.h"
@ -641,26 +641,26 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
void void
pglz_decompress(const PGLZ_Header *source, char *dest) pglz_decompress(const PGLZ_Header *source, char *dest)
{ {
const unsigned char *dp; const unsigned char *sp;
const unsigned char *dend; const unsigned char *srcend;
unsigned char *bp; unsigned char *dp;
unsigned char ctrl; unsigned char *destend;
int32 ctrlc;
int32 len;
int32 off;
int32 destsize;
dp = ((const unsigned char *) source) + sizeof(PGLZ_Header); sp = ((const unsigned char *) source) + sizeof(PGLZ_Header);
dend = ((const unsigned char *) source) + VARSIZE(source); srcend = ((const unsigned char *) source) + VARSIZE(source);
bp = (unsigned char *) dest; dp = (unsigned char *) dest;
destend = dp + source->rawsize;
while (dp < dend) while (sp < srcend && dp < destend)
{ {
/* /*
* Read one control byte and process the next 8 items. * Read one control byte and process the next 8 items (or as many
* as remain in the compressed input).
*/ */
ctrl = *dp++; unsigned char ctrl = *sp++;
for (ctrlc = 0; ctrlc < 8 && dp < dend; ctrlc++) int ctrlc;
for (ctrlc = 0; ctrlc < 8 && sp < srcend; ctrlc++)
{ {
if (ctrl & 1) if (ctrl & 1)
{ {
@ -671,11 +671,27 @@ pglz_decompress(const PGLZ_Header *source, char *dest)
* coded as 18, another extension tag byte tells how much * coded as 18, another extension tag byte tells how much
* longer the match really was (0-255). * longer the match really was (0-255).
*/ */
len = (dp[0] & 0x0f) + 3; int32 len;
off = ((dp[0] & 0xf0) << 4) | dp[1]; int32 off;
dp += 2;
len = (sp[0] & 0x0f) + 3;
off = ((sp[0] & 0xf0) << 4) | sp[1];
sp += 2;
if (len == 18) if (len == 18)
len += *dp++; len += *sp++;
/*
* Check for output buffer overrun, to ensure we don't
* clobber memory in case of corrupt input. Note: we must
* advance dp here to ensure the error is detected below
* the loop. We don't simply put the elog inside the loop
* since that will probably interfere with optimization.
*/
if (dp + len > destend)
{
dp += len;
break;
}
/* /*
* Now we copy the bytes specified by the tag from OUTPUT to * Now we copy the bytes specified by the tag from OUTPUT to
@ -685,8 +701,8 @@ pglz_decompress(const PGLZ_Header *source, char *dest)
*/ */
while (len--) while (len--)
{ {
*bp = bp[-off]; *dp = dp[-off];
bp++; dp++;
} }
} }
else else
@ -695,7 +711,10 @@ pglz_decompress(const PGLZ_Header *source, char *dest)
* An unset control bit means LITERAL BYTE. So we just copy * An unset control bit means LITERAL BYTE. So we just copy
* one from INPUT to OUTPUT. * one from INPUT to OUTPUT.
*/ */
*bp++ = *dp++; if (dp >= destend) /* check for buffer overrun */
break; /* do not clobber memory */
*dp++ = *sp++;
} }
/* /*
@ -706,14 +725,10 @@ pglz_decompress(const PGLZ_Header *source, char *dest)
} }
/* /*
* Check we decompressed the right amount, else die. This is a FATAL * Check we decompressed the right amount.
* condition if we tromped on more memory than expected (we assume we have
* not tromped on shared memory, though, so need not PANIC).
*/ */
destsize = (char *) bp - dest; if (dp != destend || sp != srcend)
if (destsize != source->rawsize) elog(ERROR, "compressed data is corrupt");
elog(destsize > source->rawsize ? FATAL : ERROR,
"compressed data is corrupt");
/* /*
* That's it. * That's it.