diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c index d8a58160f4..4461141e66 100644 --- a/src/backend/utils/adt/varbit.c +++ b/src/backend/utils/adt/varbit.c @@ -3,6 +3,21 @@ * varbit.c * Functions for the SQL datatypes BIT() and BIT VARYING(). * + * The data structure contains the following elements: + * header -- length of the whole data structure (incl header) + * in bytes (as with all varying length datatypes) + * data section -- private data section for the bits data structures + * bitlength -- length of the bit string in bits + * bitdata -- bit string, most significant byte first + * + * The length of the bitdata vector should always be exactly as many + * bytes as are needed for the given bitlength. If the bitlength is + * not a multiple of 8, the extra low-order padding bits of the last + * byte must be zeroes. + * + * attypmod is defined as the length of the bit string in bits, or for + * varying bits the maximum length. + * * Code originally contributed by Adriaan Joubert. * * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group @@ -26,6 +41,40 @@ #define HEXDIG(z) ((z)<10 ? ((z)+'0') : ((z)-10+'A')) +/* Mask off any bits that should be zero in the last byte of a bitstring */ +#define VARBIT_PAD(vb) \ + do { \ + int32 pad_ = VARBITPAD(vb); \ + Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \ + if (pad_ > 0) \ + *(VARBITS(vb) + VARBITBYTES(vb) - 1) &= BITMASK << pad_; \ + } while (0) + +/* + * Many functions work byte-by-byte, so they have a pointer handy to the + * last-plus-one byte, which saves a cycle or two. + */ +#define VARBIT_PAD_LAST(vb, ptr) \ + do { \ + int32 pad_ = VARBITPAD(vb); \ + Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \ + if (pad_ > 0) \ + *((ptr) - 1) &= BITMASK << pad_; \ + } while (0) + +/* Assert proper padding of a bitstring */ +#ifdef USE_ASSERT_CHECKING +#define VARBIT_CORRECTLY_PADDED(vb) \ + do { \ + int32 pad_ = VARBITPAD(vb); \ + Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \ + Assert(pad_ == 0 || \ + (*(VARBITS(vb) + VARBITBYTES(vb) - 1) & ~(BITMASK << pad_)) == 0); \ + } while (0) +#else +#define VARBIT_CORRECTLY_PADDED(vb) ((void) 0) +#endif + static VarBit *bit_catenate(VarBit *arg1, VarBit *arg2); static VarBit *bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified); @@ -86,24 +135,6 @@ anybit_typmodout(int32 typmod) } -/*---------- - * attypmod -- contains the length of the bit string in bits, or for - * varying bits the maximum length. - * - * The data structure contains the following elements: - * header -- length of the whole data structure (incl header) - * in bytes. (as with all varying length datatypes) - * data section -- private data section for the bits data structures - * bitlength -- length of the bit string in bits - * bitdata -- bit string, most significant byte first - * - * The length of the bitdata vector should always be exactly as many - * bytes as are needed for the given bitlength. If the bitlength is - * not a multiple of 8, the extra low-order padding bits of the last - * byte must be zeroes. - *---------- - */ - /* * bit_in - * converts a char string to the internal representation of a bitstring. @@ -263,6 +294,9 @@ bit_out(PG_FUNCTION_ARGS) len, bitlen; + /* Assertion to help catch any bit functions that don't pad correctly */ + VARBIT_CORRECTLY_PADDED(s); + bitlen = VARBITLEN(s); len = (bitlen + 3) / 4; result = (char *) palloc(len + 2); @@ -303,8 +337,6 @@ bit_recv(PG_FUNCTION_ARGS) VarBit *result; int len, bitlen; - int ipad; - bits8 mask; bitlen = pq_getmsgint(buf, sizeof(int32)); if (bitlen < 0 || bitlen > VARBITMAXLEN) @@ -329,13 +361,8 @@ bit_recv(PG_FUNCTION_ARGS) pq_copymsgbytes(buf, (char *) VARBITS(result), VARBITBYTES(result)); - /* Make sure last byte is zero-padded if needed */ - ipad = VARBITPAD(result); - if (ipad > 0) - { - mask = BITMASK << ipad; - *(VARBITS(result) + VARBITBYTES(result) - 1) &= mask; - } + /* Make sure last byte is correctly zero-padded */ + VARBIT_PAD(result); PG_RETURN_VARBIT_P(result); } @@ -366,8 +393,6 @@ bit(PG_FUNCTION_ARGS) bool isExplicit = PG_GETARG_BOOL(2); VarBit *result; int rlen; - int ipad; - bits8 mask; /* No work if typmod is invalid or supplied data matches it already */ if (len <= 0 || len > VARBITMAXLEN || len == VARBITLEN(arg)) @@ -393,12 +418,7 @@ bit(PG_FUNCTION_ARGS) * if source data was shorter than target length (we assume the last byte * of the source data was itself correctly zero-padded). */ - ipad = VARBITPAD(result); - if (ipad > 0) - { - mask = BITMASK << ipad; - *(VARBITS(result) + VARBITBYTES(result) - 1) &= mask; - } + VARBIT_PAD(result); PG_RETURN_VARBIT_P(result); } @@ -573,6 +593,9 @@ varbit_out(PG_FUNCTION_ARGS) k, len; + /* Assertion to help catch any bit functions that don't pad correctly */ + VARBIT_CORRECTLY_PADDED(s); + len = VARBITLEN(s); result = (char *) palloc(len + 1); sp = VARBITS(s); @@ -619,8 +642,6 @@ varbit_recv(PG_FUNCTION_ARGS) VarBit *result; int len, bitlen; - int ipad; - bits8 mask; bitlen = pq_getmsgint(buf, sizeof(int32)); if (bitlen < 0 || bitlen > VARBITMAXLEN) @@ -645,13 +666,8 @@ varbit_recv(PG_FUNCTION_ARGS) pq_copymsgbytes(buf, (char *) VARBITS(result), VARBITBYTES(result)); - /* Make sure last byte is zero-padded if needed */ - ipad = VARBITPAD(result); - if (ipad > 0) - { - mask = BITMASK << ipad; - *(VARBITS(result) + VARBITBYTES(result) - 1) &= mask; - } + /* Make sure last byte is correctly zero-padded */ + VARBIT_PAD(result); PG_RETURN_VARBIT_P(result); } @@ -719,8 +735,6 @@ varbit(PG_FUNCTION_ARGS) bool isExplicit = PG_GETARG_BOOL(2); VarBit *result; int rlen; - int ipad; - bits8 mask; /* No work if typmod is invalid or supplied data matches it already */ if (len <= 0 || len >= VARBITLEN(arg)) @@ -739,13 +753,8 @@ varbit(PG_FUNCTION_ARGS) memcpy(VARBITS(result), VARBITS(arg), VARBITBYTES(result)); - /* Make sure last byte is zero-padded if needed */ - ipad = VARBITPAD(result); - if (ipad > 0) - { - mask = BITMASK << ipad; - *(VARBITS(result) + VARBITBYTES(result) - 1) &= mask; - } + /* Make sure last byte is correctly zero-padded */ + VARBIT_PAD(result); PG_RETURN_VARBIT_P(result); } @@ -1003,6 +1012,8 @@ bit_catenate(VarBit *arg1, VarBit *arg2) } } + /* The pad bits should be already zero at this point */ + return result; } @@ -1036,14 +1047,12 @@ bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified) int bitlen, rbitlen, len, - ipad = 0, ishift, i; int e, s1, e1; - bits8 mask, - *r, + bits8 *r, *ps; bitlen = VARBITLEN(arg); @@ -1108,13 +1117,9 @@ bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified) r++; } } - /* Do we need to pad at the end? */ - ipad = VARBITPAD(result); - if (ipad > 0) - { - mask = BITMASK << ipad; - *(VARBITS(result) + len - 1) &= mask; - } + + /* Make sure last byte is correctly zero-padded */ + VARBIT_PAD(result); } return result; @@ -1236,7 +1241,7 @@ bit_and(PG_FUNCTION_ARGS) for (i = 0; i < VARBITBYTES(arg1); i++) *r++ = *p1++ & *p2++; - /* Padding is not needed as & of 0 pad is 0 */ + /* Padding is not needed as & of 0 pads is 0 */ PG_RETURN_VARBIT_P(result); } @@ -1258,7 +1263,6 @@ bit_or(PG_FUNCTION_ARGS) bits8 *p1, *p2, *r; - bits8 mask; bitlen1 = VARBITLEN(arg1); bitlen2 = VARBITLEN(arg2); @@ -1277,13 +1281,7 @@ bit_or(PG_FUNCTION_ARGS) for (i = 0; i < VARBITBYTES(arg1); i++) *r++ = *p1++ | *p2++; - /* Pad the result */ - mask = BITMASK << VARBITPAD(result); - if (mask) - { - r--; - *r &= mask; - } + /* Padding is not needed as | of 0 pads is 0 */ PG_RETURN_VARBIT_P(result); } @@ -1305,7 +1303,6 @@ bitxor(PG_FUNCTION_ARGS) bits8 *p1, *p2, *r; - bits8 mask; bitlen1 = VARBITLEN(arg1); bitlen2 = VARBITLEN(arg2); @@ -1325,13 +1322,7 @@ bitxor(PG_FUNCTION_ARGS) for (i = 0; i < VARBITBYTES(arg1); i++) *r++ = *p1++ ^ *p2++; - /* Pad the result */ - mask = BITMASK << VARBITPAD(result); - if (mask) - { - r--; - *r &= mask; - } + /* Padding is not needed as ^ of 0 pads is 0 */ PG_RETURN_VARBIT_P(result); } @@ -1347,7 +1338,6 @@ bitnot(PG_FUNCTION_ARGS) VarBit *result; bits8 *p, *r; - bits8 mask; result = (VarBit *) palloc(VARSIZE(arg)); SET_VARSIZE(result, VARSIZE(arg)); @@ -1358,13 +1348,8 @@ bitnot(PG_FUNCTION_ARGS) for (; p < VARBITEND(arg); p++) *r++ = ~*p; - /* Pad the result */ - mask = BITMASK << VARBITPAD(result); - if (mask) - { - r--; - *r &= mask; - } + /* Must zero-pad the result, because extra bits are surely 1's here */ + VARBIT_PAD_LAST(result, r); PG_RETURN_VARBIT_P(result); } @@ -1431,6 +1416,8 @@ bitshiftleft(PG_FUNCTION_ARGS) *r = 0; } + /* The pad bits should be already zero at this point */ + PG_RETURN_VARBIT_P(result); } @@ -1497,6 +1484,8 @@ bitshiftright(PG_FUNCTION_ARGS) if ((++r) < VARBITEND(result)) *r = (*p << (BITS_PER_BYTE - ishift)) & BITMASK; } + /* We may have shifted 1's into the pad bits, so fix that */ + VARBIT_PAD_LAST(result, r); } PG_RETURN_VARBIT_P(result); diff --git a/src/include/utils/varbit.h b/src/include/utils/varbit.h index 3278cb5461..c0826bf655 100644 --- a/src/include/utils/varbit.h +++ b/src/include/utils/varbit.h @@ -21,6 +21,11 @@ /* * Modeled on struct varlena from postgres.h, but data type is bits8. + * + * Caution: if bit_len is not a multiple of BITS_PER_BYTE, the low-order + * bits of the last byte of bit_dat[] are unused and MUST be zeroes. + * (This allows bit_cmp() to not bother masking the last byte.) + * Also, there should not be any excess bytes counted in the header length. */ typedef struct { diff --git a/src/test/regress/expected/bit.out b/src/test/regress/expected/bit.out index ef8aaea3ef..255b39b07e 100644 --- a/src/test/regress/expected/bit.out +++ b/src/test/regress/expected/bit.out @@ -477,6 +477,50 @@ SELECT POSITION(B'1101' IN b), 0 | 0 | 0000000000000001 (16 rows) +SELECT b, b >> 1 AS bsr, b << 1 AS bsl + FROM BIT_SHIFT_TABLE ; + b | bsr | bsl +------------------+------------------+------------------ + 1101100000000000 | 0110110000000000 | 1011000000000000 + 0110110000000000 | 0011011000000000 | 1101100000000000 + 0011011000000000 | 0001101100000000 | 0110110000000000 + 0001101100000000 | 0000110110000000 | 0011011000000000 + 0000110110000000 | 0000011011000000 | 0001101100000000 + 0000011011000000 | 0000001101100000 | 0000110110000000 + 0000001101100000 | 0000000110110000 | 0000011011000000 + 0000000110110000 | 0000000011011000 | 0000001101100000 + 0000000011011000 | 0000000001101100 | 0000000110110000 + 0000000001101100 | 0000000000110110 | 0000000011011000 + 0000000000110110 | 0000000000011011 | 0000000001101100 + 0000000000011011 | 0000000000001101 | 0000000000110110 + 0000000000001101 | 0000000000000110 | 0000000000011010 + 0000000000000110 | 0000000000000011 | 0000000000001100 + 0000000000000011 | 0000000000000001 | 0000000000000110 + 0000000000000001 | 0000000000000000 | 0000000000000010 +(16 rows) + +SELECT b::bit(15), b::bit(15) >> 1 AS bsr, b::bit(15) << 1 AS bsl + FROM BIT_SHIFT_TABLE ; + b | bsr | bsl +-----------------+-----------------+----------------- + 110110000000000 | 011011000000000 | 101100000000000 + 011011000000000 | 001101100000000 | 110110000000000 + 001101100000000 | 000110110000000 | 011011000000000 + 000110110000000 | 000011011000000 | 001101100000000 + 000011011000000 | 000001101100000 | 000110110000000 + 000001101100000 | 000000110110000 | 000011011000000 + 000000110110000 | 000000011011000 | 000001101100000 + 000000011011000 | 000000001101100 | 000000110110000 + 000000001101100 | 000000000110110 | 000000011011000 + 000000000110110 | 000000000011011 | 000000001101100 + 000000000011011 | 000000000001101 | 000000000110110 + 000000000001101 | 000000000000110 | 000000000011010 + 000000000000110 | 000000000000011 | 000000000001100 + 000000000000011 | 000000000000001 | 000000000000110 + 000000000000001 | 000000000000000 | 000000000000010 + 000000000000000 | 000000000000000 | 000000000000000 +(16 rows) + CREATE TABLE VARBIT_SHIFT_TABLE(v BIT VARYING(20)); INSERT INTO VARBIT_SHIFT_TABLE VALUES (B'11011'); INSERT INTO VARBIT_SHIFT_TABLE SELECT CAST(v || B'0' AS BIT VARYING(6)) >>1 FROM VARBIT_SHIFT_TABLE; @@ -507,6 +551,28 @@ SELECT POSITION(B'1101' IN v), 16 | 16 | 00000000000000011011 (16 rows) +SELECT v, v >> 1 AS vsr, v << 1 AS vsl + FROM VARBIT_SHIFT_TABLE ; + v | vsr | vsl +----------------------+----------------------+---------------------- + 11011 | 01101 | 10110 + 011011 | 001101 | 110110 + 0011011 | 0001101 | 0110110 + 00011011 | 00001101 | 00110110 + 000011011 | 000001101 | 000110110 + 0000011011 | 0000001101 | 0000110110 + 00000011011 | 00000001101 | 00000110110 + 000000011011 | 000000001101 | 000000110110 + 0000000011011 | 0000000001101 | 0000000110110 + 00000000011011 | 00000000001101 | 00000000110110 + 000000000011011 | 000000000001101 | 000000000110110 + 0000000000011011 | 0000000000001101 | 0000000000110110 + 00000000000011011 | 00000000000001101 | 00000000000110110 + 000000000000011011 | 000000000000001101 | 000000000000110110 + 0000000000000011011 | 0000000000000001101 | 0000000000000110110 + 00000000000000011011 | 00000000000000001101 | 00000000000000110110 +(16 rows) + DROP TABLE BIT_SHIFT_TABLE; DROP TABLE VARBIT_SHIFT_TABLE; -- Get/Set bit diff --git a/src/test/regress/sql/bit.sql b/src/test/regress/sql/bit.sql index a73da8cb8f..f47b869920 100644 --- a/src/test/regress/sql/bit.sql +++ b/src/test/regress/sql/bit.sql @@ -168,6 +168,10 @@ SELECT POSITION(B'1101' IN b), POSITION(B'11011' IN b), b FROM BIT_SHIFT_TABLE ; +SELECT b, b >> 1 AS bsr, b << 1 AS bsl + FROM BIT_SHIFT_TABLE ; +SELECT b::bit(15), b::bit(15) >> 1 AS bsr, b::bit(15) << 1 AS bsl + FROM BIT_SHIFT_TABLE ; CREATE TABLE VARBIT_SHIFT_TABLE(v BIT VARYING(20)); @@ -180,7 +184,8 @@ SELECT POSITION(B'1101' IN v), POSITION(B'11011' IN v), v FROM VARBIT_SHIFT_TABLE ; - +SELECT v, v >> 1 AS vsr, v << 1 AS vsl + FROM VARBIT_SHIFT_TABLE ; DROP TABLE BIT_SHIFT_TABLE; DROP TABLE VARBIT_SHIFT_TABLE;