Use a safer outfuncs/readfuncs representation for BitStrings.

For a long time, our outfuncs.c code has supposed that the string
contents of a BitString node could just be printed literally with
no concern for quoting/escaping.  Now, that's okay if the string
literal contains only valid binary or hex digits ... but our lexer
doesn't check that, preferring to let bitin() be the sole authority
on what's valid.  So we could have raw parse trees that contain
incorrect BitString literals, and that can result in failures when
WRITE_READ_PARSE_PLAN_TREES debugging is enabled.

Fix by using outToken() to print the string field, and debackslash()
to read it.  This results in a change in the emitted representation
only in cases that would have failed before, and don't represent valid
SQL in the first place.  Between that and the fact that we don't store
raw parse trees in the catalogs, I judge this safe to apply without a
catversion bump.

Per bug #18340 from Alexander Lakhin.  Back-patch to v16; before that,
we lacked readfuncs support for BitString nodes, so that the problem
was only cosmetic.

Discussion: https://postgr.es/m/18340-4aa1ae6ed4121912@postgresql.org
This commit is contained in:
Tom Lane 2024-02-13 12:18:25 -05:00
parent c1fc502f59
commit 0736a8ef6f
4 changed files with 32 additions and 10 deletions

View File

@ -680,8 +680,13 @@ _outString(StringInfo str, const String *node)
static void
_outBitString(StringInfo str, const BitString *node)
{
	/*
	 * Emit the BitString's text exactly once, via outToken(), so that any
	 * characters needing escaping are protected.  Appending the raw bsval
	 * as well would duplicate the value and put unescaped text into the
	 * output.
	 *
	 * The lexer will always produce a string starting with 'b' or 'x'.
	 * Characters after that prefix might need escaping, but outToken won't
	 * escape the 'b' or 'x' itself.  This is relied on by nodeTokenType.
	 */
	Assert(node->bsval[0] == 'b' || node->bsval[0] == 'x');
	outToken(str, node->bsval);
}
static void

View File

@ -498,14 +498,9 @@ nodeRead(const char *token, int tok_len)
result = (Node *) makeString(debackslash(token + 1, tok_len - 2));
break;
case T_BitString:
{
char *val = palloc(tok_len + 1);
memcpy(val, token, tok_len);
val[tok_len] = '\0';
result = (Node *) makeBitString(val);
break;
}
/* need to remove backslashes, but there are no quotes */
result = (Node *) makeBitString(debackslash(token, tok_len));
break;
default:
elog(ERROR, "unrecognized node type: %d", (int) type);
result = NULL; /* keep compiler happy */

View File

@ -40,6 +40,23 @@ SELECT * FROM VARBIT_TABLE;
01010101010
(4 rows)
-- Literals with syntax errors
SELECT b' 0';
ERROR: " " is not a valid binary digit
LINE 1: SELECT b' 0';
^
SELECT b'0 ';
ERROR: " " is not a valid binary digit
LINE 1: SELECT b'0 ';
^
SELECT x' 0';
ERROR: " " is not a valid hexadecimal digit
LINE 1: SELECT x' 0';
^
SELECT x'0 ';
ERROR: " " is not a valid hexadecimal digit
LINE 1: SELECT x'0 ';
^
-- Concatenation
SELECT v, b, (v || b) AS concat
FROM BIT_TABLE, VARBIT_TABLE

View File

@ -29,6 +29,11 @@ INSERT INTO VARBIT_TABLE VALUES (B'101011111010'); -- too long
--INSERT INTO VARBIT_TABLE VALUES ('X555');
SELECT * FROM VARBIT_TABLE;
-- Literals with syntax errors
SELECT b' 0';
SELECT b'0 ';
SELECT x' 0';
SELECT x'0 ';
-- Concatenation
SELECT v, b, (v || b) AS concat