pg_clean_ascii(): escape bytes rather than lose them

Rather than replace each unprintable byte with a '?' character, replace
it with a hex escape instead. The API now allocates a copy rather than
modifying the input in place.

Author: Jacob Champion <jchampion@timescale.com>
Discussion: https://www.postgresql.org/message-id/CAAWbhmgsvHrH9wLU2kYc3pOi1KSenHSLAHBbCVmmddW6-mc_=w@mail.gmail.com
This commit is contained in:
Peter Eisentraut 2022-09-13 16:10:44 +02:00
parent da5d4ea5aa
commit 45b1a67a0f
4 changed files with 67 additions and 19 deletions

View File

@ -2280,11 +2280,7 @@ retry1:
*/
if (strcmp(nameptr, "application_name") == 0)
{
char *tmp_app_name = pstrdup(valptr);
pg_clean_ascii(tmp_app_name);
port->application_name = tmp_app_name;
port->application_name = pg_clean_ascii(valptr, 0);
}
}
offset = valoffset + strlen(valptr) + 1;

View File

@ -12921,9 +12921,18 @@ assign_maintenance_io_concurrency(int newval, void *extra)
static bool
check_application_name(char **newval, void **extra, GucSource source)
{
/* Only allow clean ASCII chars in the application name */
pg_clean_ascii(*newval);
char *clean;
/* Only allow clean ASCII chars in the application name */
clean = pg_clean_ascii(*newval, MCXT_ALLOC_NO_OOM);
if (!clean)
return false;
clean = guc_strdup(WARNING, clean);
if (!clean)
return false;
*newval = clean;
return true;
}
@ -12937,9 +12946,18 @@ assign_application_name(const char *newval, void *extra)
static bool
check_cluster_name(char **newval, void **extra, GucSource source)
{
/* Only allow clean ASCII chars in the cluster name */
pg_clean_ascii(*newval);
char *clean;
/* Only allow clean ASCII chars in the cluster name */
clean = pg_clean_ascii(*newval, MCXT_ALLOC_NO_OOM);
if (!clean)
return false;
clean = guc_strdup(WARNING, clean);
if (!clean)
return false;
*newval = clean;
return true;
}

View File

@ -22,6 +22,7 @@
#endif
#include "common/string.h"
#include "lib/stringinfo.h"
/*
@ -59,9 +60,12 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
/*
* pg_clean_ascii -- Replace any non-ASCII chars with a '?' char
* pg_clean_ascii -- Replace any non-ASCII chars with a "\xXX" string
*
* Modifies the string passed in which must be '\0'-terminated.
* Makes a newly allocated copy of the string passed in, which must be
* '\0'-terminated. In the backend, additional alloc_flags may be provided and
* will be passed as-is to palloc_extended(); in the frontend, alloc_flags is
* ignored and the copy is malloc'd.
*
* This function exists specifically to deal with filtering out
* non-ASCII characters in a few places where the client can provide an almost
@ -73,22 +77,52 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
* In general, this function should NOT be used- instead, consider how to handle
* the string without needing to filter out the non-ASCII characters.
*
* Ultimately, we'd like to improve the situation to not require stripping out
* all non-ASCII but perform more intelligent filtering which would allow UTF or
* Ultimately, we'd like to improve the situation to not require replacing all
* non-ASCII but perform more intelligent filtering which would allow UTF or
* similar, but it's unclear exactly what we should allow, so stick to ASCII only
* for now.
*/
void
pg_clean_ascii(char *str)
char *
pg_clean_ascii(const char *str, int alloc_flags)
{
/* Only allow clean ASCII chars in the string */
char *p;
size_t dstlen;
char *dst;
const char *p;
size_t i = 0;
/* Worst case, each byte can become four bytes, plus a null terminator. */
dstlen = strlen(str) * 4 + 1;
#ifdef FRONTEND
dst = malloc(dstlen);
#else
dst = palloc_extended(dstlen, alloc_flags);
#endif
if (!dst)
return NULL;
for (p = str; *p != '\0'; p++)
{
/* Only allow clean ASCII chars in the string */
if (*p < 32 || *p > 126)
*p = '?';
{
Assert(i < (dstlen - 3));
snprintf(&dst[i], dstlen - i, "\\x%02x", (unsigned char) *p);
i += 4;
}
else
{
Assert(i < dstlen);
dst[i] = *p;
i++;
}
}
Assert(i < dstlen);
dst[i] = '\0';
return dst;
}

View File

@ -24,7 +24,7 @@ typedef struct PromptInterruptContext
extern bool pg_str_endswith(const char *str, const char *end);
extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr,
int base);
extern void pg_clean_ascii(char *str);
extern char *pg_clean_ascii(const char *str, int alloc_flags);
extern int pg_strip_crlf(char *str);
extern bool pg_is_ascii(const char *str);