/*-------------------------------------------------------------------------
*
* String-processing utility routines for frontend code
*
* Assorted utility functions that are useful in constructing SQL queries
* and interpreting backend output.
*
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/fe_utils/string_utils.c
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include <ctype.h>
#include "fe_utils/string_utils.h"
#include "common/keywords.h"
/*
 * Forward declaration: the default buffer provider is defined later in this
 * file, but its address is needed below to initialize getLocalPQExpBuffer.
 */
static PQExpBuffer defaultGetLocalPQExpBuffer(void);
/* Globals exported by this file */
/* When nonzero, fmtId() double-quotes every identifier it is given. */
int quote_all_identifiers = 0;
/*
 * Hook through which fmtId() and fmtQualifiedId() obtain their result
 * buffer.  It points at defaultGetLocalPQExpBuffer unless replaced.
 */
PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer;
/*
 * Default provider of the scratch buffer used by fmtId and fmtQualifiedId.
 *
 * One PQExpBuffer is kept in a function-local static.  It is created on the
 * first call and merely emptied on every later call, so whatever it held is
 * valid only until the next call.  Reusing a single buffer keeps memory
 * leakage down, at the price of being neither reentrant nor thread-safe.
 * Code that needs different behavior can point the getLocalPQExpBuffer
 * function pointer at its own implementation.
 */
static PQExpBuffer
defaultGetLocalPQExpBuffer(void)
{
	static PQExpBuffer scratch = NULL;

	if (scratch == NULL)
		scratch = createPQExpBuffer();	/* nothing allocated yet: make one */
	else
		resetPQExpBuffer(scratch);	/* reuse the buffer, wiping its contents */

	return scratch;
}
/*
 * Return rawid in a form usable as an SQL identifier, adding double quotes
 * only when the name is not a legitimate identifier as it stands.
 *
 * The result lives in the buffer handed out by getLocalPQExpBuffer(), so it
 * must be consumed before fmtId is called again.
 */
const char *
fmtId(const char *rawid)
{
	PQExpBuffer out = getLocalPQExpBuffer();
	const char *p;
	bool		quote_it;

	/*
	 * The character tests below must agree with the identifier production in
	 * scan.l, which is why plain range comparisons are used rather than
	 * islower() and friends.
	 */
	if (quote_all_identifiers)
		quote_it = true;
	else
	{
		/* The first character may only be a lower-case letter or '_' */
		quote_it = !((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_');

		/* Every character may only be a lower-case letter, digit or '_' */
		for (p = rawid; !quote_it && *p != '\0'; p++)
		{
			bool		plain = (*p >= 'a' && *p <= 'z') ||
				(*p >= '0' && *p <= '9') ||
				*p == '_';

			if (!plain)
				quote_it = true;
		}
	}

	if (!quote_it)
	{
		/*
		 * The name is lexically fine, but it might be a keyword.  Everything
		 * except unreserved keywords gets quoted; distinguishing the cases
		 * where a col_name or type_func_name keyword would be safe is not
		 * worth the trouble.
		 *
		 * ScanKeywordLookup() compares case-insensitively, which is harmless
		 * here because the name is already known to be all lower case.
		 */
		const ScanKeyword *kw = ScanKeywordLookup(rawid,
												  ScanKeywords,
												  NumScanKeywords);

		if (kw != NULL && kw->category != UNRESERVED_KEYWORD)
			quote_it = true;
	}

	if (quote_it)
	{
		appendPQExpBufferChar(out, '"');
		for (p = rawid; *p != '\0'; p++)
		{
			/* An embedded double quote is written twice, per SQL99 */
			if (*p == '"')
				appendPQExpBufferChar(out, '"');
			appendPQExpBufferChar(out, *p);
		}
		appendPQExpBufferChar(out, '"');
	}
	else
	{
		/* usable verbatim */
		appendPQExpBufferStr(out, rawid);
	}

	return out->data;
}
/*
 * fmtQualifiedId - render a possibly schema-qualified name in the form
 * appropriate for the source database.
 *
 * As with fmtId, the result must be used before the next call.
 *
 * fmtId hands back the same getLocalPQExpBuffer() buffer that this function
 * returns its own result in, so the pieces are first collected in a private
 * buffer and only copied into the shared one once fmtId is no longer needed.
 */
const char *
fmtQualifiedId(int remoteVersion, const char *schema, const char *id)
{
	PQExpBuffer assembled = createPQExpBuffer();
	PQExpBuffer result;
	bool		with_schema;

	/* Servers older than 7.3 have no schemas, so drop the qualifier there */
	with_schema = (remoteVersion >= 70300 && schema != NULL && *schema != '\0');

	if (with_schema)
		appendPQExpBuffer(assembled, "%s.", fmtId(schema));
	appendPQExpBufferStr(assembled, fmtId(id));

	/* All fmtId calls are done; the shared buffer may be taken over now */
	result = getLocalPQExpBuffer();
	appendPQExpBufferStr(result, assembled->data);

	destroyPQExpBuffer(assembled);

	return result->data;
}
/*
 * Render a Postgres version number, given in the PG_VERSION_NUM integer
 * format returned by PQserverVersion(), as text.  The point of this routine
 * is to keep the two-part vs. three-part numbering rule in one place.
 *
 * version_number: the integer version, e.g. 90605 or 100003.
 * include_minor: whether the last (minor) component is printed.
 * buf, buflen: caller-supplied output area, which keeps this reentrant;
 *		32 bytes is the recommended size.
 *
 * Returns 'buf', purely as a notational convenience.
 */
char *
formatPGVersionNumber(int version_number, bool include_minor,
					  char *buf, size_t buflen)
{
	const int	major = version_number / 10000;

	if (version_number < 100000)
	{
		/* Old three-part style: the first two components are always shown */
		const int	second = (version_number / 100) % 100;

		if (include_minor)
			snprintf(buf, buflen, "%d.%d.%d", major, second,
					 version_number % 100);
		else
			snprintf(buf, buflen, "%d.%d", major, second);

		return buf;
	}

	/* New two-part style */
	if (include_minor)
		snprintf(buf, buflen, "%d.%d", major, version_number % 10000);
	else
		snprintf(buf, buflen, "%d", major);

	return buf;
}
/*
 * Append str to buf as an SQL string literal, assuming the given
 * client_encoding and standard_conforming_strings settings.
 *
 * Apart from the output buffer structure this is essentially libpq's
 * PQescapeStringInternal.  It exists for callers that have no PGconn at
 * hand; those that do should prefer appendStringLiteralConn.
 *
 * If the buffer cannot be enlarged, nothing is appended.
 */
void
appendStringLiteral(PQExpBuffer buf, const char *str,
					int encoding, bool std_strings)
{
	size_t		srclen = strlen(str);
	const char *in = str;
	char	   *out;

	/* Room for every byte doubled plus the two enclosing quotes */
	if (!enlargePQExpBuffer(buf, 2 * srclen + 2))
		return;

	out = buf->data + buf->len;
	*out++ = '\'';

	while (*in != '\0')
	{
		char		ch = *in;
		int			charlen;
		int			done;

		if (IS_HIGHBIT_SET(ch))
		{
			/* Possibly multibyte: copy the whole character untouched */
			charlen = PQmblen(in, encoding);

			for (done = 0; done < charlen && *in != '\0'; done++)
				*out++ = *in++;

			if (done < charlen)
			{
				/*
				 * The string ended in the middle of a multibyte character.
				 * Pad towards the expected length with spaces.  There may
				 * not be room for all of them, but there is always room for
				 * at least one, because a multibyte character is never
				 * doubled.  That should be enough to make the server reject
				 * the string.
				 */
				char	   *limit = buf->data + buf->maxlen - 2;

				while (done < charlen && out < limit)
				{
					*out++ = ' ';
					done++;
				}
				break;
			}
		}
		else
		{
			/* Plain ASCII: double the character if it needs quoting */
			if (SQL_STR_DOUBLE(ch, !std_strings))
				*out++ = ch;
			*out++ = ch;
			in++;
		}
	}

	/* Closing quote and terminating NUL; then record the new length */
	*out++ = '\'';
	*out = '\0';
	buf->len = out - buf->data;
}
/*
 * Append str to buf as an SQL string literal, taking the encoding and the
 * string syntax rules from the current settings of the given PGconn.
 */
void
appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn)
{
	size_t		srclen = strlen(str);
	bool		has_backslash = (strchr(str, '\\') != NULL);

	/*
	 * XXX This is a kluge to silence escape_string_warning in our utility
	 * programs.  It should go away someday.
	 */
	if (has_backslash && PQserverVersion(conn) >= 80100)
	{
		bool		glued = (buf->len > 0 && buf->data[buf->len - 1] != ' ');

		/* keep the escape-string prefix from fusing with an identifier */
		if (glued)
			appendPQExpBufferChar(buf, ' ');
		appendPQExpBufferChar(buf, ESCAPE_STRING_SYNTAX);
		appendStringLiteral(buf, str, PQclientEncoding(conn), false);
		return;
	}
	/* XXX end kluge */

	/* Reserve the worst case up front so libpq can write in place */
	if (enlargePQExpBuffer(buf, 2 * srclen + 2))
	{
		appendPQExpBufferChar(buf, '\'');
		buf->len += PQescapeStringConn(conn, buf->data + buf->len,
									   str, srclen, NULL);
		appendPQExpBufferChar(buf, '\'');
	}
}
/*
 * Append str to buf as a dollar-quoted literal.  When dqprefix is not NULL,
 * the delimiter starts with it (right after the opening $).
 *
 * str itself is copied verbatim: dollar quoting involves no escaping, and
 * encoding issues are of no concern here.
 */
void
appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix)
{
	static const char filler[] = "_XXXXXXX";
	const size_t cycle = sizeof(filler) - 1;
	size_t		pick = 0;
	PQExpBuffer tag = createPQExpBuffer();

	/* The delimiter begins with '$', then the optional prefix */
	appendPQExpBufferChar(tag, '$');
	if (dqprefix != NULL)
		appendPQExpBufferStr(tag, dqprefix);

	/*
	 * Lengthen the delimiter until it no longer occurs in the string.  The
	 * comparison is made without the trailing '$' on purpose: a string that
	 * ends in $foo must not be quoted with $foo$.
	 */
	for (; strstr(str, tag->data) != NULL; pick = (pick + 1) % cycle)
		appendPQExpBufferChar(tag, filler[pick]);

	/* Complete the delimiter */
	appendPQExpBufferChar(tag, '$');

	/* Emit delimiter, payload, delimiter */
	appendPQExpBufferStr(buf, tag->data);
	appendPQExpBufferStr(buf, str);
	appendPQExpBufferStr(buf, tag->data);

	destroyPQExpBuffer(tag);
}
/*
 * Append a bytea value, supplied as 'length' raw bytes at 'str', to buf as
 * an SQL string literal, assuming the given standard_conforming_strings
 * setting.
 *
 * This serves callers that have no PGconn available; those that do should
 * prefer PQescapeByteaConn.
 *
 * If the buffer cannot be enlarged, nothing is appended.
 */
void
appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length,
				   bool std_strings)
{
	static const char hexdigits[] = "0123456789abcdef";
	char	   *out;
	size_t		pos;

	/*
	 * Output is always in hex format.  The version of the server that will
	 * eventually load the output is unknown, so no informed choice of format
	 * can be made; always using the old escaped format might be better.
	 *
	 * Two hex digits per byte, plus two quotes, "x" and up to two
	 * backslashes.
	 */
	if (!enlargePQExpBuffer(buf, 2 * length + 5))
		return;

	out = buf->data + buf->len;

	*out++ = '\'';
	/* Without standard-conforming strings the backslash must be doubled */
	if (!std_strings)
		*out++ = '\\';
	*out++ = '\\';
	*out++ = 'x';

	for (pos = 0; pos < length; pos++)
	{
		unsigned char octet = str[pos];

		*out++ = hexdigits[(octet >> 4) & 0xF];
		*out++ = hexdigits[octet & 0xF];
	}

	/* Closing quote and terminating NUL; then record the new length */
	*out++ = '\'';
	*out = '\0';
	buf->len = out - buf->data;
}
/*
 * Add str to the shell command being assembled in buf, applying shell-style
 * quoting as needed so that it forms exactly one argument.
 *
 * LF and CR characters are forbidden: they have scant practical use beyond
 * designing security breaches, and the Windows command shell is unusable as
 * a conduit for arguments containing them.  A future major release should
 * reject those characters in CREATE ROLE and CREATE DATABASE, because use
 * there eventually leads to errors here.
 *
 * There are two entry points.  appendShellString() prints an error and
 * terminates the program if LF or CR appears.  appendShellStringNoError()
 * leaves those characters out of the result and returns false if there
 * were any.
 */
void
appendShellString(PQExpBuffer buf, const char *str)
{
	if (appendShellStringNoError(buf, str))
		return;

	fprintf(stderr,
			_("shell command argument contains a newline or carriage return: \"%s\"\n"),
			str);
	exit(EXIT_FAILURE);
}
/*
 * Append str to buf as exactly one shell argument, quoting as needed.
 *
 * LF and CR characters are omitted from the output instead of being quoted.
 * Returns true if str contained none of them, false if any were dropped.
 */
bool
appendShellStringNoError(PQExpBuffer buf, const char *str)
{
#ifdef WIN32
	int			backslash_run_length = 0;
#endif
	bool		ok = true;
	const char *p;

	/*
	 * Don't bother with adding quotes if the string is nonempty and clearly
	 * contains only safe characters.
	 */
	if (*str != '\0' &&
		strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str))
	{
		appendPQExpBufferStr(buf, str);
		return ok;
	}

#ifndef WIN32
	/* Wrap the argument in single quotes */
	appendPQExpBufferChar(buf, '\'');
	for (p = str; *p; p++)
	{
		if (*p == '\n' || *p == '\r')
		{
			/* drop the character, but remember that we did */
			ok = false;
			continue;
		}

		/*
		 * A single quote cannot appear inside single quotes: close the
		 * quoted section, emit the quote inside double quotes, and reopen.
		 */
		if (*p == '\'')
			appendPQExpBufferStr(buf, "'\"'\"'");
		else
			appendPQExpBufferChar(buf, *p);
	}
	appendPQExpBufferChar(buf, '\'');
#else							/* WIN32 */

	/*
	 * A Windows system() argument experiences two layers of interpretation.
	 * First, cmd.exe interprets the string.  Its behavior is undocumented,
	 * but a caret escapes any byte except LF or CR that would otherwise have
	 * special meaning.  Handling of a caret before LF or CR differs between
	 * "cmd.exe /c" and other modes, and it is unusable here.
	 *
	 * Second, the new process parses its command line to construct argv (see
	 * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx).  This treats
	 * backslash-double quote sequences specially.
	 */
	appendPQExpBufferStr(buf, "^\"");
	for (p = str; *p; p++)
	{
		if (*p == '\n' || *p == '\r')
		{
			/* drop the character, but remember that we did */
			ok = false;
			continue;
		}

		/* Change N backslashes before a double quote to 2N+1 backslashes. */
		if (*p == '"')
		{
			while (backslash_run_length)
			{
				appendPQExpBufferStr(buf, "^\\");
				backslash_run_length--;
			}
			appendPQExpBufferStr(buf, "^\\");
		}
		else if (*p == '\\')
			backslash_run_length++;
		else
			backslash_run_length = 0;

		/*
		 * Decline to caret-escape the most mundane characters, to ease
		 * debugging and lest we approach the command length limit.
		 */
		if (!((*p >= 'a' && *p <= 'z') ||
			  (*p >= 'A' && *p <= 'Z') ||
			  (*p >= '0' && *p <= '9')))
			appendPQExpBufferChar(buf, '^');
		appendPQExpBufferChar(buf, *p);
	}

	/*
	 * Change N backslashes at end of argument to 2N backslashes, because they
	 * precede the double quote that terminates the argument.
	 */
	while (backslash_run_length)
	{
		appendPQExpBufferStr(buf, "^\\");
		backslash_run_length--;
	}
	appendPQExpBufferStr(buf, "^\"");
#endif							/* WIN32 */

	return ok;
}
/*
 * Append str to buf, quoted as necessary for use as the value part of a
 * keyword/value pair in a libpq connection string.
 */
void
appendConnStrVal(PQExpBuffer buf, const char *str)
{
	const char *scan;
	bool		plain;

	/*
	 * A value needs no quotes when it is non-empty and made up solely of
	 * ASCII letters, digits, '_' and '.'.  That is quite conservative, but
	 * better safe than sorry.  Scan forward to the first character outside
	 * that set.
	 */
	for (scan = str; *scan != '\0'; scan++)
	{
		char		c = *scan;

		if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
			(c >= '0' && c <= '9') || c == '_' || c == '.')
			continue;
		break;
	}
	plain = (scan != str && *scan == '\0');

	if (plain)
	{
		appendPQExpBufferStr(buf, str);
		return;
	}

	/* Single-quote the value; ' and \ must be escaped as \' and \\ */
	appendPQExpBufferChar(buf, '\'');
	for (scan = str; *scan != '\0'; scan++)
	{
		if (*scan == '\\' || *scan == '\'')
			appendPQExpBufferChar(buf, '\\');
		appendPQExpBufferChar(buf, *scan);
	}
	appendPQExpBufferChar(buf, '\'');
}
/*
 * Append a psql meta-command that connects to the given database with the
 * then-current connection's user, host and port.
 *
 * The text appended to buf is a "\connect" command followed by a newline.
 * If dbname contains a newline or carriage return, an error is printed to
 * stderr and the program exits with EXIT_FAILURE.
 */
void
appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname)
{
	const char *s;
	bool		is_complex = false;

	/*
	 * If the name is plain ASCII characters, emit a trivial "\connect "foo"".
	 * For other names, even many not technically requiring it, skip to the
	 * general case.  No database has a zero-length name.
	 */
	for (s = dbname; *s; s++)
	{
		if (*s == '\n' || *s == '\r')
		{
			fprintf(stderr,
					_("database name contains a newline or carriage return: \"%s\"\n"),
					dbname);
			exit(EXIT_FAILURE);
		}

		/* Keep scanning after a hit so that a later LF/CR is still caught */
		if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
			  (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
		{
			is_complex = true;
		}
	}

	appendPQExpBufferStr(buf, "\\connect ");
	if (is_complex)
	{
		PQExpBufferData connstr;

		/* Build a "dbname=..." connection string around the name */
		initPQExpBuffer(&connstr);
		appendPQExpBufferStr(&connstr, "dbname=");
		appendConnStrVal(&connstr, dbname);

		appendPQExpBufferStr(buf, "-reuse-previous=on ");

		/*
		 * As long as the name does not contain a newline, SQL identifier
		 * quoting satisfies the psql meta-command parser.  Prefer not to
		 * involve psql-interpreted single quotes, which behaved differently
		 * before PostgreSQL 9.2.
		 */
		appendPQExpBufferStr(buf, fmtId(connstr.data));

		termPQExpBuffer(&connstr);
	}
	else
		appendPQExpBufferStr(buf, fmtId(dbname));

	appendPQExpBufferChar(buf, '\n');
}
/*
 * Deconstruct the text representation of a 1-dimensional Postgres array
 * into individual items.
 *
 * atext: the array text, expected to look like "{item,item,item}".
 * itemarray: receives a pointer to an array of NUL-terminated item strings.
 * nitems: receives the number of items.
 *
 * On success, returns true and sets *itemarray and *nitems to describe
 * an array of individual strings.  On parse failure, returns false;
 * *itemarray may exist or be NULL, and *nitems is zero.
 *
 * NOTE: free'ing itemarray is sufficient to deallocate the working storage.
 */
bool
parsePGArray(const char *atext, char ***itemarray, int *nitems)
{
	size_t		inputlen;
	char	  **items;
	char	   *strings;
	int			curitem;

	/*
	 * We expect input in the form of "{item,item,item}" where any item is
	 * either raw data, or surrounded by double quotes (in which case embedded
	 * characters including backslashes and quotes are backslashed).
	 *
	 * We build the result as an array of pointers followed by the actual
	 * string data, all in one malloc block for convenience of deallocation.
	 * The worst-case storage need is not more than one pointer and one
	 * character for each input character (consider "{,,,,,,,,,,}").
	 */
	*itemarray = NULL;
	*nitems = 0;
	inputlen = strlen(atext);
	if (inputlen < 2 || atext[0] != '{' || atext[inputlen - 1] != '}')
		return false;			/* bad input */
	items = (char **) malloc(inputlen * (sizeof(char *) + sizeof(char)));
	if (items == NULL)
		return false;			/* out of memory */

	/* From here on the caller owns the block, even if parsing fails */
	*itemarray = items;

	/* String data is stored right after the pointer array */
	strings = (char *) (items + inputlen);

	atext++;					/* advance over initial '{' */
	curitem = 0;
	while (*atext != '}')
	{
		if (*atext == '\0')
			return false;		/* premature end of string */
		items[curitem] = strings;
		while (*atext != '}' && *atext != ',')
		{
			if (*atext == '\0')
				return false;	/* premature end of string */
			if (*atext != '"')
				*strings++ = *atext++;	/* copy unquoted data */
			else
			{
				/* process quoted substring */
				atext++;
				while (*atext != '"')
				{
					if (*atext == '\0')
						return false;	/* premature end of string */
					if (*atext == '\\')
					{
						/* drop the backslash, keep the character after it */
						atext++;
						if (*atext == '\0')
							return false;	/* premature end of string */
					}
					*strings++ = *atext++;	/* copy quoted data */
				}
				atext++;		/* advance over closing quote */
			}
		}
		*strings++ = '\0';
		if (*atext == ',')
			atext++;
		curitem++;
	}

	/* The '}' that ended the loop must be the last character of the input */
	if (atext[1] != '\0')
		return false;			/* bogus syntax (embedded '}') */
	*nitems = curitem;
	return true;
}
/*
 * Format a reloptions array and append it to the given buffer.
 *
 * buffer: destination for the formatted "name=value, name=value" list.
 * reloptions: the options in Postgres array text form.
 * prefix: text put in front of every option name; typically "" or "toast.".
 * encoding, std_strings: passed on to appendStringLiteral for values that
 *		need to be written as string literals.
 *
 * Returns false if the reloptions array could not be parsed (in which case
 * nothing will have been appended to the buffer), or true on success.
 *
 * Note: this logic should generally match the backend's flatten_reloptions()
 * (in adt/ruleutils.c).
 */
bool
appendReloptionsArray(PQExpBuffer buffer, const char *reloptions,
					  const char *prefix, int encoding, bool std_strings)
{
	char	  **elems;
	int			nelems;
	int			idx;

	if (!parsePGArray(reloptions, &elems, &nelems))
	{
		/* the array may or may not have been allocated; free(NULL) is fine */
		free(elems);
		return false;
	}

	for (idx = 0; idx < nelems; idx++)
	{
		char	   *optname = elems[idx];
		char	   *eq = strchr(optname, '=');
		const char *optval;

		/*
		 * Elements are expected to look like name=value.  Split at the first
		 * "="; should it be missing for some reason, the value is taken to
		 * be empty.
		 */
		if (eq != NULL)
		{
			*eq = '\0';
			optval = eq + 1;
		}
		else
			optval = "";

		if (idx != 0)
			appendPQExpBufferStr(buffer, ", ");
		appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(optname));

		/*
		 * Values generally have to be quoted.  To avoid needless clutter,
		 * one that is an identifier not requiring quotes is written bare.
		 * (Numbers could be allowed too, but that is trickier than it looks:
		 * are leading zeroes significant, for example?  We don't want to
		 * assume much about what custom reloptions might mean.)
		 */
		if (strcmp(fmtId(optval), optval) != 0)
			appendStringLiteral(buffer, optval, encoding, std_strings);
		else
			appendPQExpBufferStr(buffer, optval);
	}

	free(elems);

	return true;
}
/*
 * processSQLNamePattern
 *
 * Scan a wildcard-pattern string and generate appropriate WHERE clauses
 * to limit the set of objects returned.  The WHERE clauses are appended
 * to the already-partially-constructed query in buf.  Returns whether
 * any clause was added.
 *
 * conn: connection query will be sent to (consulted for escaping rules).
 * buf: output parameter.
 * pattern: user-specified pattern option, or NULL if none ("*" is implied).
 * have_where: true if caller already emitted "WHERE" (clauses will be ANDed
 * onto the existing WHERE clause).
 * force_escape: always quote regexp special characters, even outside
 * double quotes (else they are quoted only between double quotes).
 * schemavar: name of query variable to match against a schema-name pattern.
 * Can be NULL if no schema.
 * namevar: name of query variable to match against an object-name pattern.
 * altnamevar: NULL, or name of an alternative variable to match against name.
 * visibilityrule: clause to use if we want to restrict to visible objects
 * (for example, "pg_catalog.pg_table_is_visible(p.oid)").  Can be NULL.
 *
 * The pattern is split at an unquoted "." into a schema part and a name
 * part.  If more than one unquoted "." appears, only the text between the
 * last two is kept as the schema part.
 *
 * Formatting note: the text already present in buf should end with a newline.
 * The appended text, if any, will end with one too.
 */
bool
processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
bool have_where, bool force_escape,
const char *schemavar, const char *namevar,
const char *altnamevar, const char *visibilityrule)
{
PQExpBufferData schemabuf;
PQExpBufferData namebuf;
int encoding = PQclientEncoding(conn);
bool inquotes;
const char *cp;
int i;
bool added_clause = false;
/*
 * WHEREAND() starts a new condition: it emits "WHERE " for the first
 * condition or " AND " for any later one, then records that a WHERE exists
 * and that this call has added a clause.
 */
#define WHEREAND() \
(appendPQExpBufferStr(buf, have_where ? " AND " : "WHERE "), \
have_where = true, added_clause = true)
/*
 * No pattern at all.  This return happens before the work buffers are
 * initialized, so there is nothing to release here.
 */
if (pattern == NULL)
{
/* Default: select all visible objects */
if (visibilityrule)
{
WHEREAND();
appendPQExpBuffer(buf, "%s\n", visibilityrule);
}
return added_clause;
}
initPQExpBuffer(&schemabuf);
initPQExpBuffer(&namebuf);
/*
 * Parse the pattern, converting quotes and lower-casing unquoted letters.
 * Also, adjust shell-style wildcard characters into regexp notation.
 *
 * We surround the pattern with "^(...)$" to force it to match the whole
 * string, as per SQL practice.  We have to have parens in case the string
 * contains "|", else the "^" and "$" will be bound into the first and
 * last alternatives which is not what we want.
 *
 * Note: the result of this pass is the actual regexp pattern(s) we want
 * to execute.  Quoting/escaping into SQL literal format will be done
 * below using appendStringLiteralConn().
 */
appendPQExpBufferStr(&namebuf, "^(");
inquotes = false;
cp = pattern;
while (*cp)
{
char ch = *cp;
if (ch == '"')
{
/* A doubled quote inside quotes stands for one literal quote */
if (inquotes && cp[1] == '"')
{
/* emit one quote, stay in inquotes mode */
appendPQExpBufferChar(&namebuf, '"');
cp++;
}
else
inquotes = !inquotes;
cp++;
}
else if (!inquotes && isupper((unsigned char) ch))
{
/* Unquoted upper-case letters are folded to lower case */
appendPQExpBufferChar(&namebuf,
pg_tolower((unsigned char) ch));
cp++;
}
else if (!inquotes && ch == '*')
{
/* Unquoted "*" becomes the regexp ".*" */
appendPQExpBufferStr(&namebuf, ".*");
cp++;
}
else if (!inquotes && ch == '?')
{
/* Unquoted "?" becomes the regexp "." */
appendPQExpBufferChar(&namebuf, '.');
cp++;
}
else if (!inquotes && ch == '.')
{
/*
 * Found schema/name separator, move current pattern to schema.  Any
 * schema pattern saved by an earlier separator is overwritten, and the
 * name pattern starts over with a fresh "^(".
 */
resetPQExpBuffer(&schemabuf);
appendPQExpBufferStr(&schemabuf, namebuf.data);
resetPQExpBuffer(&namebuf);
appendPQExpBufferStr(&namebuf, "^(");
cp++;
}
else if (ch == '$')
{
/*
 * Dollar is always quoted, whether inside quotes or not.  The
 * reason is that it's allowed in SQL identifiers, so there's a
 * significant use-case for treating it literally, while because
 * we anchor the pattern automatically there is no use-case for
 * having it possess its regexp meaning.
 */
appendPQExpBufferStr(&namebuf, "\\$");
cp++;
}
else
{
/*
 * Ordinary data character, transfer to pattern
 *
 * Inside double quotes, or at all times if force_escape is true,
 * quote regexp special characters with a backslash to avoid
 * regexp errors.  Outside quotes, however, let them pass through
 * as-is; this lets knowledgeable users build regexp expressions
 * that are more powerful than shell-style patterns.
 */
if ((inquotes || force_escape) &&
strchr("|*+?()[]{}.^$\\", ch))
appendPQExpBufferChar(&namebuf, '\\');
/*
 * Copy every byte of the character, as sized by PQmblen() for the
 * client encoding, but stop early if the string ends first.
 */
i = PQmblen(cp, encoding);
while (i-- && *cp)
{
appendPQExpBufferChar(&namebuf, *cp);
cp++;
}
}
}
/*
 * Now decide what we need to emit.  Note there will be a leading "^(" in
 * the patterns in any case, so a length above 2 means the pattern has
 * actual content.
 */
if (namebuf.len > 2)
{
/* We have a name pattern, so constrain the namevar(s) */
appendPQExpBufferStr(&namebuf, ")$");
/* Optimize away a "*" pattern */
if (strcmp(namebuf.data, "^(.*)$") != 0)
{
WHEREAND();
if (altnamevar)
{
/* Match either variable: "(namevar ~ pat OR altnamevar ~ pat)" */
appendPQExpBuffer(buf, "(%s ~ ", namevar);
appendStringLiteralConn(buf, namebuf.data, conn);
appendPQExpBuffer(buf, "\n OR %s ~ ", altnamevar);
appendStringLiteralConn(buf, namebuf.data, conn);
appendPQExpBufferStr(buf, ")\n");
}
else
{
appendPQExpBuffer(buf, "%s ~ ", namevar);
appendStringLiteralConn(buf, namebuf.data, conn);
appendPQExpBufferChar(buf, '\n');
}
}
}
if (schemabuf.len > 2)
{
/* We have a schema pattern, so constrain the schemavar */
appendPQExpBufferStr(&schemabuf, ")$");
/*
 * Optimize away a "*" pattern.  Nothing is emitted either when schemavar
 * is NULL; the visibility rule is not applied in this branch.
 */
if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar)
{
WHEREAND();
appendPQExpBuffer(buf, "%s ~ ", schemavar);
appendStringLiteralConn(buf, schemabuf.data, conn);
appendPQExpBufferChar(buf, '\n');
}
}
else
{
/* No schema pattern given, so select only visible objects */
if (visibilityrule)
{
WHEREAND();
appendPQExpBuffer(buf, "%s\n", visibilityrule);
}
}
termPQExpBuffer(&schemabuf);
termPQExpBuffer(&namebuf);
return added_clause;
/* The macro is local to this function; remove it again. */
#undef WHEREAND
}