2000-01-19 00:30:24 +01:00
|
|
|
/*
|
|
|
|
* psql - the PostgreSQL interactive terminal
|
|
|
|
*
|
2020-01-01 18:21:45 +01:00
|
|
|
* Copyright (c) 2000-2020, PostgreSQL Global Development Group
|
2000-01-19 00:30:24 +01:00
|
|
|
*
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/bin/psql/stringutils.c
|
2000-01-19 00:30:24 +01:00
|
|
|
*/
|
2001-02-10 03:31:31 +01:00
|
|
|
#include "postgres_fe.h"
|
1999-11-04 22:56:02 +01:00
|
|
|
|
2002-10-19 02:22:14 +02:00
|
|
|
#include <ctype.h>
|
1996-11-26 04:20:35 +01:00
|
|
|
|
2003-12-01 23:14:40 +01:00
|
|
|
#include "common.h"
|
2002-10-19 02:22:14 +02:00
|
|
|
#include "stringutils.h"
|
1999-11-04 22:56:02 +01:00
|
|
|
|
1996-11-26 04:20:35 +01:00
|
|
|
|
1999-11-04 22:56:02 +01:00
|
|
|
/*
 * Terminate the token that ends just before 'p' and return the position
 * at which the next scan should resume.
 *
 * If 'p' is already at end of string nothing needs to be done.  Otherwise
 * a null must be stored at *p: when *p is a whitespace character it can
 * simply be overwritten, else the remainder of the string (including its
 * terminator) is shifted up by one byte to make room.  The caller must
 * guarantee that the buffer has space for that extra byte.
 */
static char *
strtokx_finish_token(char *p, const char *whitespace)
{
	if (*p == '\0')
		return p;				/* at end of string, so no extra work */

	if (!strchr(whitespace, *p))
		memmove(p + 1, p, strlen(p) + 1);
	*p = '\0';

	return p + 1;
}

/*
 * Replacement for strtok() (a.k.a. poor man's flex)
 *
 * Splits a string into tokens, returning one token per call, then NULL
 * when no more tokens exist in the given string.
 *
 * The calling convention is similar to that of strtok, but with more
 * frammishes.
 *
 * s -			string to parse, if NULL continue parsing the last string
 * whitespace - set of whitespace characters that separate tokens
 * delim -		set of non-whitespace separator characters (or NULL)
 * quote -		set of characters that can quote a token (NULL if none)
 * escape -		character that can quote quotes (0 if none)
 * e_strings -	if true, treat E'...' syntax as a valid token
 * del_quotes - if true, strip quotes from the returned token, else return
 *				it exactly as found in the string
 * encoding -	the active character-set encoding
 *
 * Characters in 'delim', if any, will be returned as single-character
 * tokens unless part of a quoted token.
 *
 * Double occurrences of the quoting character are always taken to represent
 * a single quote character in the data.  If escape isn't 0, then escape
 * followed by anything (except \0) is a data character too.
 *
 * The combination of e_strings and del_quotes both true is not currently
 * handled.  This could be fixed but it's not needed anywhere at the moment.
 *
 * Note that the string s is _not_ overwritten in this implementation.
 * Instead a private copy is kept in static storage; the returned token
 * points into that copy, which is freed when a new string is passed in or
 * when the end of the current string is reached.  Because of the static
 * state, only one string can be parsed at a time.
 *
 * NB: it's okay to vary delim, quote, and escape from one call to the
 * next on a single source string, but changing whitespace is a bad idea
 * since you might lose data.
 */
char *
strtokx(const char *s,
		const char *whitespace,
		const char *delim,
		const char *quote,
		char escape,
		bool e_strings,
		bool del_quotes,
		int encoding)
{
	static char *storage = NULL;	/* local copy of the user's string */
	static char *string = NULL; /* where in storage to continue on next call */

	size_t		offset;
	char	   *start;
	char	   *p;

	if (s)
	{
		free(storage);

		/*
		 * We may need extra space to insert delimiter nulls for adjacent
		 * tokens.  2X the space is a gross overestimate, but it's unlikely
		 * that this code will be used on huge strings anyway.
		 */
		storage = pg_malloc(2 * strlen(s) + 1);
		strcpy(storage, s);
		string = storage;
	}

	if (!storage)
		return NULL;

	/* skip leading whitespace */
	offset = strspn(string, whitespace);
	start = &string[offset];

	/* end of string reached? */
	if (*start == '\0')
	{
		/* technically we don't need to free here, but we're nice */
		free(storage);
		storage = NULL;
		string = NULL;
		return NULL;
	}

	/* test if delimiter character: it forms a one-character token */
	if (delim && strchr(delim, *start))
	{
		string = strtokx_finish_token(start + 1, whitespace);
		return start;
	}

	/* check for E string */
	p = start;
	if (e_strings &&
		(*p == 'E' || *p == 'e') &&
		p[1] == '\'')
	{
		quote = "'";
		escape = '\\';			/* if std strings before, not any more */
		p++;
	}

	/* test if quoting character */
	if (quote && strchr(quote, *p))
	{
		/* okay, we have a quoted token, now scan for the closer */
		char		thisquote = *p++;

		while (*p)
		{
			int			charlen;

			if (*p == escape && p[1] != '\0')
				p++;			/* process escaped anything */
			else if (*p == thisquote && p[1] == thisquote)
				p++;			/* process doubled quote */
			else if (*p == thisquote)
			{
				p++;			/* skip trailing quote */
				break;
			}

			/*
			 * Step over the current character.  The length reported for
			 * the encoding is not trusted blindly: if the string ends in
			 * the middle of a multibyte character we must stop at the
			 * terminating null rather than run past it.
			 */
			for (charlen = PQmblen(p, encoding);
				 charlen > 0 && *p != '\0';
				 charlen--)
				p++;
		}

		/* insert a terminating null after the token if needed */
		string = strtokx_finish_token(p, whitespace);

		/* Clean up the token if caller wants that */
		if (del_quotes)
			strip_quotes(start, thisquote, escape, encoding);

		return start;
	}

	/*
	 * Otherwise no quoting character.  Scan till next whitespace, delimiter
	 * or quote.  NB: at this point, *start is known not to be '\0',
	 * whitespace, delim, or quote, so we will consume at least one character.
	 */
	offset = strcspn(start, whitespace);

	if (delim)
	{
		size_t		offset2 = strcspn(start, delim);

		if (offset > offset2)
			offset = offset2;
	}

	if (quote)
	{
		size_t		offset2 = strcspn(start, quote);

		if (offset > offset2)
			offset = offset2;
	}

	/* insert a terminating null after the token if needed */
	string = strtokx_finish_token(start + offset, whitespace);

	return start;
}
|
1999-11-04 22:56:02 +01:00
|
|
|
|
|
|
|
|
|
|
|
/*
 * strip_quotes
 *
 * Remove quotes from the string at *source.  Leading and trailing occurrences
 * of 'quote' are removed; embedded double occurrences of 'quote' are reduced
 * to single occurrences; if 'escape' is not 0 then 'escape' removes special
 * significance of next character.
 *
 * source -		null-terminated string to process (must not be NULL)
 * quote -		the quoting character (must not be 0)
 * escape -		the escape character (0 if none)
 * encoding -	the active character-set encoding
 *
 * Note that the source string is overwritten in-place.  The result is never
 * longer than the input.
 */
void
strip_quotes(char *source, char quote, char escape, int encoding)
{
	char	   *src;
	char	   *dst;

	Assert(source != NULL);
	Assert(quote != '\0');

	src = dst = source;

	if (*src && *src == quote)
		src++;					/* skip leading quote */

	while (*src)
	{
		char		c = *src;
		int			i;

		if (c == quote && src[1] == '\0')
			break;				/* skip trailing quote */
		else if (c == quote && src[1] == quote)
			src++;				/* process doubled quote */
		else if (c == escape && src[1] != '\0')
			src++;				/* process escaped character */

		/*
		 * Copy one (possibly multibyte) character.  Stop at the terminating
		 * null even if the encoding claims the character is longer, so that
		 * a truncated multibyte sequence at the end of the string cannot
		 * make us read or copy past the end of the buffer.
		 */
		i = PQmblen(src, encoding);
		while (i-- > 0 && *src != '\0')
			*dst++ = *src++;
	}

	*dst = '\0';
}
|
2012-02-28 05:06:29 +01:00
|
|
|
|
|
|
|
|
|
|
|
/*
 * quote_if_needed
 *
 * Opposite of strip_quotes().  If "source" denotes itself literally without
 * quoting or escaping, returns NULL.  Otherwise, returns a malloc'd copy with
 * quoting and escaping applied:
 *
 * source -			string to parse
 * entails_quote -	any of these present?  need outer quotes
 * quote -			doubled within string, affixed to both ends
 * escape -			doubled within string
 * encoding -		the active character-set encoding
 *
 * A non-NULL result is owned by the caller, who must free() it.
 *
 * Do not use this as a substitute for PQescapeStringConn().  Use it for
 * strings to be parsed by strtokx() or psql_scan_slash_option().
 */
char *
quote_if_needed(const char *source, const char *entails_quote,
				char quote, char escape, int encoding)
{
	const char *src;
	char	   *ret;
	char	   *dst;
	bool		need_quotes = false;

	Assert(source != NULL);
	Assert(quote != '\0');

	src = source;

	/*
	 * Worst case every source byte is doubled, plus the two enclosing quotes
	 * and the terminating null.
	 */
	dst = ret = pg_malloc(2 * strlen(src) + 3); /* excess */

	*dst++ = quote;

	while (*src)
	{
		char		c = *src;
		int			i;

		if (c == quote)
		{
			need_quotes = true;
			*dst++ = quote;		/* double the quote character */
		}
		else if (c == escape)
		{
			need_quotes = true;
			*dst++ = escape;	/* double the escape character */
		}
		else if (strchr(entails_quote, c))
			need_quotes = true;

		/*
		 * Copy one (possibly multibyte) character.  Stop at the terminating
		 * null even if the encoding claims the character is longer;
		 * otherwise a truncated multibyte sequence at the end of the input
		 * would make us read past the source and write past the space
		 * allocated above.
		 */
		i = PQmblen(src, encoding);
		while (i-- > 0 && *src != '\0')
			*dst++ = *src++;
	}

	*dst++ = quote;
	*dst = '\0';

	/* The string needed no decoration after all: discard the copy */
	if (!need_quotes)
	{
		free(ret);
		ret = NULL;
	}

	return ret;
}
|