2000-01-19 00:30:24 +01:00
|
|
|
/*
|
|
|
|
* psql - the PostgreSQL interactive terminal
|
|
|
|
*
|
2011-01-01 19:18:15 +01:00
|
|
|
* Copyright (c) 2000-2011, PostgreSQL Global Development Group
|
2000-01-19 00:30:24 +01:00
|
|
|
*
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/bin/psql/stringutils.c
|
2000-01-19 00:30:24 +01:00
|
|
|
*/
|
2001-02-10 03:31:31 +01:00
|
|
|
#include "postgres_fe.h"
|
1999-11-04 22:56:02 +01:00
|
|
|
|
2002-10-19 02:22:14 +02:00
|
|
|
#include <ctype.h>
|
1996-11-26 04:20:35 +01:00
|
|
|
|
2003-12-01 23:14:40 +01:00
|
|
|
#include "common.h"
|
2002-10-19 02:22:14 +02:00
|
|
|
#include "stringutils.h"
|
1999-11-04 22:56:02 +01:00
|
|
|
|
1996-11-26 04:20:35 +01:00
|
|
|
|
2002-10-19 02:22:14 +02:00
|
|
|
static void strip_quotes(char *source, char quote, char escape, int encoding);
|
1996-07-09 08:22:35 +02:00
|
|
|
|
1996-11-14 17:08:05 +01:00
|
|
|
|
1999-11-04 22:56:02 +01:00
|
|
|
/*
 * Replacement for strtok() (a.k.a. poor man's flex)
 *
 * Splits a string into tokens, returning one token per call, then NULL
 * when no more tokens exist in the given string.
 *
 * The calling convention is similar to that of strtok, but with more
 * frammishes.
 *
 * s -			string to parse, if NULL continue parsing the last string
 * whitespace - set of whitespace characters that separate tokens
 * delim -		set of non-whitespace separator characters (or NULL)
 * quote -		set of characters that can quote a token (NULL if none)
 * escape -		character that can quote quotes (0 if none)
 * e_strings -	if TRUE, treat E'...' syntax as a valid token
 * del_quotes - if TRUE, strip quotes from the returned token, else return
 *				it exactly as found in the string
 * encoding -	the active character-set encoding
 *
 * Characters in 'delim', if any, will be returned as single-character
 * tokens unless part of a quoted token.
 *
 * Double occurrences of the quoting character are always taken to represent
 * a single quote character in the data.  If escape isn't 0, then escape
 * followed by anything (except \0) is a data character too.
 *
 * The combination of e_strings and del_quotes both TRUE is not currently
 * handled.  This could be fixed but it's not needed anywhere at the moment.
 *
 * Note that the string s is _not_ overwritten in this implementation:
 * tokens are carved out of a private copy held in static storage.  The
 * returned pointer refers into that copy, so the caller must not free it,
 * and it stops being valid once strtokx is called again with a non-NULL s
 * or once a call returns NULL.  Because of the static state, only one
 * string can be tokenized at a time.
 *
 * NB: it's okay to vary delim, quote, and escape from one call to the
 * next on a single source string, but changing whitespace is a bad idea
 * since you might lose data.
 */
char *
strtokx(const char *s,
		const char *whitespace,
		const char *delim,
		const char *quote,
		char escape,
		bool e_strings,
		bool del_quotes,
		int encoding)
{
	static char *storage = NULL;	/* private copy of the user's string */
	static char *string = NULL; /* where in storage to resume on next call */

	/* variously abused variables: */
	size_t		offset;
	char	   *start;
	char	   *p;

	if (s)
	{
		free(storage);

		/*
		 * We may need extra space to insert delimiter nulls for adjacent
		 * tokens.  2X the space is a gross overestimate, but it's unlikely
		 * that this code will be used on huge strings anyway.
		 */
		storage = pg_malloc(2 * strlen(s) + 1);
		strcpy(storage, s);
		string = storage;
	}

	if (!storage)
		return NULL;

	/* skip leading whitespace */
	offset = strspn(string, whitespace);
	start = &string[offset];

	/* end of string reached? */
	if (*start == '\0')
	{
		/* technically we don't need to free here, but we're nice */
		free(storage);
		storage = NULL;
		string = NULL;
		return NULL;
	}

	/* test if delimiter character */
	if (delim && strchr(delim, *start))
	{
		/*
		 * If not at end of string, we need to insert a null to terminate the
		 * returned token.  We can just overwrite the next character if it
		 * happens to be in the whitespace set ... otherwise move over the
		 * rest of the string to make room.  (This is why we allocated extra
		 * space above).
		 */
		p = start + 1;
		if (*p != '\0')
		{
			if (!strchr(whitespace, *p))
				memmove(p + 1, p, strlen(p) + 1);
			*p = '\0';
			string = p + 1;
		}
		else
		{
			/* at end of string, so no extra work */
			string = p;
		}

		return start;
	}

	/* check for E string */
	p = start;
	if (e_strings &&
		(*p == 'E' || *p == 'e') &&
		p[1] == '\'')
	{
		quote = "'";
		escape = '\\';			/* if std strings before, not any more */
		p++;
	}

	/* test if quoting character */
	if (quote && strchr(quote, *p))
	{
		/* okay, we have a quoted token, now scan for the closer */
		char		thisquote = *p++;

		while (*p)
		{
			int			charlen;

			if (*p == escape && p[1] != '\0')
				p++;			/* process escaped anything */
			else if (*p == thisquote && p[1] == thisquote)
				p++;			/* process doubled quote */
			else if (*p == thisquote)
			{
				p++;			/* skip trailing quote */
				break;
			}

			/*
			 * Step over one (possibly multibyte) character.  PQmblen only
			 * inspects the leading byte, so for a truncated or invalid
			 * sequence it can claim more bytes than are left; never step
			 * past the terminating null.  *p is known nonzero here, so we
			 * always consume at least one byte.
			 */
			charlen = PQmblen(p, encoding);
			do
			{
				p++;
			} while (--charlen > 0 && *p != '\0');
		}

		/*
		 * If not at end of string, we need to insert a null to terminate the
		 * returned token.  Same technique as for delimiter tokens: overwrite
		 * a whitespace character, or shift the remainder right by one.
		 */
		if (*p != '\0')
		{
			if (!strchr(whitespace, *p))
				memmove(p + 1, p, strlen(p) + 1);
			*p = '\0';
			string = p + 1;
		}
		else
		{
			/* at end of string, so no extra work */
			string = p;
		}

		/* Clean up the token if caller wants that */
		if (del_quotes)
			strip_quotes(start, thisquote, escape, encoding);

		return start;
	}

	/*
	 * Otherwise no quoting character.  Scan till next whitespace, delimiter
	 * or quote.  NB: at this point, *start is known not to be '\0',
	 * whitespace, delim, or quote, so we will consume at least one character.
	 */
	offset = strcspn(start, whitespace);

	if (delim)
	{
		size_t		offset2 = strcspn(start, delim);

		if (offset > offset2)
			offset = offset2;
	}

	if (quote)
	{
		size_t		offset2 = strcspn(start, quote);

		if (offset > offset2)
			offset = offset2;
	}

	p = start + offset;

	/*
	 * If not at end of string, we need to insert a null to terminate the
	 * returned token.  Same technique as for delimiter tokens: overwrite a
	 * whitespace character, or shift the remainder right by one.
	 */
	if (*p != '\0')
	{
		if (!strchr(whitespace, *p))
			memmove(p + 1, p, strlen(p) + 1);
		*p = '\0';
		string = p + 1;
	}
	else
	{
		/* at end of string, so no extra work */
		string = p;
	}

	return start;
}
|
1999-11-04 22:56:02 +01:00
|
|
|
|
|
|
|
|
|
|
|
/*
 * strip_quotes
 *
 * Remove quotes from the string at *source.  Leading and trailing occurrences
 * of 'quote' are removed; embedded double occurrences of 'quote' are reduced
 * to single occurrences; if 'escape' is not 0 then 'escape' removes special
 * significance of next character.
 *
 * source -		null-terminated token to clean up (must not be NULL)
 * quote -		the quoting character (must not be 0)
 * escape -		escape character, or 0 if none
 * encoding -	character-set encoding, passed to PQmblen so that multibyte
 *				characters are copied as a unit
 *
 * Note that the source string is overwritten in-place.  The result is never
 * longer than the input, so the write position never overtakes the read
 * position.
 */
static void
strip_quotes(char *source, char quote, char escape, int encoding)
{
	char	   *src;
	char	   *dst;

	psql_assert(source);
	psql_assert(quote);

	src = dst = source;

	if (*src && *src == quote)
		src++;					/* skip leading quote */

	while (*src)
	{
		char		c = *src;
		int			i;

		if (c == quote && src[1] == '\0')
			break;				/* skip trailing quote */
		else if (c == quote && src[1] == quote)
			src++;				/* process doubled quote */
		else if (c == escape && src[1] != '\0')
			src++;				/* process escaped character */

		/*
		 * Copy one (possibly multibyte) character.  PQmblen only inspects
		 * the leading byte, so for a truncated or invalid sequence it can
		 * claim more bytes than are left; stop at the terminating null
		 * rather than copying it and running off the end of the string.
		 * *src is known nonzero here, so at least one byte is copied.
		 */
		i = PQmblen(src, encoding);
		do
		{
			*dst++ = *src++;
		} while (--i > 0 && *src != '\0');
	}

	*dst = '\0';
}
|