Reduce per-character overhead in COPY OUT by combining calls to

CopySendData.
This commit is contained in:
Tom Lane 2006-05-25 18:42:17 +00:00
parent c76cb77105
commit 0a5fdb0d91
1 changed files with 92 additions and 44 deletions

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.264 2006/05/21 20:05:19 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.265 2006/05/25 18:42:17 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -243,8 +243,8 @@ static Datum CopyReadBinaryAttribute(CopyState cstate,
int column_no, FmgrInfo *flinfo, int column_no, FmgrInfo *flinfo,
Oid typioparam, int32 typmod, Oid typioparam, int32 typmod,
bool *isnull); bool *isnull);
static void CopyAttributeOutText(CopyState cstate, char *server_string); static void CopyAttributeOutText(CopyState cstate, char *string);
static void CopyAttributeOutCSV(CopyState cstate, char *server_string, static void CopyAttributeOutCSV(CopyState cstate, char *string,
bool use_quote, bool single_attr); bool use_quote, bool single_attr);
static List *CopyGetAttnums(Relation rel, List *attnamelist); static List *CopyGetAttnums(Relation rel, List *attnamelist);
static char *limit_printout_length(const char *str); static char *limit_printout_length(const char *str);
@ -2884,91 +2884,123 @@ CopyReadBinaryAttribute(CopyState cstate,
/* /*
* Send text representation of one attribute, with conversion and escaping * Send text representation of one attribute, with conversion and escaping
*/ */
#define DUMPSOFAR() \
do { \
if (ptr > start) \
CopySendData(cstate, start, ptr - start); \
} while (0)
static void static void
CopyAttributeOutText(CopyState cstate, char *server_string) CopyAttributeOutText(CopyState cstate, char *string)
{ {
char *string; char *ptr;
char *start;
char c; char c;
char delimc = cstate->delim[0]; char delimc = cstate->delim[0];
int mblen;
if (cstate->need_transcoding) if (cstate->need_transcoding)
string = pg_server_to_client(server_string, strlen(server_string)); ptr = pg_server_to_client(string, strlen(string));
else else
string = server_string; ptr = string;
for (; (c = *string) != '\0'; string += mblen) /*
* We have to grovel through the string searching for control characters
* and instances of the delimiter character. In most cases, though, these
* are infrequent. To avoid overhead from calling CopySendData once per
* character, we dump out all characters between replaceable characters
* in a single call. The loop invariant is that the data from "start"
* to "ptr" can be sent literally, but hasn't yet been.
*/
start = ptr;
while ((c = *ptr) != '\0')
{ {
mblen = 1;
switch (c) switch (c)
{ {
case '\b': case '\b':
DUMPSOFAR();
CopySendString(cstate, "\\b"); CopySendString(cstate, "\\b");
start = ++ptr;
break; break;
case '\f': case '\f':
DUMPSOFAR();
CopySendString(cstate, "\\f"); CopySendString(cstate, "\\f");
start = ++ptr;
break; break;
case '\n': case '\n':
DUMPSOFAR();
CopySendString(cstate, "\\n"); CopySendString(cstate, "\\n");
start = ++ptr;
break; break;
case '\r': case '\r':
DUMPSOFAR();
CopySendString(cstate, "\\r"); CopySendString(cstate, "\\r");
start = ++ptr;
break; break;
case '\t': case '\t':
DUMPSOFAR();
CopySendString(cstate, "\\t"); CopySendString(cstate, "\\t");
start = ++ptr;
break; break;
case '\v': case '\v':
DUMPSOFAR();
CopySendString(cstate, "\\v"); CopySendString(cstate, "\\v");
start = ++ptr;
break; break;
case '\\': case '\\':
DUMPSOFAR();
CopySendString(cstate, "\\\\"); CopySendString(cstate, "\\\\");
start = ++ptr;
break; break;
default: default:
if (c == delimc) if (c == delimc)
{
DUMPSOFAR();
CopySendChar(cstate, '\\'); CopySendChar(cstate, '\\');
start = ptr; /* we include char in next run */
}
/* /*
* We can skip pg_encoding_mblen() overhead when encoding is * We can skip pg_encoding_mblen() overhead when encoding is
* safe, because in valid backend encodings, extra bytes of a * safe, because in valid backend encodings, extra bytes of a
* multibyte character never look like ASCII. * multibyte character never look like ASCII.
*/ */
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c)) if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
mblen = pg_encoding_mblen(cstate->client_encoding, string); ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
CopySendData(cstate, string, mblen); else
ptr++;
break; break;
} }
} }
DUMPSOFAR();
} }
/* /*
* Send CSV representation of one attribute, with conversion and * Send text representation of one attribute, with conversion and
* CSV type escaping * CSV-style escaping
*/ */
static void static void
CopyAttributeOutCSV(CopyState cstate, char *server_string, CopyAttributeOutCSV(CopyState cstate, char *string,
bool use_quote, bool single_attr) bool use_quote, bool single_attr)
{ {
char *string; char *ptr;
char *start;
char c; char c;
char delimc = cstate->delim[0]; char delimc = cstate->delim[0];
char quotec = cstate->quote[0]; char quotec = cstate->quote[0];
char escapec = cstate->escape[0]; char escapec = cstate->escape[0];
char *tstring;
int mblen;
/* force quoting if it matches null_print */ /* force quoting if it matches null_print (before conversion!) */
if (!use_quote && strcmp(server_string, cstate->null_print) == 0) if (!use_quote && strcmp(string, cstate->null_print) == 0)
use_quote = true; use_quote = true;
if (cstate->need_transcoding) if (cstate->need_transcoding)
string = pg_server_to_client(server_string, strlen(server_string)); ptr = pg_server_to_client(string, strlen(string));
else else
string = server_string; ptr = string;
/* /*
* have to run through the string twice, first time to see if it needs * Make a preliminary pass to discover if it needs quoting
* quoting, second to actually send it
*/ */
if (!use_quote) if (!use_quote)
{ {
@ -2977,41 +3009,57 @@ CopyAttributeOutCSV(CopyState cstate, char *server_string,
* alone on a line so it is not interpreted as the end-of-data * alone on a line so it is not interpreted as the end-of-data
* marker. * marker.
*/ */
if (single_attr && strcmp(string, "\\.") == 0) if (single_attr && strcmp(ptr, "\\.") == 0)
use_quote = true; use_quote = true;
else else
{ {
for (tstring = string; (c = *tstring) != '\0'; tstring += mblen) char *tptr = ptr;
while ((c = *tptr) != '\0')
{ {
if (c == delimc || c == quotec || c == '\n' || c == '\r') if (c == delimc || c == quotec || c == '\n' || c == '\r')
{ {
use_quote = true; use_quote = true;
break; break;
} }
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c)) if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
mblen = pg_encoding_mblen(cstate->client_encoding, tstring); tptr += pg_encoding_mblen(cstate->client_encoding, tptr);
else else
mblen = 1; tptr++;
} }
} }
} }
if (use_quote) if (use_quote)
CopySendChar(cstate, quotec);
for (; (c = *string) != '\0'; string += mblen)
{ {
if (use_quote && (c == quotec || c == escapec))
CopySendChar(cstate, escapec);
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
mblen = pg_encoding_mblen(cstate->client_encoding, string);
else
mblen = 1;
CopySendData(cstate, string, mblen);
}
if (use_quote)
CopySendChar(cstate, quotec); CopySendChar(cstate, quotec);
/*
* We adopt the same optimization strategy as in CopyAttributeOutText
*/
start = ptr;
while ((c = *ptr) != '\0')
{
if (c == quotec || c == escapec)
{
DUMPSOFAR();
CopySendChar(cstate, escapec);
start = ptr; /* we include char in next run */
}
if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
else
ptr++;
}
DUMPSOFAR();
CopySendChar(cstate, quotec);
}
else
{
/* If it doesn't need quoting, we can just dump it as-is */
CopySendString(cstate, ptr);
}
} }
/* /*