diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 44da71c4cb..db7d24a511 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -187,15 +187,15 @@ typedef struct CopyStateData TransitionCaptureState *transition_capture; /* - * These variables are used to reduce overhead in textual COPY FROM. + * These variables are used to reduce overhead in COPY FROM. * * attribute_buf holds the separated, de-escaped text for each field of * the current line. The CopyReadAttributes functions return arrays of * pointers into this buffer. We avoid palloc/pfree overhead by re-using * the buffer on each cycle. * - * (In binary COPY FROM, attribute_buf holds the binary data for the - * current field, while the other variables are not used.) + * In binary COPY FROM, attribute_buf holds the binary data for the + * current field, but the usage is otherwise similar. */ StringInfoData attribute_buf; @@ -209,7 +209,8 @@ typedef struct CopyStateData * input cycle is first to read the whole line into line_buf, convert it * to server encoding there, and then extract the individual attribute * fields into attribute_buf. line_buf is preserved unmodified so that we - * can display it in error messages if appropriate. + * can display it in error messages if appropriate. (In binary mode, + * line_buf is not used.) */ StringInfoData line_buf; bool line_buf_converted; /* converted to server encoding? */ @@ -217,15 +218,18 @@ typedef struct CopyStateData /* * Finally, raw_buf holds raw data read from the data source (file or - * client connection). CopyReadLine parses this data sufficiently to - * locate line boundaries, then transfers the data to line_buf and - * converts it. Note: we guarantee that there is a \0 at - * raw_buf[raw_buf_len]. + * client connection). In text mode, CopyReadLine parses this data + * sufficiently to locate line boundaries, then transfers the data to + * line_buf and converts it. In binary mode, CopyReadBinaryData fetches + * appropriate amounts of data from this buffer. In both modes, we + * guarantee that there is a \0 at raw_buf[raw_buf_len]. */ #define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */ char *raw_buf; int raw_buf_index; /* next byte to process */ int raw_buf_len; /* total # of bytes stored */ + /* Shorthand for number of unconsumed bytes available in raw_buf */ +#define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index) } CopyStateData; /* DestReceiver for COPY (query) TO */ @@ -394,6 +398,8 @@ static void CopySendInt32(CopyState cstate, int32 val); static bool CopyGetInt32(CopyState cstate, int32 *val); static void CopySendInt16(CopyState cstate, int16 val); static bool CopyGetInt16(CopyState cstate, int16 *val); +static bool CopyLoadRawBuf(CopyState cstate); +static int CopyReadBinaryData(CopyState cstate, char *dest, int nbytes); /* @@ -723,7 +729,7 @@ CopyGetData(CopyState cstate, void *databuf, int minread, int maxread) /* * CopySendInt32 sends an int32 in network byte order */ -static void +static inline void CopySendInt32(CopyState cstate, int32 val) { uint32 buf; @@ -737,12 +743,12 @@ CopySendInt32(CopyState cstate, int32 val) * * Returns true if OK, false if EOF */ -static bool +static inline bool CopyGetInt32(CopyState cstate, int32 *val) { uint32 buf; - if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf)) + if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf)) { *val = 0; /* suppress compiler warning */ return false; @@ -754,7 +760,7 @@ CopyGetInt32(CopyState cstate, int32 *val) /* * CopySendInt16 sends an int16 in network byte order */ -static void +static inline void CopySendInt16(CopyState cstate, int16 val) { uint16 buf; @@ -766,12 +772,12 @@ CopySendInt16(CopyState cstate, int16 val) /* * CopyGetInt16 reads an int16 that appears in network byte order */ -static bool +static inline bool CopyGetInt16(CopyState cstate, int16 *val) { uint16 buf; - if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf)) + if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf)) { *val = 0; /* suppress compiler warning */ return false; @@ -786,26 +792,20 @@ CopyGetInt16(CopyState cstate, int16 *val) * * Returns true if able to obtain at least one more byte, else false. * - * If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred - * down to the start of the buffer and then we load more data after that. - * This case is used only when a frontend multibyte character crosses a - * bufferload boundary. + * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start + * of the buffer and then we load more data after that. This case occurs only + * when a multibyte character crosses a bufferload boundary. */ static bool CopyLoadRawBuf(CopyState cstate) { - int nbytes; + int nbytes = RAW_BUF_BYTES(cstate); int inbytes; - if (cstate->raw_buf_index < cstate->raw_buf_len) - { - /* Copy down the unprocessed data */ - nbytes = cstate->raw_buf_len - cstate->raw_buf_index; + /* Copy down the unprocessed data if any. */ + if (nbytes > 0) memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index, nbytes); - } - else - nbytes = 0; /* no data need be saved */ inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes, 1, RAW_BUF_SIZE - nbytes); @@ -816,6 +816,54 @@ CopyLoadRawBuf(CopyState cstate) return (inbytes > 0); } +/* + * CopyReadBinaryData + * + * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf + * and writes them to 'dest'. Returns the number of bytes read (which + * would be less than 'nbytes' only if we reach EOF). + */ +static int +CopyReadBinaryData(CopyState cstate, char *dest, int nbytes) +{ + int copied_bytes = 0; + + if (RAW_BUF_BYTES(cstate) >= nbytes) + { + /* Enough bytes are present in the buffer. */ + memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes); + cstate->raw_buf_index += nbytes; + copied_bytes = nbytes; + } + else + { + /* + * Not enough bytes in the buffer, so must read from the file. Need + * to loop since 'nbytes' could be larger than the buffer size. + */ + do + { + int copy_bytes; + + /* Load more data if buffer is empty. */ + if (RAW_BUF_BYTES(cstate) == 0) + { + if (!CopyLoadRawBuf(cstate)) + break; /* EOF */ + } + + /* Transfer some bytes. */ + copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate)); + memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes); + cstate->raw_buf_index += copy_bytes; + dest += copy_bytes; + copied_bytes += copy_bytes; + } while (copied_bytes < nbytes); + } + + return copied_bytes; +} + /* * DoCopy executes the SQL COPY statement @@ -3366,17 +3414,17 @@ BeginCopyFrom(ParseState *pstate, cstate->cur_attval = NULL; /* - * Set up variables to avoid per-attribute overhead. attribute_buf is - * used in both text and binary modes, but we use line_buf and raw_buf + * Set up variables to avoid per-attribute overhead. attribute_buf and + * raw_buf are used in both text and binary modes, but we use line_buf * only in text mode. */ initStringInfo(&cstate->attribute_buf); + cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1); + cstate->raw_buf_index = cstate->raw_buf_len = 0; if (!cstate->binary) { initStringInfo(&cstate->line_buf); cstate->line_buf_converted = false; - cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1); - cstate->raw_buf_index = cstate->raw_buf_len = 0; } /* Assign range table, we'll need it in CopyFrom. */ @@ -3527,7 +3575,7 @@ BeginCopyFrom(ParseState *pstate, int32 tmp; /* Signature */ - if (CopyGetData(cstate, readSig, 11, 11) != 11 || + if (CopyReadBinaryData(cstate, readSig, 11) != 11 || memcmp(readSig, BinarySignature, 11) != 0) ereport(ERROR, (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), @@ -3555,7 +3603,7 @@ BeginCopyFrom(ParseState *pstate, /* Skip extension header, if present */ while (tmp-- > 0) { - if (CopyGetData(cstate, readSig, 1, 1) != 1) + if (CopyReadBinaryData(cstate, readSig, 1) != 1) ereport(ERROR, (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), errmsg("invalid COPY file header (wrong length)"))); @@ -3771,7 +3819,7 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext, char dummy; if (cstate->copy_dest != COPY_OLD_FE && - CopyGetData(cstate, &dummy, 1, 1) > 0) + CopyReadBinaryData(cstate, &dummy, 1) > 0) ereport(ERROR, (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), errmsg("received copy data after EOF marker"))); @@ -4744,8 +4792,8 @@ CopyReadBinaryAttribute(CopyState cstate, FmgrInfo *flinfo, resetStringInfo(&cstate->attribute_buf); enlargeStringInfo(&cstate->attribute_buf, fld_size); - if (CopyGetData(cstate, cstate->attribute_buf.data, - fld_size, fld_size) != fld_size) + if (CopyReadBinaryData(cstate, cstate->attribute_buf.data, + fld_size) != fld_size) ereport(ERROR, (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), errmsg("unexpected EOF in COPY data")));