Split copy.c into four files.

copy.c has grown really large. Split it into more manageable parts:

- copy.c now contains only a few functions that are common to COPY FROM
  and COPY TO.

- copyto.c contains code for COPY TO.

- copyfrom.c contains code for initializing COPY FROM and inserting the
  tuples into the correct table.

- copyfromparse.c contains code for reading from the client/file/program,
  and parsing the input text/CSV/binary format into tuples.

All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.

The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOptions struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.

Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
This commit is contained in:
Heikki Linnakangas 2020-11-23 10:50:50 +02:00
parent 17958972fe
commit c532d15ddd
9 changed files with 4829 additions and 4517 deletions

View File

@ -105,7 +105,7 @@ typedef struct FileFdwExecutionState
bool is_program; /* true if filename represents an OS command */
List *options; /* merged COPY options, excluding filename and
* is_program */
CopyState cstate; /* COPY execution state */
CopyFromState cstate; /* COPY execution state */
} FileFdwExecutionState;
/*
@ -655,7 +655,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
char *filename;
bool is_program;
List *options;
CopyState cstate;
CopyFromState cstate;
FileFdwExecutionState *festate;
/*
@ -677,6 +677,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
*/
cstate = BeginCopyFrom(NULL,
node->ss.ss_currentRelation,
NULL,
filename,
is_program,
NULL,
@ -752,6 +753,7 @@ fileReScanForeignScan(ForeignScanState *node)
festate->cstate = BeginCopyFrom(NULL,
node->ss.ss_currentRelation,
NULL,
festate->filename,
festate->is_program,
NULL,
@ -1107,7 +1109,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
char *filename;
bool is_program;
List *options;
CopyState cstate;
CopyFromState cstate;
ErrorContextCallback errcallback;
MemoryContext oldcontext = CurrentMemoryContext;
MemoryContext tupcontext;
@ -1125,7 +1127,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
/*
* Create CopyFromState from FDW options.
*/
cstate = BeginCopyFrom(NULL, onerel, filename, is_program, NULL, NIL,
cstate = BeginCopyFrom(NULL, onerel, NULL, filename, is_program, NULL, NIL,
options);
/*

View File

@ -24,6 +24,9 @@ OBJS = \
constraint.o \
conversioncmds.o \
copy.o \
copyfrom.o \
copyfromparse.o \
copyto.o \
createas.o \
dbcommands.o \
define.o \

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -749,7 +749,7 @@ copy_table(Relation rel)
LogicalRepRelation lrel;
WalRcvExecResult *res;
StringInfoData cmd;
CopyState cstate;
CopyFromState cstate;
List *attnamelist;
ParseState *pstate;
@ -800,7 +800,7 @@ copy_table(Relation rel)
NULL, false, false);
attnamelist = make_copy_attnamelist(relmapentry);
cstate = BeginCopyFrom(pstate, rel, NULL, false, copy_read_data, attnamelist, NIL);
cstate = BeginCopyFrom(pstate, rel, NULL, NULL, false, copy_read_data, attnamelist, NIL);
/* Do the copy */
(void) CopyFrom(cstate);

View File

@ -19,26 +19,71 @@
#include "parser/parse_node.h"
#include "tcop/dest.h"
/* CopyStateData is private in commands/copy.c */
typedef struct CopyStateData *CopyState;
/*
* A struct to hold COPY options, in a parsed form. All of these are related
* to formatting, except for 'freeze', which doesn't really belong here, but
* it's expedient to parse it along with all the other options.
*/
typedef struct CopyFormatOptions
{
/* parameters from the COPY command */
int file_encoding; /* file or remote side's character encoding,
* -1 if not specified */
bool binary; /* binary format? */
bool freeze; /* freeze rows on loading? (not a formatting
* option; it is merely parsed alongside them) */
bool csv_mode; /* Comma Separated Value format? */
bool header_line; /* CSV header line? */
char *null_print; /* NULL marker string (server encoding!) */
int null_print_len; /* length of null_print */
char *null_print_client; /* null_print converted to file encoding */
char *delim; /* column delimiter (must be 1 byte) */
char *quote; /* CSV quote char (must be 1 byte) */
char *escape; /* CSV escape char (must be 1 byte) */
/* FORCE_QUOTE (CSV) */
List *force_quote; /* list of column names */
bool force_quote_all; /* FORCE_QUOTE *? */
bool *force_quote_flags; /* per-column FORCE_QUOTE flags */
/* FORCE_NOT_NULL (CSV) */
List *force_notnull; /* list of column names */
bool *force_notnull_flags; /* per-column FORCE_NOT_NULL flags */
/* FORCE_NULL (CSV) */
List *force_null; /* list of column names */
bool *force_null_flags; /* per-column FORCE_NULL flags */
/* selective conversion */
bool convert_selectively; /* do selective binary conversion? */
List *convert_select; /* list of column names (can be NIL) */
} CopyFormatOptions;
/* These are private in commands/copy[from|to].c */
typedef struct CopyFromStateData *CopyFromState;
typedef struct CopyToStateData *CopyToState;
typedef int (*copy_data_source_cb) (void *outbuf, int minread, int maxread);
extern void DoCopy(ParseState *state, const CopyStmt *stmt,
int stmt_location, int stmt_len,
uint64 *processed);
extern void ProcessCopyOptions(ParseState *pstate, CopyState cstate, bool is_from, List *options);
extern CopyState BeginCopyFrom(ParseState *pstate, Relation rel, const char *filename,
extern void ProcessCopyOptions(ParseState *pstate, CopyFormatOptions *ops_out, bool is_from, List *options);
extern CopyFromState BeginCopyFrom(ParseState *pstate, Relation rel, Node *whereClause,
const char *filename,
bool is_program, copy_data_source_cb data_source_cb, List *attnamelist, List *options);
extern void EndCopyFrom(CopyState cstate);
extern bool NextCopyFrom(CopyState cstate, ExprContext *econtext,
extern void EndCopyFrom(CopyFromState cstate);
extern bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
Datum *values, bool *nulls);
extern bool NextCopyFromRawFields(CopyState cstate,
extern bool NextCopyFromRawFields(CopyFromState cstate,
char ***fields, int *nfields);
extern void CopyFromErrorCallback(void *arg);
extern uint64 CopyFrom(CopyState cstate);
extern uint64 CopyFrom(CopyFromState cstate);
extern DestReceiver *CreateCopyDestReceiver(void);
/*
* internal prototypes
*/
extern CopyToState BeginCopyTo(ParseState *pstate, Relation rel, RawStmt *query,
Oid queryRelId, const char *filename, bool is_program,
List *attnamelist, List *options);
extern void EndCopyTo(CopyToState cstate);
extern uint64 DoCopyTo(CopyToState cstate);
extern List *CopyGetAttnums(TupleDesc tupDesc, Relation rel,
List *attnamelist);
#endif /* COPY_H */

View File

@ -0,0 +1,164 @@
/*-------------------------------------------------------------------------
*
* copyfrom_internal.h
* Internal definitions for COPY FROM command.
*
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/commands/copyfrom_internal.h
*
*-------------------------------------------------------------------------
*/
#ifndef COPYFROM_INTERNAL_H
#define COPYFROM_INTERNAL_H
#include "commands/copy.h"
#include "commands/trigger.h"
/*
* Represents the different source cases we need to worry about at
* the bottom level
*/
typedef enum CopySource
{
COPY_FILE, /* from file (or a piped program); the stream is
* CopyFromStateData.copy_file */
COPY_OLD_FE, /* from frontend (2.0 protocol) */
COPY_NEW_FE, /* from frontend (3.0 protocol); uses
* CopyFromStateData.fe_msgbuf */
COPY_CALLBACK /* from callback function (presumably
* CopyFromStateData.data_source_cb -- confirm) */
} CopySource;
/*
* Represents the end-of-line terminator type of the input
*/
typedef enum EolType
{
EOL_UNKNOWN, /* terminator type is not known */
EOL_NL, /* NL: newline, '\n' */
EOL_CR, /* CR: carriage return, '\r' */
EOL_CRNL /* CRNL: carriage return followed by newline */
} EolType;
/*
* Represents the heap insert method to be used during COPY FROM.
*/
typedef enum CopyInsertMethod
{
CIM_SINGLE, /* use table_tuple_insert or fdw routine */
CIM_MULTI, /* always use table_multi_insert */
CIM_MULTI_CONDITIONAL /* use table_multi_insert only if valid */
/* NOTE(review): what makes multi-insert "valid" is decided by code that is */
/* not part of this header -- confirm against the COPY FROM implementation. */
} CopyInsertMethod;
/*
* This struct contains all the state variables used throughout a COPY FROM
* operation.
*
* Multi-byte encodings: all supported client-side encodings encode multi-byte
* characters by having the first byte's high bit set. Subsequent bytes of the
* character can have the high bit not set. When scanning data in such an
* encoding to look for a match to a single-byte (ie ASCII) character, we must
* use the full pg_encoding_mblen() machinery to skip over multibyte
* characters, else we might find a false match to a trailing byte. In
* supported server encodings, there is no possibility of a false match, and
* it's faster to make useless comparisons to trailing bytes than it is to
* invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
* when we have to do it the hard way.
*/
typedef struct CopyFromStateData
{
/* low-level state data */
CopySource copy_src; /* type of copy source */
FILE *copy_file; /* used if copy_src == COPY_FILE */
StringInfo fe_msgbuf; /* used if copy_src == COPY_NEW_FE */
bool reached_eof; /* true if we read to end of copy data (not
* all copy_src types maintain this) */
EolType eol_type; /* EOL type of input */
int file_encoding; /* file or remote side's character encoding */
bool need_transcoding; /* does file encoding differ from server's? */
bool encoding_embeds_ascii; /* can an ASCII byte value appear as a
* non-first byte of a multibyte character?
* (see the comment above this struct) */
/* parameters from the COPY command */
Relation rel; /* relation to copy into (target of COPY FROM) */
List *attnumlist; /* integer list of attnums to copy */
char *filename; /* filename, or NULL for STDIN */
bool is_program; /* is 'filename' a program to popen? */
copy_data_source_cb data_source_cb; /* function for reading data */
CopyFormatOptions opts; /* parsed COPY options (see commands/copy.h) */
bool *convert_select_flags; /* per-column flags for selective
* conversion (CSV/TEXT); cf.
* opts.convert_selectively */
Node *whereClause; /* WHERE condition (or NULL) */
/* these are just for error messages, see CopyFromErrorCallback */
const char *cur_relname; /* table name for error messages */
uint64 cur_lineno; /* line number for error messages */
const char *cur_attname; /* current att for error messages */
const char *cur_attval; /* current att value for error messages */
/*
* Working state
*/
MemoryContext copycontext; /* per-copy execution context */
AttrNumber num_defaults; /* NOTE(review): presumably the number of
* entries in defmap/defexprs -- confirm */
FmgrInfo *in_functions; /* array of input functions, one per attribute */
Oid *typioparams; /* array of element types for in_functions */
int *defmap; /* array of default att numbers */
ExprState **defexprs; /* array of default att expressions */
bool volatile_defexprs; /* is any of defexprs volatile? */
List *range_table; /* NOTE(review): not documented here; confirm
* what populates it */
ExprState *qualexpr; /* NOTE(review): presumably the executable
* form of whereClause -- confirm */
TransitionCaptureState *transition_capture; /* NOTE(review): type comes
* from commands/trigger.h;
* usage not shown here */
/*
* These variables are used to reduce overhead in COPY FROM.
*
* attribute_buf holds the separated, de-escaped text for each field of
* the current line. The CopyReadAttributes functions return arrays of
* pointers into this buffer. We avoid palloc/pfree overhead by re-using
* the buffer on each cycle.
*
* In binary COPY FROM, attribute_buf holds the binary data for the
* current field, but the usage is otherwise similar.
*/
StringInfoData attribute_buf;
/* field raw data pointers found by COPY FROM */
int max_fields;
char **raw_fields;
/*
* Similarly, line_buf holds the whole input line being processed. The
* input cycle is first to read the whole line into line_buf, convert it
* to server encoding there, and then extract the individual attribute
* fields into attribute_buf. line_buf is preserved unmodified so that we
* can display it in error messages if appropriate. (In binary mode,
* line_buf is not used.)
*/
StringInfoData line_buf;
bool line_buf_converted; /* converted to server encoding? */
bool line_buf_valid; /* contains the row being processed? */
/*
* Finally, raw_buf holds raw data read from the data source (file or
* client connection). In text mode, CopyReadLine parses this data
* sufficiently to locate line boundaries, then transfers the data to
* line_buf and converts it. In binary mode, CopyReadBinaryData fetches
* appropriate amounts of data from this buffer. In both modes, we
* guarantee that there is a \0 at raw_buf[raw_buf_len].
*/
#define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */
char *raw_buf;
int raw_buf_index; /* next byte to process */
int raw_buf_len; /* total # of bytes stored */
/* Shorthand for number of unconsumed bytes available in raw_buf */
#define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index)
} CopyFromStateData;
extern void ReceiveCopyBegin(CopyFromState cstate);
extern void ReceiveCopyBinaryHeader(CopyFromState cstate);
#endif /* COPYFROM_INTERNAL_H */