From 7c5d0ae7078456bfeedb2103c45b9a32285c2631 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 20 Feb 2011 14:06:59 -0500 Subject: [PATCH] Add contrib/file_fdw foreign-data wrapper for reading files via COPY. This is both very useful in its own right, and an important test case for the core FDW support. This commit includes a small refactoring of copy.c to expose its option checking code as a separately callable function. The original patch submission duplicated hundreds of lines of that code, which seemed pretty unmaintainable. Shigeru Hanada, reviewed by Itagaki Takahiro and Tom Lane --- contrib/Makefile | 1 + contrib/README | 4 + contrib/file_fdw/.gitignore | 2 + contrib/file_fdw/Makefile | 21 + contrib/file_fdw/data/agg.bad | 4 + contrib/file_fdw/data/agg.csv | 4 + contrib/file_fdw/data/agg.data | 4 + contrib/file_fdw/expected/.gitignore | 1 + contrib/file_fdw/file_fdw--1.0.sql | 15 + contrib/file_fdw/file_fdw.c | 536 ++++++++++++++++++++++++ contrib/file_fdw/file_fdw.control | 5 + contrib/file_fdw/input/file_fdw.source | 131 ++++++ contrib/file_fdw/output/file_fdw.source | 225 ++++++++++ contrib/file_fdw/sql/.gitignore | 1 + doc/src/sgml/contrib.sgml | 1 + doc/src/sgml/file-fdw.sgml | 127 ++++++ doc/src/sgml/filelist.sgml | 1 + src/backend/commands/copy.c | 138 +++--- src/include/commands/copy.h | 5 +- 19 files changed, 1171 insertions(+), 55 deletions(-) create mode 100644 contrib/file_fdw/.gitignore create mode 100644 contrib/file_fdw/Makefile create mode 100644 contrib/file_fdw/data/agg.bad create mode 100644 contrib/file_fdw/data/agg.csv create mode 100644 contrib/file_fdw/data/agg.data create mode 100644 contrib/file_fdw/expected/.gitignore create mode 100644 contrib/file_fdw/file_fdw--1.0.sql create mode 100644 contrib/file_fdw/file_fdw.c create mode 100644 contrib/file_fdw/file_fdw.control create mode 100644 contrib/file_fdw/input/file_fdw.source create mode 100644 contrib/file_fdw/output/file_fdw.source create mode 100644 contrib/file_fdw/sql/.gitignore create mode 100644 doc/src/sgml/file-fdw.sgml diff --git a/contrib/Makefile b/contrib/Makefile index 2b314501f7..696776795e 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -18,6 +18,7 @@ SUBDIRS = \ dict_xsyn \ dummy_seclabel \ earthdistance \ + file_fdw \ fuzzystrmatch \ hstore \ intagg \ diff --git a/contrib/README b/contrib/README index 3c4e324271..9bd1b137e6 100644 --- a/contrib/README +++ b/contrib/README @@ -73,6 +73,10 @@ earthdistance - Functions for computing distances between two points on Earth by Bruno Wolff III and Hal Snyder +file_fdw + Foreign-data wrapper for server-side CSV/TEXT files + by Shigeru Hanada + fuzzystrmatch - Levenshtein, metaphone, and soundex fuzzy string matching by Joe Conway and Joel Burton diff --git a/contrib/file_fdw/.gitignore b/contrib/file_fdw/.gitignore new file mode 100644 index 0000000000..19b6c5ba42 --- /dev/null +++ b/contrib/file_fdw/.gitignore @@ -0,0 +1,2 @@ +# Generated subdirectories +/results/ diff --git a/contrib/file_fdw/Makefile b/contrib/file_fdw/Makefile new file mode 100644 index 0000000000..4616cec633 --- /dev/null +++ b/contrib/file_fdw/Makefile @@ -0,0 +1,21 @@ +# contrib/file_fdw/Makefile + +MODULES = file_fdw + +EXTENSION = file_fdw +DATA = file_fdw--1.0.sql + +REGRESS = file_fdw + +EXTRA_CLEAN = sql/file_fdw.sql expected/file_fdw.out + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/file_fdw +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/file_fdw/data/agg.bad b/contrib/file_fdw/data/agg.bad new file mode 100644 index 0000000000..3415b15007 --- /dev/null +++ b/contrib/file_fdw/data/agg.bad @@ -0,0 +1,4 @@ +56;@7.8@ +100;@99.097@ +0;@aaa@ +42;@324.78@ diff --git a/contrib/file_fdw/data/agg.csv b/contrib/file_fdw/data/agg.csv new file mode 100644 index 0000000000..3ee6bf24db --- /dev/null +++ b/contrib/file_fdw/data/agg.csv @@ -0,0 +1,4 @@ +56;@7.8@ +100;@99.097@ +0;@0.09561@ +42;@324.78@ diff --git a/contrib/file_fdw/data/agg.data b/contrib/file_fdw/data/agg.data new file mode 100644 index 0000000000..d92c7df4d0 --- /dev/null +++ b/contrib/file_fdw/data/agg.data @@ -0,0 +1,4 @@ +56 7.8 +100 99.097 +0 0.09561 +42 324.78 diff --git a/contrib/file_fdw/expected/.gitignore b/contrib/file_fdw/expected/.gitignore new file mode 100644 index 0000000000..a464ad144f --- /dev/null +++ b/contrib/file_fdw/expected/.gitignore @@ -0,0 +1 @@ +/file_fdw.out diff --git a/contrib/file_fdw/file_fdw--1.0.sql b/contrib/file_fdw/file_fdw--1.0.sql new file mode 100644 index 0000000000..0cca65f463 --- /dev/null +++ b/contrib/file_fdw/file_fdw--1.0.sql @@ -0,0 +1,15 @@ +/* contrib/file_fdw/file_fdw--1.0.sql */ + +CREATE FUNCTION file_fdw_handler() +RETURNS fdw_handler +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FUNCTION file_fdw_validator(text[], oid) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FOREIGN DATA WRAPPER file_fdw + HANDLER file_fdw_handler + VALIDATOR file_fdw_validator; diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c new file mode 100644 index 0000000000..265afb5d9b --- /dev/null +++ b/contrib/file_fdw/file_fdw.c @@ -0,0 +1,536 @@ +/*------------------------------------------------------------------------- + * + * file_fdw.c + * foreign-data wrapper for server-side flat files. + * + * Copyright (c) 2010-2011, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/file_fdw/file_fdw.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include + +#include "access/reloptions.h" +#include "catalog/pg_foreign_table.h" +#include "commands/copy.h" +#include "commands/defrem.h" +#include "commands/explain.h" +#include "foreign/fdwapi.h" +#include "foreign/foreign.h" +#include "miscadmin.h" +#include "optimizer/cost.h" + +PG_MODULE_MAGIC; + +/* + * Describes the valid options for objects that use this wrapper. + */ +struct FileFdwOption +{ + const char *optname; + Oid optcontext; /* Oid of catalog in which option may appear */ +}; + +/* + * Valid options for file_fdw. + * These options are based on the options for COPY FROM command. + * + * Note: If you are adding new option for user mapping, you need to modify + * fileGetOptions(), which currently doesn't bother to look at user mappings. + */ +static struct FileFdwOption valid_options[] = { + /* File options */ + { "filename", ForeignTableRelationId }, + + /* Format options */ + /* oids option is not supported */ + { "format", ForeignTableRelationId }, + { "header", ForeignTableRelationId }, + { "delimiter", ForeignTableRelationId }, + { "quote", ForeignTableRelationId }, + { "escape", ForeignTableRelationId }, + { "null", ForeignTableRelationId }, + + /* + * force_quote is not supported by file_fdw because it's for COPY TO. + */ + + /* + * force_not_null is not supported by file_fdw. It would need a parser + * for list of columns, not to mention a way to check the column list + * against the table. + */ + + /* Sentinel */ + { NULL, InvalidOid } +}; + +/* + * FDW-specific information for ForeignScanState.fdw_state. + */ +typedef struct FileFdwExecutionState +{ + char *filename; /* file to read */ + List *options; /* merged COPY options, excluding filename */ + CopyState cstate; /* state of reading file */ +} FileFdwExecutionState; + +/* + * SQL functions + */ +extern Datum file_fdw_handler(PG_FUNCTION_ARGS); +extern Datum file_fdw_validator(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(file_fdw_handler); +PG_FUNCTION_INFO_V1(file_fdw_validator); + +/* + * FDW callback routines + */ +static FdwPlan *filePlanForeignScan(Oid foreigntableid, + PlannerInfo *root, + RelOptInfo *baserel); +static void fileExplainForeignScan(ForeignScanState *node, ExplainState *es); +static void fileBeginForeignScan(ForeignScanState *node, int eflags); +static TupleTableSlot *fileIterateForeignScan(ForeignScanState *node); +static void fileReScanForeignScan(ForeignScanState *node); +static void fileEndForeignScan(ForeignScanState *node); + +/* + * Helper functions + */ +static bool is_valid_option(const char *option, Oid context); +static void fileGetOptions(Oid foreigntableid, + char **filename, List **other_options); +static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel, + const char *filename, + Cost *startup_cost, Cost *total_cost); + + +/* + * Foreign-data wrapper handler function: return a struct with pointers + * to my callback routines. + */ +Datum +file_fdw_handler(PG_FUNCTION_ARGS) +{ + FdwRoutine *fdwroutine = makeNode(FdwRoutine); + + fdwroutine->PlanForeignScan = filePlanForeignScan; + fdwroutine->ExplainForeignScan = fileExplainForeignScan; + fdwroutine->BeginForeignScan = fileBeginForeignScan; + fdwroutine->IterateForeignScan = fileIterateForeignScan; + fdwroutine->ReScanForeignScan = fileReScanForeignScan; + fdwroutine->EndForeignScan = fileEndForeignScan; + + PG_RETURN_POINTER(fdwroutine); +} + +/* + * Validate the generic options given to a FOREIGN DATA WRAPPER, SERVER, + * USER MAPPING or FOREIGN TABLE that uses file_fdw. + * + * Raise an ERROR if the option or its value is considered invalid. + */ +Datum +file_fdw_validator(PG_FUNCTION_ARGS) +{ + List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); + Oid catalog = PG_GETARG_OID(1); + char *filename = NULL; + List *other_options = NIL; + ListCell *cell; + + /* + * Only superusers are allowed to set options of a file_fdw foreign table. + * This is because the filename is one of those options, and we don't + * want non-superusers to be able to determine which file gets read. + * + * Putting this sort of permissions check in a validator is a bit of a + * crock, but there doesn't seem to be any other place that can enforce + * the check more cleanly. + * + * Note that the valid_options[] array disallows setting filename at + * any options level other than foreign table --- otherwise there'd + * still be a security hole. + */ + if (catalog == ForeignTableRelationId && !superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("only superuser can change options of a file_fdw foreign table"))); + + /* + * Check that only options supported by file_fdw, and allowed for the + * current object type, are given. + */ + foreach(cell, options_list) + { + DefElem *def = (DefElem *) lfirst(cell); + + if (!is_valid_option(def->defname, catalog)) + { + struct FileFdwOption *opt; + StringInfoData buf; + + /* + * Unknown option specified, complain about it. Provide a hint + * with list of valid options for the object. + */ + initStringInfo(&buf); + for (opt = valid_options; opt->optname; opt++) + { + if (catalog == opt->optcontext) + appendStringInfo(&buf, "%s%s", (buf.len > 0) ? ", " : "", + opt->optname); + } + + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), + errmsg("invalid option \"%s\"", def->defname), + errhint("Valid options in this context are: %s", + buf.data))); + } + + /* Separate out filename, since ProcessCopyOptions won't allow it */ + if (strcmp(def->defname, "filename") == 0) + { + if (filename) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + filename = defGetString(def); + } + else + other_options = lappend(other_options, def); + } + + /* + * Now apply the core COPY code's validation logic for more checks. + */ + ProcessCopyOptions(NULL, true, other_options); + + PG_RETURN_VOID(); +} + +/* + * Check if the provided option is one of the valid options. + * context is the Oid of the catalog holding the object the option is for. + */ +static bool +is_valid_option(const char *option, Oid context) +{ + struct FileFdwOption *opt; + + for (opt = valid_options; opt->optname; opt++) + { + if (context == opt->optcontext && strcmp(opt->optname, option) == 0) + return true; + } + return false; +} + +/* + * Fetch the options for a file_fdw foreign table. + * + * We have to separate out "filename" from the other options because + * it must not appear in the options list passed to the core COPY code. + */ +static void +fileGetOptions(Oid foreigntableid, + char **filename, List **other_options) +{ + ForeignTable *table; + ForeignServer *server; + ForeignDataWrapper *wrapper; + List *options; + ListCell *lc, + *prev; + + /* + * Extract options from FDW objects. We ignore user mappings because + * file_fdw doesn't have any options that can be specified there. + * + * (XXX Actually, given the current contents of valid_options[], there's + * no point in examining anything except the foreign table's own options. + * Simplify?) + */ + table = GetForeignTable(foreigntableid); + server = GetForeignServer(table->serverid); + wrapper = GetForeignDataWrapper(server->fdwid); + + options = NIL; + options = list_concat(options, wrapper->options); + options = list_concat(options, server->options); + options = list_concat(options, table->options); + + /* + * Separate out the filename. + */ + *filename = NULL; + prev = NULL; + foreach(lc, options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "filename") == 0) + { + *filename = defGetString(def); + options = list_delete_cell(options, lc, prev); + break; + } + prev = lc; + } + if (*filename == NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_UNABLE_TO_CREATE_REPLY), + errmsg("filename is required for file_fdw foreign tables"))); + *other_options = options; +} + +/* + * filePlanForeignScan + * Create a FdwPlan for a scan on the foreign table + */ +static FdwPlan * +filePlanForeignScan(Oid foreigntableid, + PlannerInfo *root, + RelOptInfo *baserel) +{ + FdwPlan *fdwplan; + char *filename; + List *options; + + /* Fetch options --- we only need filename at this point */ + fileGetOptions(foreigntableid, &filename, &options); + + /* Construct FdwPlan with cost estimates */ + fdwplan = makeNode(FdwPlan); + estimate_costs(root, baserel, filename, + &fdwplan->startup_cost, &fdwplan->total_cost); + fdwplan->fdw_private = NIL; /* not used */ + + return fdwplan; +} + +/* + * fileExplainForeignScan + * Produce extra output for EXPLAIN + */ +static void +fileExplainForeignScan(ForeignScanState *node, ExplainState *es) +{ + char *filename; + List *options; + + /* Fetch options --- we only need filename at this point */ + fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), + &filename, &options); + + ExplainPropertyText("Foreign File", filename, es); + + /* Suppress file size if we're not showing cost details */ + if (es->costs) + { + struct stat stat_buf; + + if (stat(filename, &stat_buf) == 0) + ExplainPropertyLong("Foreign File Size", (long) stat_buf.st_size, + es); + } +} + +/* + * fileBeginForeignScan + * Initiate access to the file by creating CopyState + */ +static void +fileBeginForeignScan(ForeignScanState *node, int eflags) +{ + char *filename; + List *options; + CopyState cstate; + FileFdwExecutionState *festate; + + /* + * Do nothing in EXPLAIN (no ANALYZE) case. node->fdw_state stays NULL. + */ + if (eflags & EXEC_FLAG_EXPLAIN_ONLY) + return; + + /* Fetch options of foreign table */ + fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation), + &filename, &options); + + /* + * Create CopyState from FDW options. We always acquire all columns, + * so as to match the expected ScanTupleSlot signature. + */ + cstate = BeginCopyFrom(node->ss.ss_currentRelation, + filename, + NIL, + options); + + /* + * Save state in node->fdw_state. We must save enough information to call + * BeginCopyFrom() again. + */ + festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState)); + festate->filename = filename; + festate->options = options; + festate->cstate = cstate; + + node->fdw_state = (void *) festate; +} + +/* + * fileIterateForeignScan + * Read next record from the data file and store it into the + * ScanTupleSlot as a virtual tuple + */ +static TupleTableSlot * +fileIterateForeignScan(ForeignScanState *node) +{ + FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state; + TupleTableSlot *slot = node->ss.ss_ScanTupleSlot; + bool found; + ErrorContextCallback errcontext; + + /* Set up callback to identify error line number. */ + errcontext.callback = CopyFromErrorCallback; + errcontext.arg = (void *) festate->cstate; + errcontext.previous = error_context_stack; + error_context_stack = &errcontext; + + /* + * The protocol for loading a virtual tuple into a slot is first + * ExecClearTuple, then fill the values/isnull arrays, then + * ExecStoreVirtualTuple. If we don't find another row in the file, + * we just skip the last step, leaving the slot empty as required. + * + * We can pass ExprContext = NULL because we read all columns from the + * file, so no need to evaluate default expressions. + * + * We can also pass tupleOid = NULL because we don't allow oids for + * foreign tables. + */ + ExecClearTuple(slot); + found = NextCopyFrom(festate->cstate, NULL, + slot->tts_values, slot->tts_isnull, + NULL); + if (found) + ExecStoreVirtualTuple(slot); + + /* Remove error callback. */ + error_context_stack = errcontext.previous; + + return slot; +} + +/* + * fileEndForeignScan + * Finish scanning foreign table and dispose objects used for this scan + */ +static void +fileEndForeignScan(ForeignScanState *node) +{ + FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state; + + /* if festate is NULL, we are in EXPLAIN; nothing to do */ + if (festate) + EndCopyFrom(festate->cstate); +} + +/* + * fileReScanForeignScan + * Rescan table, possibly with new parameters + */ +static void +fileReScanForeignScan(ForeignScanState *node) +{ + FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state; + + EndCopyFrom(festate->cstate); + + festate->cstate = BeginCopyFrom(node->ss.ss_currentRelation, + festate->filename, + NIL, + festate->options); +} + +/* + * Estimate costs of scanning a foreign table. + */ +static void +estimate_costs(PlannerInfo *root, RelOptInfo *baserel, + const char *filename, + Cost *startup_cost, Cost *total_cost) +{ + struct stat stat_buf; + BlockNumber pages; + int tuple_width; + double ntuples; + double nrows; + Cost run_cost = 0; + Cost cpu_per_tuple; + + /* + * Get size of the file. It might not be there at plan time, though, + * in which case we have to use a default estimate. + */ + if (stat(filename, &stat_buf) < 0) + stat_buf.st_size = 10 * BLCKSZ; + + /* + * Convert size to pages for use in I/O cost estimate below. + */ + pages = (stat_buf.st_size + (BLCKSZ-1)) / BLCKSZ; + if (pages < 1) + pages = 1; + + /* + * Estimate the number of tuples in the file. We back into this estimate + * using the planner's idea of the relation width; which is bogus if not + * all columns are being read, not to mention that the text representation + * of a row probably isn't the same size as its internal representation. + * FIXME later. + */ + tuple_width = MAXALIGN(baserel->width) + MAXALIGN(sizeof(HeapTupleHeaderData)); + + ntuples = clamp_row_est((double) stat_buf.st_size / (double) tuple_width); + + /* + * Now estimate the number of rows returned by the scan after applying + * the baserestrictinfo quals. This is pretty bogus too, since the + * planner will have no stats about the relation, but it's better than + * nothing. + */ + nrows = ntuples * + clauselist_selectivity(root, + baserel->baserestrictinfo, + 0, + JOIN_INNER, + NULL); + + nrows = clamp_row_est(nrows); + + /* Save the output-rows estimate for the planner */ + baserel->rows = nrows; + + /* + * Now estimate costs. We estimate costs almost the same way as + * cost_seqscan(), thus assuming that I/O costs are equivalent to a + * regular table file of the same size. However, we take per-tuple CPU + * costs as 10x of a seqscan, to account for the cost of parsing records. + */ + run_cost += seq_page_cost * pages; + + *startup_cost = baserel->baserestrictcost.startup; + cpu_per_tuple = cpu_tuple_cost * 10 + baserel->baserestrictcost.per_tuple; + run_cost += cpu_per_tuple * ntuples; + *total_cost = *startup_cost + run_cost; +} diff --git a/contrib/file_fdw/file_fdw.control b/contrib/file_fdw/file_fdw.control new file mode 100644 index 0000000000..f0fccab53b --- /dev/null +++ b/contrib/file_fdw/file_fdw.control @@ -0,0 +1,5 @@ +# file_fdw extension +comment = 'foreign-data wrapper for flat file access' +default_version = '1.0' +module_pathname = '$libdir/file_fdw' +relocatable = true diff --git a/contrib/file_fdw/input/file_fdw.source b/contrib/file_fdw/input/file_fdw.source new file mode 100644 index 0000000000..9ff7235a12 --- /dev/null +++ b/contrib/file_fdw/input/file_fdw.source @@ -0,0 +1,131 @@ +-- +-- Test foreign-data wrapper file_fdw. +-- + +-- Clean up in case a prior regression run failed +SET client_min_messages TO 'error'; +DROP ROLE IF EXISTS file_fdw_superuser, file_fdw_user, no_priv_user; +RESET client_min_messages; + +CREATE ROLE file_fdw_superuser LOGIN SUPERUSER; -- is a superuser +CREATE ROLE file_fdw_user LOGIN; -- has priv and user mapping +CREATE ROLE no_priv_user LOGIN; -- has priv but no user mapping + +-- Install file_fdw +CREATE EXTENSION file_fdw; + +-- file_fdw_superuser owns fdw-related objects +SET ROLE file_fdw_superuser; +CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw; + +-- privilege tests +SET ROLE file_fdw_user; +CREATE FOREIGN DATA WRAPPER file_fdw2 HANDLER file_fdw_handler VALIDATOR file_fdw_validator; -- ERROR +CREATE SERVER file_server2 FOREIGN DATA WRAPPER file_fdw; -- ERROR +CREATE USER MAPPING FOR file_fdw_user SERVER file_server; -- ERROR + +SET ROLE file_fdw_superuser; +GRANT USAGE ON FOREIGN SERVER file_server TO file_fdw_user; + +SET ROLE file_fdw_user; +CREATE USER MAPPING FOR file_fdw_user SERVER file_server; + +-- create user mappings and grant privilege to test users +SET ROLE file_fdw_superuser; +CREATE USER MAPPING FOR file_fdw_superuser SERVER file_server; +CREATE USER MAPPING FOR no_priv_user SERVER file_server; + +-- validator tests +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'xml'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', header 'true'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', quote ':'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', escape ':'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', header 'true'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', quote ':'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', escape ':'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter 'a'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', escape '-'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', quote '-', null '=-='); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '-', null '=-='); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '-', quote '-'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '---'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', quote '---'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', escape '---'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter '\'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter '.'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter '1'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter 'a'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter ' +'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', null ' +'); -- ERROR + +CREATE FOREIGN TABLE agg_text ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'text', filename '@abs_srcdir@/data/agg.data', delimiter ' ', null '\N'); +GRANT SELECT ON agg_text TO file_fdw_user; +CREATE FOREIGN TABLE agg_csv ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null ''); +CREATE FOREIGN TABLE agg_bad ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', delimiter ';', quote '@', escape '"', null ''); + +-- basic query tests +SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; +SELECT * FROM agg_csv ORDER BY a; +SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; + +-- error context report tests +SELECT * FROM agg_bad; -- ERROR + +-- misc query tests +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv; +\t off +PREPARE st(int) AS SELECT * FROM agg_csv WHERE a = $1; +EXECUTE st(100); +EXECUTE st(100); +DEALLOCATE st; + +-- tableoid +SELECT tableoid::regclass, b FROM agg_csv; + +-- updates aren't supported +INSERT INTO agg_csv VALUES(1,2.0); +UPDATE agg_csv SET a = 1; +DELETE FROM agg_csv WHERE a = 100; +SELECT * FROM agg_csv FOR UPDATE OF agg_csv; +-- but this should be ignored +SELECT * FROM agg_csv FOR UPDATE; + +-- privilege tests +SET ROLE file_fdw_superuser; +SELECT * FROM agg_text ORDER BY a; +SET ROLE file_fdw_user; +SELECT * FROM agg_text ORDER BY a; +SET ROLE no_priv_user; +SELECT * FROM agg_text ORDER BY a; -- ERROR +SET ROLE file_fdw_user; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_text WHERE a > 0; +\t off + +-- privilege tests for object +SET ROLE file_fdw_superuser; +ALTER FOREIGN TABLE agg_text OWNER TO file_fdw_user; +ALTER FOREIGN TABLE agg_text OPTIONS (SET format 'text'); +SET ROLE file_fdw_user; +ALTER FOREIGN TABLE agg_text OPTIONS (SET format 'text'); +SET ROLE file_fdw_superuser; + +-- cleanup +RESET ROLE; +DROP EXTENSION file_fdw CASCADE; +DROP ROLE file_fdw_superuser, file_fdw_user, no_priv_user; diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source new file mode 100644 index 0000000000..2ba36c9368 --- /dev/null +++ b/contrib/file_fdw/output/file_fdw.source @@ -0,0 +1,225 @@ +-- +-- Test foreign-data wrapper file_fdw. +-- +-- Clean up in case a prior regression run failed +SET client_min_messages TO 'error'; +DROP ROLE IF EXISTS file_fdw_superuser, file_fdw_user, no_priv_user; +RESET client_min_messages; +CREATE ROLE file_fdw_superuser LOGIN SUPERUSER; -- is a superuser +CREATE ROLE file_fdw_user LOGIN; -- has priv and user mapping +CREATE ROLE no_priv_user LOGIN; -- has priv but no user mapping +-- Install file_fdw +CREATE EXTENSION file_fdw; +-- file_fdw_superuser owns fdw-related objects +SET ROLE file_fdw_superuser; +CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw; +-- privilege tests +SET ROLE file_fdw_user; +CREATE FOREIGN DATA WRAPPER file_fdw2 HANDLER file_fdw_handler VALIDATOR file_fdw_validator; -- ERROR +ERROR: permission denied to create foreign-data wrapper "file_fdw2" +HINT: Must be superuser to create a foreign-data wrapper. +CREATE SERVER file_server2 FOREIGN DATA WRAPPER file_fdw; -- ERROR +ERROR: permission denied for foreign-data wrapper file_fdw +CREATE USER MAPPING FOR file_fdw_user SERVER file_server; -- ERROR +ERROR: permission denied for foreign server file_server +SET ROLE file_fdw_superuser; +GRANT USAGE ON FOREIGN SERVER file_server TO file_fdw_user; +SET ROLE file_fdw_user; +CREATE USER MAPPING FOR file_fdw_user SERVER file_server; +-- create user mappings and grant privilege to test users +SET ROLE file_fdw_superuser; +CREATE USER MAPPING FOR file_fdw_superuser SERVER file_server; +CREATE USER MAPPING FOR no_priv_user SERVER file_server; +-- validator tests +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'xml'); -- ERROR +ERROR: COPY format "xml" not recognized +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', header 'true'); -- ERROR +ERROR: COPY HEADER available only in CSV mode +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', quote ':'); -- ERROR +ERROR: COPY quote available only in CSV mode +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', escape ':'); -- ERROR +ERROR: COPY escape available only in CSV mode +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', header 'true'); -- ERROR +ERROR: COPY HEADER available only in CSV mode +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', quote ':'); -- ERROR +ERROR: COPY quote available only in CSV mode +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', escape ':'); -- ERROR +ERROR: COPY escape available only in CSV mode +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter 'a'); -- ERROR +ERROR: COPY delimiter cannot be "a" +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', escape '-'); -- ERROR +ERROR: COPY escape available only in CSV mode +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', quote '-', null '=-='); -- ERROR +ERROR: CSV quote character must not appear in the NULL specification +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '-', null '=-='); -- ERROR +ERROR: COPY delimiter must not appear in the NULL specification +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '-', quote '-'); -- ERROR +ERROR: COPY delimiter and quote must be different +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter '---'); -- ERROR +ERROR: COPY delimiter must be a single one-byte character +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', quote '---'); -- ERROR +ERROR: COPY quote must be a single one-byte character +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', escape '---'); -- ERROR +ERROR: COPY escape must be a single one-byte character +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter '\'); -- ERROR +ERROR: COPY delimiter cannot be "\" +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter '.'); -- ERROR +ERROR: COPY delimiter cannot be "." +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter '1'); -- ERROR +ERROR: COPY delimiter cannot be "1" +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'text', delimiter 'a'); -- ERROR +ERROR: COPY delimiter cannot be "a" +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', delimiter ' +'); -- ERROR +ERROR: COPY delimiter cannot be newline or carriage return +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', null ' +'); -- ERROR +ERROR: COPY null representation cannot use newline or carriage return +CREATE FOREIGN TABLE agg_text ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'text', filename '@abs_srcdir@/data/agg.data', delimiter ' ', null '\N'); +GRANT SELECT ON agg_text TO file_fdw_user; +CREATE FOREIGN TABLE agg_csv ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.csv', header 'true', delimiter ';', quote '@', escape '"', null ''); +CREATE FOREIGN TABLE agg_bad ( + a int2, + b float4 +) SERVER file_server +OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', delimiter ';', quote '@', escape '"', null ''); +-- basic query tests +SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a; + a | b +-----+-------- + 42 | 324.78 + 100 | 99.097 +(2 rows) + +SELECT * FROM agg_csv ORDER BY a; + a | b +-----+--------- + 0 | 0.09561 + 42 | 324.78 + 100 | 99.097 +(3 rows) + +SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; + a | b | a | b +-----+---------+-----+--------- + 0 | 0.09561 | 0 | 0.09561 + 42 | 324.78 | 42 | 324.78 + 100 | 99.097 | 100 | 99.097 +(3 rows) + +-- error context report tests +SELECT * FROM agg_bad; -- ERROR +ERROR: invalid input syntax for type real: "aaa" +CONTEXT: COPY agg_bad, line 3, column b: "aaa" +-- misc query tests +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv; + Foreign Scan on public.agg_csv + Output: a, b + Foreign File: @abs_srcdir@/data/agg.csv + +\t off +PREPARE st(int) AS SELECT * FROM agg_csv WHERE a = $1; +EXECUTE st(100); + a | b +-----+-------- + 100 | 99.097 +(1 row) + +EXECUTE st(100); + a | b +-----+-------- + 100 | 99.097 +(1 row) + +DEALLOCATE st; +-- tableoid +SELECT tableoid::regclass, b FROM agg_csv; + tableoid | b +----------+--------- + agg_csv | 99.097 + agg_csv | 0.09561 + agg_csv | 324.78 +(3 rows) + +-- updates aren't supported +INSERT INTO agg_csv VALUES(1,2.0); +ERROR: cannot change foreign table "agg_csv" +UPDATE agg_csv SET a = 1; +ERROR: cannot change foreign table "agg_csv" +DELETE FROM agg_csv WHERE a = 100; +ERROR: cannot change foreign table "agg_csv" +SELECT * FROM agg_csv FOR UPDATE OF agg_csv; +ERROR: SELECT FOR UPDATE/SHARE cannot be used with foreign table "agg_csv" +LINE 1: SELECT * FROM agg_csv FOR UPDATE OF agg_csv; + ^ +-- but this should be ignored +SELECT * FROM agg_csv FOR UPDATE; + a | b +-----+--------- + 100 | 99.097 + 0 | 0.09561 + 42 | 324.78 +(3 rows) + +-- privilege tests +SET ROLE file_fdw_superuser; +SELECT * FROM agg_text ORDER BY a; + a | b +-----+--------- + 0 | 0.09561 + 42 | 324.78 + 56 | 7.8 + 100 | 99.097 +(4 rows) + +SET ROLE file_fdw_user; +SELECT * FROM agg_text ORDER BY a; + a | b +-----+--------- + 0 | 0.09561 + 42 | 324.78 + 56 | 7.8 + 100 | 99.097 +(4 rows) + +SET ROLE no_priv_user; +SELECT * FROM agg_text ORDER BY a; -- ERROR +ERROR: permission denied for relation agg_text +SET ROLE file_fdw_user; +\t on +EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_text WHERE a > 0; + Foreign Scan on public.agg_text + Output: a, b + Filter: (agg_text.a > 0) + Foreign File: @abs_srcdir@/data/agg.data + +\t off +-- privilege tests for object +SET ROLE file_fdw_superuser; +ALTER FOREIGN TABLE agg_text OWNER TO file_fdw_user; +ALTER FOREIGN TABLE agg_text OPTIONS (SET format 'text'); +SET ROLE file_fdw_user; +ALTER FOREIGN TABLE agg_text OPTIONS (SET format 'text'); +ERROR: only superuser can change options of a file_fdw foreign table +SET ROLE file_fdw_superuser; +-- cleanup +RESET ROLE; +DROP EXTENSION file_fdw CASCADE; +NOTICE: drop cascades to 7 other objects +DETAIL: drop cascades to server file_server +drop cascades to user mapping for file_fdw_user +drop cascades to user mapping for file_fdw_superuser +drop cascades to user mapping for no_priv_user +drop cascades to foreign table agg_text +drop cascades to foreign table agg_csv +drop cascades to foreign table agg_bad +DROP ROLE file_fdw_superuser, file_fdw_user, no_priv_user; diff --git a/contrib/file_fdw/sql/.gitignore b/contrib/file_fdw/sql/.gitignore new file mode 100644 index 0000000000..ebf16fed94 --- /dev/null +++ b/contrib/file_fdw/sql/.gitignore @@ -0,0 +1 @@ +/file_fdw.sql diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index 4504ab1749..adf09ca872 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -100,6 +100,7 @@ CREATE EXTENSION module_name FROM unpackaged; &dict-xsyn; &dummy-seclabel; &earthdistance; + &file-fdw; &fuzzystrmatch; &hstore; &intagg; diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml new file mode 100644 index 0000000000..e292166718 --- /dev/null +++ b/doc/src/sgml/file-fdw.sgml @@ -0,0 +1,127 @@ + + + + file_fdw + + + file_fdw + + + + The file_fdw module provides the foreign-data wrapper + file_fdw, which can be used to access data + files in the server's filesystem. Data files must be in a format + that can be read by COPY FROM; + see for details. + + + + A foreign table created using this wrapper can have the following options: + + + + + + filename + + + + Specifies the file to be read. Required. Must be an absolute path name. + + + + + + format + + + + Specifies the file's format, + the same as COPY's FORMAT option. + + + + + + header + + + + Specifies whether the file has a header line, + the same as COPY's HEADER option. + + + + + + delimiter + + + + Specifies the file's delimiter character, + the same as COPY's DELIMITER option. + + + + + + quote + + + + Specifies the file's quote character, + the same as COPY's QUOTE option. + + + + + + escape + + + + Specifies the file's escape character, + the same as COPY's ESCAPE option. + + + + + + null + + + + Specifies the file's null string, + the same as COPY's NULL option. + + + + + + + + COPY's OIDS, FORCE_QUOTE, + and FORCE_NOT_NULL options are currently not supported by + file_fdw. + + + + These options can only be specified for a foreign table, not in the + options of the file_fdw foreign-data wrapper, nor in the + options of a server or user mapping using the wrapper. + + + + Changing table-level options requires superuser privileges, for security + reasons: only a superuser should be able to determine which file is read. + In principle non-superusers could be allowed to change the other options, + but that's not supported at present. + + + + For a foreign table using file_fdw, EXPLAIN shows + the name of the file to be read. Unless COSTS OFF is + specified, the file size (in bytes) is shown as well. + + + diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 659bcba7c7..1a6079639f 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -106,6 +106,7 @@ + diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 2c06d80951..294450ef66 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -114,7 +114,10 @@ typedef struct CopyStateData char *delim; /* column delimiter (must be 1 byte) */ char *quote; /* CSV quote char (must be 1 byte) */ char *escape; /* CSV escape char (must be 1 byte) */ + List *force_quote; /* list of column names */ + bool force_quote_all; /* FORCE QUOTE *? */ bool *force_quote_flags; /* per-column CSV FQ flags */ + List *force_notnull; /* list of column names */ bool *force_notnull_flags; /* per-column CSV FNN flags */ /* these are just for error messages, see CopyFromErrorCallback */ @@ -764,7 +767,7 @@ DoCopy(const CopyStmt *stmt, const char *queryString) tupDesc = RelationGetDescr(rel); attnums = CopyGetAttnums(tupDesc, rel, stmt->attlist); - foreach (cur, attnums) + foreach(cur, attnums) { int attno = lfirst_int(cur) - FirstLowInvalidHeapAttributeNumber; @@ -814,52 +817,33 @@ DoCopy(const CopyStmt *stmt, const char *queryString) } /* - * Common setup routines used by BeginCopyFrom and BeginCopyTo. + * Process the statement option list for COPY. * - * Iff , unload or reload in the binary format, as opposed to the - * more wasteful but more robust and portable text format. + * Scan the options list (a list of DefElem) and transpose the information + * into cstate, applying appropriate error checking. * - * Iff , unload or reload the format that includes OID information. - * On input, we accept OIDs whether or not the table has an OID column, - * but silently drop them if it does not. On output, we report an error - * if the user asks for OIDs in a table that has none (not providing an - * OID column might seem friendlier, but could seriously confuse programs). + * cstate is assumed to be filled with zeroes initially. * - * If in the text format, delimit columns with delimiter and print - * NULL values as . + * This is exported so that external users of the COPY API can sanity-check + * a list of options. In that usage, cstate should be passed as NULL + * (since external users don't know sizeof(CopyStateData)) and the collected + * data is just leaked until CurrentMemoryContext is reset. + * + * Note that additional checking, such as whether column names listed in FORCE + * QUOTE actually exist, has to be applied later. This just checks for + * self-consistency of the options list. */ -static CopyState -BeginCopy(bool is_from, - Relation rel, - Node *raw_query, - const char *queryString, - List *attnamelist, - List *options) +void +ProcessCopyOptions(CopyState cstate, + bool is_from, + List *options) { - CopyState cstate; - List *force_quote = NIL; - List *force_notnull = NIL; - bool force_quote_all = false; bool format_specified = false; ListCell *option; - TupleDesc tupDesc; - int num_phys_attrs; - MemoryContext oldcontext; - /* Allocate workspace and zero all fields */ - cstate = (CopyStateData *) palloc0(sizeof(CopyStateData)); - - /* - * We allocate everything used by a cstate in a new memory context. - * This would avoid memory leaks repeated uses of COPY in a query. - */ - cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext, - "COPY", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); - - oldcontext = MemoryContextSwitchTo(cstate->copycontext); + /* Support external use for option sanity checking */ + if (cstate == NULL) + cstate = (CopyStateData *) palloc0(sizeof(CopyStateData)); /* Extract options from the statement node tree */ foreach(option, options) @@ -936,14 +920,14 @@ BeginCopy(bool is_from, } else if (strcmp(defel->defname, "force_quote") == 0) { - if (force_quote || force_quote_all) + if (cstate->force_quote || cstate->force_quote_all) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); if (defel->arg && IsA(defel->arg, A_Star)) - force_quote_all = true; + cstate->force_quote_all = true; else if (defel->arg && IsA(defel->arg, List)) - force_quote = (List *) defel->arg; + cstate->force_quote = (List *) defel->arg; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -952,12 +936,12 @@ BeginCopy(bool is_from, } else if (strcmp(defel->defname, "force_not_null") == 0) { - if (force_notnull) + if (cstate->force_notnull) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); if (defel->arg && IsA(defel->arg, List)) - force_notnull = (List *) defel->arg; + cstate->force_notnull = (List *) defel->arg; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -1071,21 +1055,21 @@ BeginCopy(bool is_from, errmsg("COPY escape must be a single one-byte character"))); /* Check force_quote */ - if (!cstate->csv_mode && (force_quote != NIL || force_quote_all)) + if (!cstate->csv_mode && (cstate->force_quote || cstate->force_quote_all)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY force quote available only in CSV mode"))); - if ((force_quote != NIL || force_quote_all) && is_from) + if ((cstate->force_quote || cstate->force_quote_all) && is_from) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY force quote only available using COPY TO"))); /* Check force_notnull */ - if (!cstate->csv_mode && force_notnull != NIL) + if (!cstate->csv_mode && cstate->force_notnull != NIL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY force not null available only in CSV mode"))); - if (force_notnull != NIL && !is_from) + if (cstate->force_notnull != NIL && !is_from) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("COPY force not null only available using COPY FROM"))); @@ -1102,7 +1086,55 @@ BeginCopy(bool is_from, ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("CSV quote character must not appear in the NULL specification"))); +} +/* + * Common setup routines used by BeginCopyFrom and BeginCopyTo. + * + * Iff , unload or reload in the binary format, as opposed to the + * more wasteful but more robust and portable text format. + * + * Iff , unload or reload the format that includes OID information. + * On input, we accept OIDs whether or not the table has an OID column, + * but silently drop them if it does not. On output, we report an error + * if the user asks for OIDs in a table that has none (not providing an + * OID column might seem friendlier, but could seriously confuse programs). + * + * If in the text format, delimit columns with delimiter and print + * NULL values as . + */ +static CopyState +BeginCopy(bool is_from, + Relation rel, + Node *raw_query, + const char *queryString, + List *attnamelist, + List *options) +{ + CopyState cstate; + TupleDesc tupDesc; + int num_phys_attrs; + MemoryContext oldcontext; + + /* Allocate workspace and zero all fields */ + cstate = (CopyStateData *) palloc0(sizeof(CopyStateData)); + + /* + * We allocate everything used by a cstate in a new memory context. + * This avoids memory leaks during repeated use of COPY in a query. + */ + cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext, + "COPY", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + oldcontext = MemoryContextSwitchTo(cstate->copycontext); + + /* Extract options from the statement node tree */ + ProcessCopyOptions(cstate, is_from, options); + + /* Process the source/target relation or query */ if (rel) { Assert(!raw_query); @@ -1197,19 +1229,19 @@ BeginCopy(bool is_from, /* Convert FORCE QUOTE name list to per-column flags, check validity */ cstate->force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); - if (force_quote_all) + if (cstate->force_quote_all) { int i; for (i = 0; i < num_phys_attrs; i++) cstate->force_quote_flags[i] = true; } - else if (force_quote) + else if (cstate->force_quote) { List *attnums; ListCell *cur; - attnums = CopyGetAttnums(tupDesc, cstate->rel, force_quote); + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_quote); foreach(cur, attnums) { @@ -1226,12 +1258,12 @@ BeginCopy(bool is_from, /* Convert FORCE NOT NULL name list to per-column flags, check validity */ cstate->force_notnull_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool)); - if (force_notnull) + if (cstate->force_notnull) { List *attnums; ListCell *cur; - attnums = CopyGetAttnums(tupDesc, cstate->rel, force_notnull); + attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->force_notnull); foreach(cur, attnums) { diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index afe4b5e450..7098a70c95 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -18,11 +18,12 @@ #include "nodes/parsenodes.h" #include "tcop/dest.h" - -typedef struct CopyStateData *CopyState; +/* CopyStateData is private in commands/copy.c */ +typedef struct CopyStateData *CopyState; extern uint64 DoCopy(const CopyStmt *stmt, const char *queryString); +extern void ProcessCopyOptions(CopyState cstate, bool is_from, List *options); extern CopyState BeginCopyFrom(Relation rel, const char *filename, List *attnamelist, List *options); extern void EndCopyFrom(CopyState cstate);