From 7e413a0f82c8f8e9abb3981839191414de3aae08 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Thu, 7 Mar 2019 09:26:14 -0300 Subject: [PATCH] pg_dump: allow multiple rows per insert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is useful to speed up loading data in a different database engine. Authors: Surafel Temesgen and David Rowley. Lightly edited by Álvaro. Reviewed-by: Fabien Coelho Discussion: https://postgr.es/m/CALAY4q9kumSdnRBzvRJvSRf2+BH20YmSvzqOkvwpEmodD-xv6g@mail.gmail.com --- doc/src/sgml/ref/pg_dump.sgml | 38 ++++-- src/bin/pg_dump/pg_backup.h | 2 +- src/bin/pg_dump/pg_dump.c | 204 +++++++++++++++++++++---------- src/bin/pg_dump/t/001_basic.pl | 4 +- src/bin/pg_dump/t/002_pg_dump.pl | 34 +++++- 5 files changed, 199 insertions(+), 83 deletions(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index 033eae9b46..e0e65f9c21 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -661,9 +661,9 @@ PostgreSQL documentation ...). This will make restoration very slow; it is mainly useful for making dumps that can be loaded into non-PostgreSQL databases. - However, since this option generates a separate command for each row, - an error in reloading a row causes only that row to be lost rather - than the entire table contents. + Any error during reloading will cause only rows that are part of the + problematic INSERT to be lost, rather than the + entire table contents. @@ -775,13 +775,12 @@ PostgreSQL documentation than COPY). This will make restoration very slow; it is mainly useful for making dumps that can be loaded into non-PostgreSQL databases. - However, since this option generates a separate command for each row, - an error in reloading a row causes only that row to be lost rather - than the entire table contents. - Note that - the restore might fail altogether if you have rearranged column order. - The option is safe against column - order changes, though even slower. + Any error during reloading will cause only rows that are part of the + problematic INSERT to be lost, rather than the + entire table contents. Note that the restore might fail altogether if + you have rearranged column order. The + option is safe against column order + changes, though even slower. @@ -925,8 +924,9 @@ PostgreSQL documentation Add ON CONFLICT DO NOTHING to INSERT commands. - This option is not valid unless or - is also specified. + This option is not valid unless , + or + is also specified. @@ -949,6 +949,20 @@ PostgreSQL documentation + + + + + Dump data as INSERT commands (rather than + COPY). Controls the maximum number of rows per + INSERT command. The value specified must be a + number greater than zero. Any error during reloading will cause only + rows that are part of the problematic INSERT to be + lost, rather than the entire table contents. + + + + diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index 4a2e122e2d..7ab27391fb 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -140,10 +140,10 @@ typedef struct _dumpOptions int dumpSections; /* bitmask of chosen sections */ bool aclsSkip; const char *lockWaitTimeout; + int dump_inserts; /* 0 = COPY, otherwise rows per INSERT */ /* flags for various command-line long options */ int disable_dollar_quoting; - int dump_inserts; int column_inserts; int if_exists; int no_comments; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 679ff3365c..082dc841e9 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -138,6 +138,12 @@ static const CatalogId nilCatalogId = {0, 0}; static bool have_extra_float_digits = false; static int extra_float_digits; +/* + * The default number of rows per INSERT when + * --inserts is specified without --rows-per-insert + */ +#define DUMP_DEFAULT_ROWS_PER_INSERT 1 + /* * Macro for producing quoted, schema-qualified name of a dumpable object. */ @@ -306,11 +312,13 @@ main(int argc, char **argv) DumpableObject *boundaryObjs; int i; int optindex; + char *endptr; RestoreOptions *ropt; Archive *fout; /* the script file */ const char *dumpencoding = NULL; const char *dumpsnapshot = NULL; char *use_role = NULL; + long rowsPerInsert; int numWorkers = 1; trivalue prompt_password = TRI_DEFAULT; int compressLevel = -1; @@ -363,7 +371,7 @@ main(int argc, char **argv) {"exclude-table-data", required_argument, NULL, 4}, {"extra-float-digits", required_argument, NULL, 8}, {"if-exists", no_argument, &dopt.if_exists, 1}, - {"inserts", no_argument, &dopt.dump_inserts, 1}, + {"inserts", no_argument, NULL, 9}, {"lock-wait-timeout", required_argument, NULL, 2}, {"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1}, {"quote-all-identifiers", no_argument, "e_all_identifiers, 1}, @@ -382,6 +390,7 @@ main(int argc, char **argv) {"no-subscriptions", no_argument, &dopt.no_subscriptions, 1}, {"no-sync", no_argument, NULL, 7}, {"on-conflict-do-nothing", no_argument, &dopt.do_nothing, 1}, + {"rows-per-insert", required_argument, NULL, 10}, {NULL, 0, NULL, 0} }; @@ -572,6 +581,31 @@ main(int argc, char **argv) } break; + case 9: /* inserts */ + + /* + * dump_inserts also stores --rows-per-insert, careful not to + * overwrite that. + */ + if (dopt.dump_inserts == 0) + dopt.dump_inserts = DUMP_DEFAULT_ROWS_PER_INSERT; + break; + + case 10: /* rows per insert */ + errno = 0; + rowsPerInsert = strtol(optarg, &endptr, 10); + + if (endptr == optarg || *endptr != '\0' || + rowsPerInsert <= 0 || rowsPerInsert > INT_MAX || + errno == ERANGE) + { + write_msg(NULL, "rows-per-insert must be in range %d..%d\n", + 1, INT_MAX); + exit_nicely(1); + } + dopt.dump_inserts = (int) rowsPerInsert; + break; + default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit_nicely(1); @@ -596,8 +630,8 @@ main(int argc, char **argv) } /* --column-inserts implies --inserts */ - if (dopt.column_inserts) - dopt.dump_inserts = 1; + if (dopt.column_inserts && dopt.dump_inserts == 0) + dopt.dump_inserts = DUMP_DEFAULT_ROWS_PER_INSERT; /* * Binary upgrade mode implies dumping sequence data even in schema-only @@ -622,8 +656,12 @@ main(int argc, char **argv) if (dopt.if_exists && !dopt.outputClean) exit_horribly(NULL, "option --if-exists requires option -c/--clean\n"); - if (dopt.do_nothing && !(dopt.dump_inserts || dopt.column_inserts)) - exit_horribly(NULL, "option --on-conflict-do-nothing requires option --inserts or --column-inserts\n"); + /* + * --inserts are already implied above if --column-inserts or + * --rows-per-insert were specified. + */ + if (dopt.do_nothing && dopt.dump_inserts == 0) + exit_horribly(NULL, "option --on-conflict-do-nothing requires option --inserts, --rows-per-insert or --column-inserts\n"); /* Identify archive format to emit */ archiveFormat = parseArchiveFormat(format, &archiveMode); @@ -993,6 +1031,7 @@ help(const char *progname) printf(_(" --no-unlogged-table-data do not dump unlogged table data\n")); printf(_(" --on-conflict-do-nothing add ON CONFLICT DO NOTHING to INSERT commands\n")); printf(_(" --quote-all-identifiers quote all identifiers, even if not key words\n")); + printf(_(" --rows-per-insert=NROWS number of rows per INSERT; implies --inserts\n")); printf(_(" --section=SECTION dump named section (pre-data, data, or post-data)\n")); printf(_(" --serializable-deferrable wait until the dump can run without anomalies\n")); printf(_(" --snapshot=SNAPSHOT use given snapshot for the dump\n")); @@ -1909,9 +1948,9 @@ dumpTableData_insert(Archive *fout, void *dcontext) PQExpBuffer q = createPQExpBuffer(); PQExpBuffer insertStmt = NULL; PGresult *res; - int tuple; int nfields; - int field; + int rows_per_statement = dopt->dump_inserts; + int rows_this_statement = 0; appendPQExpBuffer(q, "DECLARE _pg_dump_cursor CURSOR FOR " "SELECT * FROM ONLY %s", @@ -1926,69 +1965,88 @@ dumpTableData_insert(Archive *fout, void *dcontext) res = ExecuteSqlQuery(fout, "FETCH 100 FROM _pg_dump_cursor", PGRES_TUPLES_OK); nfields = PQnfields(res); - for (tuple = 0; tuple < PQntuples(res); tuple++) + + /* + * First time through, we build as much of the INSERT statement as + * possible in "insertStmt", which we can then just print for each + * statement. If the table happens to have zero columns then this will + * be a complete statement, otherwise it will end in "VALUES" and be + * ready to have the row's column values printed. + */ + if (insertStmt == NULL) { + TableInfo *targettab; + + insertStmt = createPQExpBuffer(); + /* - * First time through, we build as much of the INSERT statement as - * possible in "insertStmt", which we can then just print for each - * line. If the table happens to have zero columns then this will - * be a complete statement, otherwise it will end in "VALUES(" and - * be ready to have the row's column values appended. + * When load-via-partition-root is set, get the root table name + * for the partition table, so that we can reload data through the + * root table. */ - if (insertStmt == NULL) + if (dopt->load_via_partition_root && tbinfo->ispartition) + targettab = getRootTableInfo(tbinfo); + else + targettab = tbinfo; + + appendPQExpBuffer(insertStmt, "INSERT INTO %s ", + fmtQualifiedDumpable(targettab)); + + /* corner case for zero-column table */ + if (nfields == 0) { - TableInfo *targettab; - - insertStmt = createPQExpBuffer(); - - /* - * When load-via-partition-root is set, get the root table - * name for the partition table, so that we can reload data - * through the root table. - */ - if (dopt->load_via_partition_root && tbinfo->ispartition) - targettab = getRootTableInfo(tbinfo); - else - targettab = tbinfo; - - appendPQExpBuffer(insertStmt, "INSERT INTO %s ", - fmtQualifiedDumpable(targettab)); - - /* corner case for zero-column table */ - if (nfields == 0) - { - appendPQExpBufferStr(insertStmt, "DEFAULT VALUES;\n"); - } - else - { - /* append the list of column names if required */ - if (dopt->column_inserts) - { - appendPQExpBufferChar(insertStmt, '('); - for (field = 0; field < nfields; field++) - { - if (field > 0) - appendPQExpBufferStr(insertStmt, ", "); - appendPQExpBufferStr(insertStmt, - fmtId(PQfname(res, field))); - } - appendPQExpBufferStr(insertStmt, ") "); - } - - if (tbinfo->needs_override) - appendPQExpBufferStr(insertStmt, "OVERRIDING SYSTEM VALUE "); - - appendPQExpBufferStr(insertStmt, "VALUES ("); - } + appendPQExpBufferStr(insertStmt, "DEFAULT VALUES;\n"); } + else + { + /* append the list of column names if required */ + if (dopt->column_inserts) + { + appendPQExpBufferChar(insertStmt, '('); + for (int field = 0; field < nfields; field++) + { + if (field > 0) + appendPQExpBufferStr(insertStmt, ", "); + appendPQExpBufferStr(insertStmt, + fmtId(PQfname(res, field))); + } + appendPQExpBufferStr(insertStmt, ") "); + } - archputs(insertStmt->data, fout); + if (tbinfo->needs_override) + appendPQExpBufferStr(insertStmt, "OVERRIDING SYSTEM VALUE "); - /* if it is zero-column table then we're done */ + appendPQExpBufferStr(insertStmt, "VALUES"); + } + } + + for (int tuple = 0; tuple < PQntuples(res); tuple++) + { + /* Write the INSERT if not in the middle of a multi-row INSERT. */ + if (rows_this_statement == 0) + archputs(insertStmt->data, fout); + + /* + * If it is zero-column table then we've aleady written the + * complete statement, which will mean we've disobeyed + * --rows-per-insert when it's set greater than 1. We do support + * a way to make this multi-row with: SELECT UNION ALL SELECT + * UNION ALL ... but that's non-standard so we should avoid it + * given that using INSERTs is mostly only ever needed for + * cross-database exports. + */ if (nfields == 0) continue; - for (field = 0; field < nfields; field++) + /* Emit a row heading */ + if (rows_per_statement == 1) + archputs(" (", fout); + else if (rows_this_statement > 0) + archputs(",\n\t(", fout); + else + archputs("\n\t(", fout); + + for (int field = 0; field < nfields; field++) { if (field > 0) archputs(", ", fout); @@ -2053,10 +2111,19 @@ dumpTableData_insert(Archive *fout, void *dcontext) } } - if (!dopt->do_nothing) - archputs(");\n", fout); - else - archputs(") ON CONFLICT DO NOTHING;\n", fout); + /* Terminate the row ... */ + archputs(")", fout); + + /* ... and the statement, if the target no. of rows is reached */ + if (++rows_this_statement >= rows_per_statement) + { + if (dopt->do_nothing) + archputs(" ON CONFLICT DO NOTHING;\n", fout); + else + archputs(";\n", fout); + /* Reset the row counter */ + rows_this_statement = 0; + } } if (PQntuples(res) <= 0) @@ -2067,6 +2134,15 @@ dumpTableData_insert(Archive *fout, void *dcontext) PQclear(res); } + /* Terminate any statements that didn't make the row count. */ + if (rows_this_statement > 0) + { + if (dopt->do_nothing) + archputs(" ON CONFLICT DO NOTHING;\n", fout); + else + archputs(";\n", fout); + } + archputs("\n\n", fout); ExecuteSqlStatement(fout, "CLOSE _pg_dump_cursor"); diff --git a/src/bin/pg_dump/t/001_basic.pl b/src/bin/pg_dump/t/001_basic.pl index 1dad553739..3a58f9b8ce 100644 --- a/src/bin/pg_dump/t/001_basic.pl +++ b/src/bin/pg_dump/t/001_basic.pl @@ -118,8 +118,8 @@ command_fails_like( command_fails_like( [ 'pg_dump', '--on-conflict-do-nothing' ], - qr/\Qpg_dump: option --on-conflict-do-nothing requires option --inserts or --column-inserts\E/, - 'pg_dump: option --on-conflict-do-nothing requires option --inserts or --column-inserts'); + qr/pg_dump: option --on-conflict-do-nothing requires option --inserts, --rows-per-insert or --column-inserts/, + 'pg_dump: --on-conflict-do-nothing requires --inserts, --rows-per-insert, --column-inserts'); # pg_dumpall command-line argument checks command_fails_like( diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index afaa26badb..de6895122e 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -295,6 +295,18 @@ my %pgdump_runs = ( "$tempdir/role_parallel", ], }, + rows_per_insert => { + dump_cmd => [ + 'pg_dump', + '--no-sync', + "--file=$tempdir/rows_per_insert.sql", + '-a', + '--rows-per-insert=4', + '--table=dump_test.test_table', + '--table=dump_test.test_fourth_table', + 'postgres', + ], + }, schema_only => { dump_cmd => [ 'pg_dump', '--format=plain', @@ -1228,10 +1240,11 @@ my %tests = ( 'COPY test_fourth_table' => { create_order => 7, create_sql => - 'INSERT INTO dump_test.test_fourth_table DEFAULT VALUES;', + 'INSERT INTO dump_test.test_fourth_table DEFAULT VALUES;' + . 'INSERT INTO dump_test.test_fourth_table DEFAULT VALUES;', regexp => qr/^ \QCOPY dump_test.test_fourth_table FROM stdin;\E - \n\n\\\.\n + \n\n\n\\\.\n /xm, like => { %full_runs, @@ -1295,6 +1308,19 @@ my %tests = ( like => { column_inserts => 1, }, }, + 'test_table with 4-row INSERTs' => { + regexp => qr/^ + (?: + INSERT\ INTO\ dump_test\.test_table\ VALUES\n + (?:\t\(\d,\ NULL,\ NULL,\ NULL\),\n){3} + \t\(\d,\ NULL,\ NULL,\ NULL\);\n + ){2} + INSERT\ INTO\ dump_test\.test_table\ VALUES\n + \t\(\d,\ NULL,\ NULL,\ NULL\); + /xm, + like => { rows_per_insert => 1, }, + }, + 'INSERT INTO test_second_table' => { regexp => qr/^ (?:INSERT\ INTO\ dump_test\.test_second_table\ \(col1,\ col2\) @@ -1304,8 +1330,8 @@ my %tests = ( 'INSERT INTO test_fourth_table' => { regexp => - qr/^\QINSERT INTO dump_test.test_fourth_table DEFAULT VALUES;\E/m, - like => { column_inserts => 1, }, + qr/^(?:INSERT INTO dump_test\.test_fourth_table DEFAULT VALUES;\n){2}/m, + like => { column_inserts => 1, rows_per_insert => 1, }, }, 'INSERT INTO test_fifth_table' => {