From 23d7680d04b958de327be96ffdde8f024140d50e Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Mon, 14 Aug 2017 22:54:41 -0400 Subject: [PATCH] pg_dump: Add a --load-via-partition-root option. Rushabh Lathia, reviewed and somewhat revised by me. Testing by Rajkumar Raghuwanshi. Discussion: http://postgr.es/m/CAGPqQf0C1he087bz9xRBOGZBuESYz9X=Fp8Ca_g+TfHgAff75g@mail.gmail.com --- doc/src/sgml/ref/pg_dump.sgml | 15 +++++++ doc/src/sgml/ref/pg_dumpall.sgml | 15 +++++++ src/bin/pg_dump/common.c | 51 ++++++++++++++------- src/bin/pg_dump/pg_backup.h | 1 + src/bin/pg_dump/pg_dump.c | 76 +++++++++++++++++++++++++++++++- src/bin/pg_dump/pg_dumpall.c | 5 +++ 6 files changed, 145 insertions(+), 18 deletions(-) diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index bafa031e1a..ad5b6fc703 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -888,6 +888,21 @@ PostgreSQL documentation + + + + + When dumping a COPY or INSERT statement for a partitioned table, + target the root of the partitioning hierarchy which contains it rather + than the partition itself. This may be useful when reloading data on + a server where rows do not always fall into the same partitions as + they did on the original server. This could happen, for example, if + the partitioning column is of type text and the two systems have + different definitions of the collation used to partition the data. + + + + diff --git a/doc/src/sgml/ref/pg_dumpall.sgml b/doc/src/sgml/ref/pg_dumpall.sgml index aa944a2e92..f8a2521743 100644 --- a/doc/src/sgml/ref/pg_dumpall.sgml +++ b/doc/src/sgml/ref/pg_dumpall.sgml @@ -430,6 +430,21 @@ PostgreSQL documentation + + + + + When dumping a COPY or INSERT statement for a partitioned table, + target the root of the partitioning hierarchy which contains it rather + than the partition itself. 
This may be useful when reloading data on + a server where rows do not always fall into the same partitions as + they did on the original server. This could happen, for example, if + the partitioning column is of type text and the two systems have + different definitions of the collation used to partition the data. + + + + diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c index 47191be86a..4b47951de1 100644 --- a/src/bin/pg_dump/common.c +++ b/src/bin/pg_dump/common.c @@ -66,7 +66,7 @@ static int numExtensions; static ExtensionMemberId *extmembers; static int numextmembers; -static void flagInhTables(TableInfo *tbinfo, int numTables, +static void flagInhTables(Archive *fout, TableInfo *tbinfo, int numTables, InhInfo *inhinfo, int numInherits); static void flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables); static DumpableObject **buildIndexArray(void *objArray, int numObjs, @@ -243,7 +243,7 @@ getSchemaData(Archive *fout, int *numTablesPtr) /* Link tables to parents, mark parents of target tables interesting */ if (g_verbose) write_msg(NULL, "finding inheritance relationships\n"); - flagInhTables(tblinfo, numTables, inhinfo, numInherits); + flagInhTables(fout, tblinfo, numTables, inhinfo, numInherits); if (g_verbose) write_msg(NULL, "reading column info for interesting tables\n"); @@ -294,8 +294,8 @@ getSchemaData(Archive *fout, int *numTablesPtr) } /* flagInhTables - - * Fill in parent link fields of every target table, and mark - * parents of target tables as interesting + * Fill in parent link fields of tables for which we need that information, + * and mark parents of target tables as interesting * * Note that only direct ancestors of targets are marked interesting. 
* This is sufficient; we don't much care whether they inherited their @@ -304,34 +304,53 @@ getSchemaData(Archive *fout, int *numTablesPtr) * modifies tblinfo */ static void -flagInhTables(TableInfo *tblinfo, int numTables, +flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables, InhInfo *inhinfo, int numInherits) { + DumpOptions *dopt = fout->dopt; int i, j; - int numParents; - TableInfo **parents; for (i = 0; i < numTables; i++) { + bool find_parents = true; + bool mark_parents = true; + /* Some kinds never have parents */ if (tblinfo[i].relkind == RELKIND_SEQUENCE || tblinfo[i].relkind == RELKIND_VIEW || tblinfo[i].relkind == RELKIND_MATVIEW) continue; - /* Don't bother computing anything for non-target tables, either */ + /* + * Normally, we don't bother computing anything for non-target tables, + * but if load-via-partition-root is specified, we gather information + * on every partition in the system so that getRootTableInfo can trace + * from any given leaf partition all the way up to the root. (We + * don't need to mark them as interesting for getTableAttrs, though.) + */ if (!tblinfo[i].dobj.dump) - continue; + { + mark_parents = false; - /* Find all the immediate parent tables */ - findParentsByOid(&tblinfo[i], inhinfo, numInherits); + if (!dopt->load_via_partition_root || + !tblinfo[i].ispartition) + find_parents = false; + } - /* Mark the parents as interesting for getTableAttrs */ - numParents = tblinfo[i].numParents; - parents = tblinfo[i].parents; - for (j = 0; j < numParents; j++) - parents[j]->interesting = true; + /* If needed, find all the immediate parent tables. */ + if (find_parents) + findParentsByOid(&tblinfo[i], inhinfo, numInherits); + + /* If needed, mark the parents as interesting for getTableAttrs. 
*/ + if (mark_parents) + { + int numParents = tblinfo[i].numParents; + TableInfo **parents = tblinfo[i].parents; + + for (j = 0; j < numParents; j++) + parents[j]->interesting = true; + } } } diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index 144068ac49..ce3100f09d 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -157,6 +157,7 @@ typedef struct _dumpOptions int outputNoTablespaces; int use_setsessauth; int enable_row_security; + int load_via_partition_root; /* default, if no "inclusion" switches appear, is to dump everything */ bool include_everything; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 2d8bb32dc0..628bdea1fd 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -269,6 +269,7 @@ static void appendReloptionsArrayAH(PQExpBuffer buffer, const char *reloptions, const char *prefix, Archive *fout); static char *get_synchronized_snapshot(Archive *fout); static void setupDumpWorker(Archive *AHX); +static TableInfo *getRootTableInfo(TableInfo *tbinfo); int @@ -345,6 +346,7 @@ main(int argc, char **argv) {"lock-wait-timeout", required_argument, NULL, 2}, {"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1}, {"quote-all-identifiers", no_argument, &quote_all_identifiers, 1}, + {"load-via-partition-root", no_argument, &dopt.load_via_partition_root, 1}, {"role", required_argument, NULL, 3}, {"section", required_argument, NULL, 5}, {"serializable-deferrable", no_argument, &dopt.serializable_deferrable, 1}, @@ -959,6 +961,7 @@ help(const char *progname) printf(_(" --no-tablespaces do not dump tablespace assignments\n")); printf(_(" --no-unlogged-table-data do not dump unlogged table data\n")); printf(_(" --quote-all-identifiers quote all identifiers, even if not key words\n")); + printf(_(" --load-via-partition-root load partitions via the root table\n")); printf(_(" --section=SECTION dump named section (pre-data, data, or post-data)\n")); printf(_(" 
--serializable-deferrable wait until the dump can run without anomalies\n")); printf(_(" --snapshot=SNAPSHOT use given snapshot for the dump\n")); @@ -1902,8 +1905,32 @@ dumpTableData_insert(Archive *fout, void *dcontext) if (insertStmt == NULL) { insertStmt = createPQExpBuffer(); + + /* + * When load-via-partition-root is set, get the root table + * name for the partition table, so that we can reload data + * through the root table. + */ + if (dopt->load_via_partition_root && tbinfo->ispartition) + { + TableInfo *parentTbinfo; + + parentTbinfo = getRootTableInfo(tbinfo); + + /* + * When we are loading data through the root, we will qualify + * the table name. This is needed because earlier + * search_path will be set for the partition table. + */ + classname = (char *) fmtQualifiedId(fout->remoteVersion, + parentTbinfo->dobj.namespace->dobj.name, + parentTbinfo->dobj.name); + } + else + classname = fmtId(tbinfo->dobj.name); + appendPQExpBuffer(insertStmt, "INSERT INTO %s ", - fmtId(classname)); + classname); /* corner case for zero-column table */ if (nfields == 0) @@ -2025,6 +2052,27 @@ dumpTableData_insert(Archive *fout, void *dcontext) return 1; } +/* + * getRootTableInfo: + * get the root TableInfo for the given partition table. 
+ */ +static TableInfo * +getRootTableInfo(TableInfo *tbinfo) +{ + TableInfo *parentTbinfo; + + Assert(tbinfo->ispartition); + Assert(tbinfo->numParents == 1); + + parentTbinfo = tbinfo->parents[0]; + while (parentTbinfo->ispartition) + { + Assert(parentTbinfo->numParents == 1); + parentTbinfo = parentTbinfo->parents[0]; + } + + return parentTbinfo; +} /* * dumpTableData - @@ -2041,14 +2089,38 @@ dumpTableData(Archive *fout, TableDataInfo *tdinfo) PQExpBuffer clistBuf = createPQExpBuffer(); DataDumperPtr dumpFn; char *copyStmt; + const char *copyFrom; if (!dopt->dump_inserts) { /* Dump/restore using COPY */ dumpFn = dumpTableData_copy; + + /* + * When load-via-partition-root is set, get the root table name for + * the partition table, so that we can reload data through the root + * table. + */ + if (dopt->load_via_partition_root && tbinfo->ispartition) + { + TableInfo *parentTbinfo; + + parentTbinfo = getRootTableInfo(tbinfo); + + /* + * When we load data through the root, we will qualify the table + * name, because search_path is set for the partition. + */ + copyFrom = fmtQualifiedId(fout->remoteVersion, + parentTbinfo->dobj.namespace->dobj.name, + parentTbinfo->dobj.name); + } + else + copyFrom = fmtId(tbinfo->dobj.name); + /* must use 2 steps here 'cause fmtId is nonreentrant */ appendPQExpBuffer(copyBuf, "COPY %s ", - fmtId(tbinfo->dobj.name)); + copyFrom); appendPQExpBuffer(copyBuf, "%s %sFROM stdin;\n", fmtCopyColumnList(tbinfo, clistBuf), (tdinfo->oids && tbinfo->hasoids) ? 
"WITH OIDS " : ""); diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index b14bb8e963..c0a0346cd9 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -80,6 +80,7 @@ static int no_subscriptions = 0; static int no_unlogged_table_data = 0; static int no_role_passwords = 0; static int server_version; +static int load_via_partition_root = 0; static char role_catalog[10]; #define PG_AUTHID "pg_authid" @@ -128,6 +129,7 @@ main(int argc, char *argv[]) {"lock-wait-timeout", required_argument, NULL, 2}, {"no-tablespaces", no_argument, &no_tablespaces, 1}, {"quote-all-identifiers", no_argument, &quote_all_identifiers, 1}, + {"load-via-partition-root", no_argument, &load_via_partition_root, 1}, {"role", required_argument, NULL, 3}, {"use-set-session-authorization", no_argument, &use_setsessauth, 1}, {"no-publications", no_argument, &no_publications, 1}, @@ -385,6 +387,8 @@ main(int argc, char *argv[]) appendPQExpBufferStr(pgdumpopts, " --no-tablespaces"); if (quote_all_identifiers) appendPQExpBufferStr(pgdumpopts, " --quote-all-identifiers"); + if (load_via_partition_root) + appendPQExpBufferStr(pgdumpopts, " --load-via-partition-root"); if (use_setsessauth) appendPQExpBufferStr(pgdumpopts, " --use-set-session-authorization"); if (no_publications) @@ -606,6 +610,7 @@ help(void) printf(_(" --no-tablespaces do not dump tablespace assignments\n")); printf(_(" --no-unlogged-table-data do not dump unlogged table data\n")); printf(_(" --quote-all-identifiers quote all identifiers, even if not key words\n")); + printf(_(" --load-via-partition-root load partitions via the root table\n")); printf(_(" --use-set-session-authorization\n" " use SET SESSION AUTHORIZATION commands instead of\n" " ALTER OWNER commands to set ownership\n"));