From 43d3fbe369088f089afd55847dde0f34b339b5f2 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 1 Jun 2016 16:14:21 -0400
Subject: [PATCH] Clean up some minor inefficiencies in parallel dump/restore.

Parallel dump did a totally pointless query to find out the name of each
table to be dumped, which it already knows.  Parallel restore runs issued
lots of redundant SET commands because _doSetFixedOutputState() was invoked
once per TOC item rather than just once at connection start.  While the
extra queries are insignificant if you're dumping or restoring large tables,
it still seems worth getting rid of them.

Also, give the responsibility for selecting the right client_encoding for
a parallel dump worker to setup_connection() where it naturally belongs,
instead of having ad-hoc code for that in CloneArchive().  And fix some
minor bugs like use of strdup() where pg_strdup() would be safer.

Back-patch to 9.3, mostly to keep the branches in sync in an area that
we're still finding bugs in.

Discussion: <5086.1464793073@sss.pgh.pa.us>
---
 src/bin/pg_dump/parallel.c           | 26 +-----------------
 src/bin/pg_dump/pg_backup_archiver.c | 18 +++++--------
 src/bin/pg_dump/pg_dump.c            | 40 ++++++++++++++++++----------
 3 files changed, 34 insertions(+), 50 deletions(-)

diff --git a/src/bin/pg_dump/parallel.c b/src/bin/pg_dump/parallel.c
index 0908b8a40e..53680857af 100644
--- a/src/bin/pg_dump/parallel.c
+++ b/src/bin/pg_dump/parallel.c
@@ -801,7 +801,6 @@ IsEveryWorkerIdle(ParallelState *pstate)
 static void
 lockTableForWorker(ArchiveHandle *AH, TocEntry *te)
 {
-    Archive *AHX = (Archive *) AH;
     const char *qualId;
     PQExpBuffer query;
     PGresult *res;
@@ -812,33 +811,10 @@ lockTableForWorker(ArchiveHandle *AH, TocEntry *te)
 
     query = createPQExpBuffer();
 
-    /*
-     * XXX this is an unbelievably expensive substitute for knowing how to dig
-     * a table name out of a TocEntry.
-     */
-    appendPQExpBuffer(query,
-                      "SELECT pg_namespace.nspname,"
-                      "       pg_class.relname "
-                      "  FROM pg_class "
-                      "  JOIN pg_namespace on pg_namespace.oid = relnamespace "
-                      " WHERE pg_class.oid = %u", te->catalogId.oid);
-
-    res = PQexec(AH->connection, query->data);
-
-    if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
-        exit_horribly(modulename,
-                      "could not get relation name for OID %u: %s\n",
-                      te->catalogId.oid, PQerrorMessage(AH->connection));
-
-    resetPQExpBuffer(query);
-
-    qualId = fmtQualifiedId(AHX->remoteVersion,
-                            PQgetvalue(res, 0, 0),
-                            PQgetvalue(res, 0, 1));
+    qualId = fmtQualifiedId(AH->public.remoteVersion, te->namespace, te->tag);
 
     appendPQExpBuffer(query, "LOCK TABLE %s IN ACCESS SHARE MODE NOWAIT",
                       qualId);
-    PQclear(res);
 
     res = PQexec(AH->connection, query->data);
 
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 487af2f817..282eb8a2c9 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -3831,6 +3831,7 @@ restore_toc_entries_postfork(ArchiveHandle *AH, TocEntry *pending_list)
                     ropt->pghost, ropt->pgport, ropt->username,
                     ropt->promptPassword);
 
+    /* re-establish fixed state */
     _doSetFixedOutputState(AH);
 
     /*
@@ -4009,10 +4010,9 @@ parallel_restore(ParallelArgs *args)
     TocEntry *te = args->te;
     int status;
 
-    _doSetFixedOutputState(AH);
-
     Assert(AH->connection != NULL);
 
+    /* Count only errors associated with this TOC entry */
     AH->public.n_errors = 0;
 
     /* Restore the TOC item */
@@ -4381,10 +4381,14 @@ CloneArchive(ArchiveHandle *AH)
         RestoreOptions *ropt = AH->public.ropt;
 
         Assert(AH->connection == NULL);
 
+        /* this also sets clone->connection */
         ConnectDatabase((Archive *) clone, ropt->dbname,
                         ropt->pghost, ropt->pgport, ropt->username,
                         ropt->promptPassword);
+
+        /* re-establish fixed state */
+        _doSetFixedOutputState(clone);
     }
     else
     {
@@ -4392,7 +4396,6 @@ CloneArchive(ArchiveHandle *AH)
         char *pghost;
         char *pgport;
         char *username;
-        const char *encname;
 
         Assert(AH->connection != NULL);
 
@@ -4406,18 +4409,11 @@ CloneArchive(ArchiveHandle *AH)
         pghost = PQhost(AH->connection);
         pgport = PQport(AH->connection);
         username = PQuser(AH->connection);
-        encname = pg_encoding_to_char(AH->public.encoding);
 
         /* this also sets clone->connection */
         ConnectDatabase((Archive *) clone, dbname, pghost, pgport,
                         username, TRI_NO);
 
-        /*
-         * Set the same encoding, whatever we set here is what we got from
-         * pg_encoding_to_char(), so we really shouldn't run into an error
-         * setting that very same value. Also see the comment in
-         * SetupConnection().
-         */
-        PQsetClientEncoding(clone->connection, encname);
+        /* setupDumpWorker will fix up connection state */
     }
 
     /* Let the format-specific code have a chance too */
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 48e6509976..e49be99ec7 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -935,10 +935,7 @@ setup_connection(Archive *AH, const char *dumpencoding,
     const char *std_strings;
 
     /*
-     * Set the client encoding if requested. If dumpencoding == NULL then
-     * either it hasn't been requested or we're a cloned connection and then
-     * this has already been set in CloneArchive according to the original
-     * connection encoding.
+     * Set the client encoding if requested.
      */
     if (dumpencoding)
     {
@@ -956,7 +953,11 @@ setup_connection(Archive *AH, const char *dumpencoding,
     std_strings = PQparameterStatus(conn, "standard_conforming_strings");
     AH->std_strings = (std_strings && strcmp(std_strings, "on") == 0);
 
-    /* Set the role if requested */
+    /*
+     * Set the role if requested.  In a parallel dump worker, we'll be passed
+     * use_role == NULL, but AH->use_role is already set (if user specified it
+     * originally) and we should use that.
+     */
     if (!use_role && AH->use_role)
         use_role = AH->use_role;
 
@@ -969,9 +970,9 @@ setup_connection(Archive *AH, const char *dumpencoding,
         ExecuteSqlStatement(AH, query->data);
         destroyPQExpBuffer(query);
 
-        /* save this for later use on parallel connections */
+        /* save it for possible later use by parallel workers */
         if (!AH->use_role)
-            AH->use_role = strdup(use_role);
+            AH->use_role = pg_strdup(use_role);
     }
 
     /* Set the datestyle to ISO to ensure the dump's portability */
@@ -1057,11 +1058,12 @@ setup_connection(Archive *AH, const char *dumpencoding,
                      "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE");
 
     /*
-     * define an export snapshot, either chosen by user or needed for parallel
-     * dump.
+     * If user specified a snapshot to use, select that.  In a parallel dump
+     * worker, we'll be passed dumpsnapshot == NULL, but AH->sync_snapshot_id
+     * is already set (if the server can handle it) and we should use that.
     */
     if (dumpsnapshot)
-        AH->sync_snapshot_id = strdup(dumpsnapshot);
+        AH->sync_snapshot_id = pg_strdup(dumpsnapshot);
 
     if (AH->sync_snapshot_id)
     {
@@ -1087,21 +1089,31 @@ setup_connection(Archive *AH, const char *dumpencoding,
     }
 }
 
+/* Set up connection for a parallel worker process */
 static void
-setupDumpWorker(Archive *AHX)
+setupDumpWorker(Archive *AH)
 {
-    setup_connection(AHX, NULL, NULL, NULL);
+    /*
+     * We want to re-select all the same values the master connection is
+     * using.  We'll have inherited directly-usable values in
+     * AH->sync_snapshot_id and AH->use_role, but we need to translate the
+     * inherited encoding value back to a string to pass to setup_connection.
+     */
+    setup_connection(AH,
+                     pg_encoding_to_char(AH->encoding),
+                     NULL,
+                     NULL);
 }
 
 static char *
 get_synchronized_snapshot(Archive *fout)
 {
-    char *query = "SELECT pg_export_snapshot()";
+    char *query = "SELECT pg_catalog.pg_export_snapshot()";
     char *result;
     PGresult *res;
 
     res = ExecuteSqlQueryForSingleRow(fout, query);
-    result = strdup(PQgetvalue(res, 0, 0));
+    result = pg_strdup(PQgetvalue(res, 0, 0));
     PQclear(res);
 
     return result;
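
For context on the strdup()-to-pg_strdup() substitutions above: pg_strdup() is
the frontend wrapper that treats allocation failure as a fatal error instead of
returning NULL, so callers such as setup_connection() never end up storing a
NULL pointer that would crash much later.  The function below is only a sketch
of that idiom (the name sketch_pg_strdup is invented for illustration); it is
not the actual PostgreSQL implementation.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Illustrative sketch only: duplicate a string, treating out-of-memory
 * as a fatal error so the caller never has to check for NULL.
 */
static char *
sketch_pg_strdup(const char *in)
{
    char *out = strdup(in);

    if (out == NULL)
    {
        fprintf(stderr, "out of memory\n");
        exit(1);            /* never hand NULL back to the caller */
    }
    return out;
}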