2000-07-11 15:07:17 +02:00
|
|
|
/*-------------------------------------------------------------------------
 *
 * pg_backup_archiver.h
 *
 *	Private interface to the pg_dump archiver routines.
 *	It is NOT intended that these routines be called by any
 *	dumper directly.
 *
 *	See the headers to pg_restore for more details.
 *
 * Copyright (c) 2000, Philip Warner
 *	Rights are granted to use this software in any way so long
 *	as this notice is not removed.
 *
 *	The author is not responsible for loss or damages that may
 *	result from its use.
 *
 *
 * IDENTIFICATION
 *		src/bin/pg_dump/pg_backup_archiver.h
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#ifndef __PG_BACKUP_ARCHIVE__
#define __PG_BACKUP_ARCHIVE__
/*
 * NOTE(review): identifiers beginning with a double underscore are reserved
 * for the C implementation; a guard name like PG_BACKUP_ARCHIVER_H would be
 * strictly conforming.  Kept as-is to avoid churn in a long-lived header.
 */

#include <time.h>

#include "libpq-fe.h"
#include "pg_backup.h"
#include "pqexpbuffer.h"

/* Chunk size (bytes) for buffered large-object I/O — see callers for use */
#define LOBBUFSIZE 16384
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2000-07-21 13:40:08 +02:00
|
|
|
/*
 * Data block types: tag bytes identifying what kind of data block follows
 * in the archive stream.
 */
#define BLK_DATA 1
/*
 * NOTE(review): value 2 is skipped — presumably a retired block type from an
 * older archive format; do not reuse it without checking old-archive
 * compatibility.
 */
#define BLK_BLOBS 3
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2016-10-25 18:00:00 +02:00
|
|
|
/*
 * Pack the three version components into a single integer, one byte per
 * component: <major><minor><rev>.  Each component is expected to fit in
 * 0..255 so the fields do not overlap.
 */
#define MAKE_ARCHIVE_VERSION(major, minor, rev) \
	((major) * 65536 + (minor) * 256 + (rev))

/* Extract the individual components from a packed version integer */
#define ARCHIVE_MAJOR(version)	(((version) >> 16) & 0xFF)
#define ARCHIVE_MINOR(version)	(((version) >> 8) & 0xFF)
#define ARCHIVE_REV(version)	((version) & 0xFF)
|
2016-10-25 18:00:00 +02:00
|
|
|
|
2010-02-18 02:29:10 +01:00
|
|
|
/*
 * Historical version numbers (checked in code).  Bumping the archive
 * version causes older pg_restore versions to refuse files made by newer
 * pg_dump versions; cf. K_VERS_MAX below.
 */
#define K_VERS_1_0	MAKE_ARCHIVE_VERSION(1, 0, 0)
#define K_VERS_1_2	MAKE_ARCHIVE_VERSION(1, 2, 0)	/* Allow No ZLIB */
#define K_VERS_1_3	MAKE_ARCHIVE_VERSION(1, 3, 0)	/* BLOBS */
#define K_VERS_1_4	MAKE_ARCHIVE_VERSION(1, 4, 0)	/* Date & name in header */
#define K_VERS_1_5	MAKE_ARCHIVE_VERSION(1, 5, 0)	/* Handle dependencies */
#define K_VERS_1_6	MAKE_ARCHIVE_VERSION(1, 6, 0)	/* Schema field in TOCs */
#define K_VERS_1_7	MAKE_ARCHIVE_VERSION(1, 7, 0)	/* File Offset size in
													 * header */
#define K_VERS_1_8	MAKE_ARCHIVE_VERSION(1, 8, 0)	/* change interpretation
													 * of ID numbers and
													 * dependencies */
#define K_VERS_1_9	MAKE_ARCHIVE_VERSION(1, 9, 0)	/* add default_with_oids
													 * tracking */
#define K_VERS_1_10 MAKE_ARCHIVE_VERSION(1, 10, 0)	/* add tablespace */
#define K_VERS_1_11 MAKE_ARCHIVE_VERSION(1, 11, 0)	/* add toc section
													 * indicator */
#define K_VERS_1_12 MAKE_ARCHIVE_VERSION(1, 12, 0)	/* add separate BLOB
													 * entries */
#define K_VERS_1_13 MAKE_ARCHIVE_VERSION(1, 13, 0)	/* change search_path
													 * behavior (CVE-2018-1058) */
#define K_VERS_1_14 MAKE_ARCHIVE_VERSION(1, 14, 0)	/* add tableam */
#define K_VERS_1_15 MAKE_ARCHIVE_VERSION(1, 15, 0)	/* add
													 * compression_algorithm
													 * in header */
|
2016-10-25 18:00:00 +02:00
|
|
|
|
2019-04-26 18:03:59 +02:00
|
|
|
/* Current archive version number (the format we can output) */
#define K_VERS_MAJOR 1
#define K_VERS_MINOR 15
#define K_VERS_REV 0
/* The version we stamp into archives we write */
#define K_VERS_SELF MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, K_VERS_REV)

/*
 * Newest format we can read: any revision of the current major.minor is
 * accepted, hence rev = 255.
 */
#define K_VERS_MAX	MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, 255)
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2000-10-31 15:20:30 +01:00
|
|
|
|
2002-10-22 21:15:23 +02:00
|
|
|
/*
 * Flags to indicate disposition of offsets stored in files.
 * NOTE(review): presumably these record whether a TOC entry's data-block
 * file offset was known when the TOC was written — verify against the
 * format implementations (e.g. the custom-format code).
 */
#define K_OFFSET_POS_NOT_SET 1
#define K_OFFSET_POS_SET 2
#define K_OFFSET_NO_DATA 3
|
|
|
|
|
2013-03-24 16:27:20 +01:00
|
|
|
/*
 * Special exit values from worker children.  We reserve 0 for normal
 * success; 1 and other small values should be interpreted as crashes.
 * Values >= 10 carry specific archiver meanings to the parent process.
 */
#define WORKER_OK 0
#define WORKER_CREATE_DONE 10
#define WORKER_INHIBIT_DATA 11
#define WORKER_IGNORED_ERRORS 12
|
|
|
|
|
2014-10-14 20:00:55 +02:00
|
|
|
/*
 * Forward declarations so the callback typedefs below can reference these
 * types; the struct bodies are defined elsewhere.
 */
typedef struct _archiveHandle ArchiveHandle;
typedef struct _tocEntry TocEntry;

struct ParallelState;
|
2013-03-24 20:35:37 +01:00
|
|
|
|
2014-05-06 02:27:16 +02:00
|
|
|
/*
 * Report a read failure on input stream fd and exit via pg_fatal.
 * Distinguishes premature end-of-file from a genuine I/O error
 * (%m is the errno message in pg_fatal's format language).
 */
#define READ_ERROR_EXIT(fd) \
	do { \
		if (feof(fd)) \
			pg_fatal("could not read from input file: end of file"); \
		else \
			pg_fatal("could not read from input file: %m"); \
	} while (0)
|
|
|
|
|
|
|
|
/* Report a write failure on the output file and exit via pg_fatal */
#define WRITE_ERROR_EXIT \
	do { \
		pg_fatal("could not write to output file: %m"); \
	} while (0)
|
2014-05-06 18:12:18 +02:00
|
|
|
|
2013-03-24 20:35:37 +01:00
|
|
|
/* Which operation this archiver session is performing */
typedef enum T_Action
{
	ACT_DUMP,
	ACT_RESTORE,
} T_Action;
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2016-08-30 18:00:00 +02:00
|
|
|
/*
 * Callback types implemented by each archive format.  NOTE(review):
 * presumably an ArchiveHandle holds one pointer of each of these types,
 * set by the selected format — confirm in the format source files.
 */

/* Lifecycle: close and reopen an archive */
typedef void (*ClosePtrType) (ArchiveHandle *AH);
typedef void (*ReopenPtrType) (ArchiveHandle *AH);
/* Called as each TOC entry is created during a dump */
typedef void (*ArchiveEntryPtrType) (ArchiveHandle *AH, TocEntry *te);

/* Bracket the writing of one TOC entry's data stream */
typedef void (*StartDataPtrType) (ArchiveHandle *AH, TocEntry *te);
typedef void (*WriteDataPtrType) (ArchiveHandle *AH, const void *data, size_t dLen);
typedef void (*EndDataPtrType) (ArchiveHandle *AH, TocEntry *te);

/* Bracket large-object output: per entry (plural) and per object (singular) */
typedef void (*StartLOsPtrType) (ArchiveHandle *AH, TocEntry *te);
typedef void (*StartLOPtrType) (ArchiveHandle *AH, TocEntry *te, Oid oid);
typedef void (*EndLOPtrType) (ArchiveHandle *AH, TocEntry *te, Oid oid);
typedef void (*EndLOsPtrType) (ArchiveHandle *AH, TocEntry *te);

/* Low-level byte/buffer I/O on the archive medium */
typedef int (*WriteBytePtrType) (ArchiveHandle *AH, const int i);
typedef int (*ReadBytePtrType) (ArchiveHandle *AH);
typedef void (*WriteBufPtrType) (ArchiveHandle *AH, const void *c, size_t len);
typedef void (*ReadBufPtrType) (ArchiveHandle *AH, void *buf, size_t len);

/* Read/write/print the format-specific portion of a TOC entry */
typedef void (*WriteExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te);
typedef void (*ReadExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te);
typedef void (*PrintExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te);
/* Emit a TOC entry's data during restore */
typedef void (*PrintTocDataPtrType) (ArchiveHandle *AH, TocEntry *te);
|
2014-10-14 20:00:55 +02:00
|
|
|
|
Improve parallel scheduling logic in pg_dump/pg_restore.
Previously, the way this worked was that a parallel pg_dump would
re-order the TABLE_DATA items in the dump's TOC into decreasing size
order, and separately re-order (some of) the INDEX items into decreasing
size order. Then pg_dump would dump the items in that order. Later,
parallel pg_restore just followed the TOC order. This method had lots
of deficiencies:
* TOC ordering randomly differed between parallel and non-parallel
dumps, and was hard to predict in the former case, causing problems
for building stable pg_dump test cases.
* Parallel restore only followed a well-chosen order if the dump had
been done in parallel; in particular, this never happened for restore
from custom-format dumps.
* The best order for restore isn't necessarily the same as for dump,
and it's not really static either because of locking considerations.
* TABLE_DATA and INDEX items aren't the only things that might take a lot
of work during restore. Scheduling was particularly stupid for the BLOBS
item, which might require lots of work during dump as well as restore,
but was left to the end in either case.
This patch removes the logic that changed the TOC order, fixing the
test instability problem. Instead, we sort the parallelizable items
just before processing them during a parallel dump. Independently
of that, parallel restore prioritizes the ready-to-execute tasks
based on the size of the underlying table. In the case of dependent
tasks such as index, constraint, or foreign key creation, the largest
relevant table is used as the metric for estimating the task length.
(This is pretty crude, but it should be enough to avoid the case we
want to avoid, which is ending the run with just a few large tasks
such that we can't make use of all N workers.)
Patch by me, responding to a complaint from Peter Eisentraut,
who also reviewed the patch.
Discussion: https://postgr.es/m/5137fe12-d0a2-4971-61b6-eb4e7e8875f8@2ndquadrant.com
2018-09-14 23:31:51 +02:00
|
|
|
/* Format hook invoked before a parallel restore begins */
typedef void (*PrepParallelRestorePtrType) (ArchiveHandle *AH);
/* Duplicate / tear down format-private state for a parallel worker */
typedef void (*ClonePtrType) (ArchiveHandle *AH);
typedef void (*DeClonePtrType) (ArchiveHandle *AH);

/* Per-TOC-entry jobs executed in parallel worker processes */
typedef int (*WorkerJobDumpPtrType) (ArchiveHandle *AH, TocEntry *te);
typedef int (*WorkerJobRestorePtrType) (ArchiveHandle *AH, TocEntry *te);

/* Custom data-output routine; returns the number of bytes written */
typedef size_t (*CustomOutPtrType) (ArchiveHandle *AH, const void *buf, size_t len);
|
2001-03-22 05:01:46 +01:00
|
|
|
|
Fix pg_restore's direct-to-database mode for INSERT-style table data.
In commit 6545a901aaf84cb05212bb6a7674059908f527c3, I removed the mini SQL
lexer that was in pg_backup_db.c, thinking that it had no real purpose
beyond separating COPY data from SQL commands, which purpose had been
obsoleted by long-ago fixes in pg_dump's archive file format.
Unfortunately this was in error: that code was also used to identify
command boundaries in INSERT-style table data, which is run together as a
single string in the archive file for better compressibility. As a result,
direct-to-database restores from archive files made with --inserts or
--column-inserts fail in our latest releases, as reported by Dick Visser.
To fix, restore the mini SQL lexer, but simplify it by adjusting the
calling logic so that it's only required to cope with INSERT-style table
data, not arbitrary SQL commands. This allows us to not have to deal with
SQL comments, E'' strings, or dollar-quoted strings, none of which have
ever been emitted by dumpTableData_insert.
Also, fix the lexer to cope with standard-conforming strings, which was the
actual bug that the previous patch was meant to solve.
Back-patch to all supported branches. The previous patch went back to 8.2,
which unfortunately means that the EOL release of 8.2 contains this bug,
but I don't think we're doing another 8.2 release just because of that.
2012-01-06 19:04:09 +01:00
|
|
|
/*
 * States for the mini SQL lexer used by direct-to-database restore to find
 * statement boundaries in INSERT-style table data, without being fooled by
 * semicolons inside quoted strings or identifiers.
 */
typedef enum
{
	SQL_SCAN = 0,				/* normal */
	SQL_IN_SINGLE_QUOTE,		/* '...' literal */
	SQL_IN_DOUBLE_QUOTE,		/* "..." identifier */
} sqlparseState;
|
|
|
|
|
|
|
|
/*
 * Lexer state carried across successive chunks of INSERT-style data, so a
 * statement split across buffer boundaries can be reassembled.
 */
typedef struct
{
	sqlparseState state;		/* see above */
	bool		backSlash;		/* next char is backslash quoted? */
	PQExpBuffer curCmd;			/* incomplete line (NULL if not created) */
} sqlparseInfo;
|
|
|
|
|
They are two different problems; the TOC entry is important for any
multiline command or to rerun the command easily later.
Whereas displaying the failed SQL command is a matter of fixing the
error
messages.
The latter is complicated by failed COPY commands which, with
die-on-errors
off, results in the data being processed as a command, so dumping the
command will dump all of the data.
In the case of long commands, should the whole command be dumped? eg.
(eg.
several pages of function definition).
In the case of the COPY command, I'm not sure what to do. Obviously, it
would be best to avoid sending the data, but the data and command are
combined (from memory). Also, the 'data' may be in the form of INSERT
statements.
Attached patch produces the first 125 chars of the command:
pg_restore: [archiver (db)] Error while PROCESSING TOC:
pg_restore: [archiver (db)] Error from TOC Entry 26; 1255 16449270
FUNCTION
plpgsql_call_handler() pjw
pg_restore: [archiver (db)] could not execute query: ERROR: function
"plpgsql_call_handler" already exists with same argument types
Command was: CREATE FUNCTION plpgsql_call_handler() RETURNS
language_handler
AS '/var/lib/pgsql-8.0b1/lib/plpgsql', 'plpgsql_call_han...
pg_restore: [archiver (db)] Error from TOC Entry 27; 1255 16449271
FUNCTION
plpgsql_validator(oid) pjw
pg_restore: [archiver (db)] could not execute query: ERROR: function
"plpgsql_validator" already exists with same argument types
Command was: CREATE FUNCTION plpgsql_validator(oid) RETURNS void
AS '/var/lib/pgsql-8.0b1/lib/plpgsql', 'plpgsql_validator'
LANGU...
Philip Warner
2004-08-20 22:00:34 +02:00
|
|
|
/*
 * Coarse progress stages of an archiver run.  NOTE(review): presumably used
 * to label error reports (e.g. "Error while PROCESSING TOC") — confirm in
 * the error-reporting code.
 */
typedef enum
{
	STAGE_NONE = 0,
	STAGE_INITIALIZING,
	STAGE_PROCESSING,
	STAGE_FINALIZING,
} ArchiverStage;
|
|
|
|
|
Fix pg_restore's direct-to-database mode for INSERT-style table data.
In commit 6545a901aaf84cb05212bb6a7674059908f527c3, I removed the mini SQL
lexer that was in pg_backup_db.c, thinking that it had no real purpose
beyond separating COPY data from SQL commands, which purpose had been
obsoleted by long-ago fixes in pg_dump's archive file format.
Unfortunately this was in error: that code was also used to identify
command boundaries in INSERT-style table data, which is run together as a
single string in the archive file for better compressibility. As a result,
direct-to-database restores from archive files made with --inserts or
--column-inserts fail in our latest releases, as reported by Dick Visser.
To fix, restore the mini SQL lexer, but simplify it by adjusting the
calling logic so that it's only required to cope with INSERT-style table
data, not arbitrary SQL commands. This allows us to not have to deal with
SQL comments, E'' strings, or dollar-quoted strings, none of which have
ever been emitted by dumpTableData_insert.
Also, fix the lexer to cope with standard-conforming strings, which was the
actual bug that the previous patch was meant to solve.
Back-patch to all supported branches. The previous patch went back to 8.2,
which unfortunately means that the EOL release of 8.2 contains this bug,
but I don't think we're doing another 8.2 release just because of that.
2012-01-06 19:04:09 +01:00
|
|
|
/* What kind of text the archiver is currently emitting */
typedef enum
{
	OUTPUT_SQLCMDS = 0,			/* emitting general SQL commands */
	OUTPUT_COPYDATA,			/* writing COPY data */
	OUTPUT_OTHERDATA,			/* writing data as INSERT commands */
} ArchiverOutput;
|
|
|
|
|
Fix pg_dump/pg_restore to emit REFRESH MATERIALIZED VIEW commands last.
Because we push all ACL (i.e. GRANT/REVOKE) restore steps to the end,
materialized view refreshes were occurring while the permissions on
referenced objects were still at defaults. This led to failures if,
say, an MV owned by user A reads from a table owned by user B, even
if B had granted the necessary privileges to A. We've had multiple
complaints about that type of restore failure, most recently from
Jordan Gigov.
The ideal fix for this would be to start treating ACLs as dependency-
sortable objects, rather than hard-wiring anything about their dump order
(the existing approach is a messy kluge dating to commit dc0e76ca3).
But that's going to be a rather major change, and it certainly wouldn't
lead to a back-patchable fix. As a short-term solution, convert the
existing two-pass hack (ie, normal objects then ACLs) to a three-pass hack,
ie, normal objects then ACLs then matview refreshes. Because this happens
in RestoreArchive(), it will also fix the problem when restoring from an
existing archive-format dump.
(Note this means that if a matview refresh would have failed under the
permissions prevailing at dump time, it'll fail during restore as well.
We'll define that as user error rather than something we should try
to work around.)
To avoid performance loss in parallel restore, we need the matview
refreshes to still be parallelizable. Hence, clean things up enough
so that both ACLs and matviews are handled by the parallel restore
infrastructure, instead of reverting back to serial restore for ACLs.
There is still a final serial step, but it shouldn't normally have to
do anything; it's only there to try to recover if we get stuck due to
some problem like unresolved circular dependencies.
Patch by me, but it owes something to an earlier attempt by Kevin Grittner.
Back-patch to 9.3 where materialized views were introduced.
Discussion: https://postgr.es/m/28572.1500912583@sss.pgh.pa.us
2017-08-03 23:36:23 +02:00
|
|
|
/*
 * For historical reasons, ACL items are interspersed with everything else in
 * a dump file's TOC; typically they're right after the object they're for.
 * However, we need to restore data before ACLs, as otherwise a read-only
 * table (ie one where the owner has revoked her own INSERT privilege) causes
 * data restore failures.  On the other hand, matview REFRESH commands should
 * come out after ACLs, as otherwise non-superuser-owned matviews might not
 * be able to execute.  (If the permissions at the time of dumping would not
 * allow a REFRESH, too bad; we won't fix that for you.)  We also want event
 * triggers to be restored after ACLs, so that they can't mess those up.
 *
 * These considerations force us to make three passes over the TOC,
 * restoring the appropriate subset of items in each pass.  We assume that
 * the dependency sort resulted in an appropriate ordering of items within
 * each subset.
 *
 * XXX This mechanism should be superseded by tracking dependencies on ACLs
 * properly; but we'll still need it for old dump files even after that.
 */
|
|
|
|
typedef enum
|
|
|
|
{
|
|
|
|
RESTORE_PASS_MAIN = 0, /* Main pass (most TOC item types) */
|
|
|
|
RESTORE_PASS_ACL, /* ACL item types */
|
Fix pg_dump/pg_restore to restore event triggers later.
Previously, event triggers were restored just after regular triggers
(and FK constraints, which are basically triggers). This is risky
since an event trigger, once installed, could interfere with subsequent
restore commands. Worse, because event triggers don't have any
particular dependencies on any post-data objects, a parallel restore
would consider them eligible to be restored the moment the post-data
phase starts, allowing them to also interfere with restoration of a
whole bunch of objects that would have been restored before them in
a serial restore. There's no way to completely remove the risk of a
misguided event trigger breaking the restore, since if nothing else
it could break other event triggers. But we can certainly push them
to later in the process to minimize the hazard.
To fix, tweak the RestorePass mechanism introduced by commit 3eb9a5e7c
so that event triggers are handled as part of the post-ACL processing
pass (renaming the "REFRESH" pass to "POST_ACL" to reflect its more
general use). This will cause them to restore after everything except
matview refreshes, which seems OK since matview refreshes really ought
to run in the post-restore state of the database. In a parallel
restore, event triggers and matview refreshes might be intermixed,
but that seems all right as well.
Also update the code and comments in pg_dump_sort.c so that its idea
of how things are sorted agrees with what actually happens due to
the RestorePass mechanism. This is mostly cosmetic: it'll affect the
order of objects in a dump's TOC, but not the actual restore order.
But not changing that would be quite confusing to somebody reading
the code.
Back-patch to all supported branches.
Fabrízio de Royes Mello, tweaked a bit by me
Discussion: https://postgr.es/m/CAFcNs+ow1hmFox8P--3GSdtwz-S3Binb6ZmoP6Vk+Xg=K6eZNA@mail.gmail.com
2020-03-09 19:58:11 +01:00
|
|
|
RESTORE_PASS_POST_ACL, /* Event trigger and matview refresh items */
|
Fix pg_dump/pg_restore to emit REFRESH MATERIALIZED VIEW commands last.
Because we push all ACL (i.e. GRANT/REVOKE) restore steps to the end,
materialized view refreshes were occurring while the permissions on
referenced objects were still at defaults. This led to failures if,
say, an MV owned by user A reads from a table owned by user B, even
if B had granted the necessary privileges to A. We've had multiple
complaints about that type of restore failure, most recently from
Jordan Gigov.
The ideal fix for this would be to start treating ACLs as dependency-
sortable objects, rather than hard-wiring anything about their dump order
(the existing approach is a messy kluge dating to commit dc0e76ca3).
But that's going to be a rather major change, and it certainly wouldn't
lead to a back-patchable fix. As a short-term solution, convert the
existing two-pass hack (ie, normal objects then ACLs) to a three-pass hack,
ie, normal objects then ACLs then matview refreshes. Because this happens
in RestoreArchive(), it will also fix the problem when restoring from an
existing archive-format dump.
(Note this means that if a matview refresh would have failed under the
permissions prevailing at dump time, it'll fail during restore as well.
We'll define that as user error rather than something we should try
to work around.)
To avoid performance loss in parallel restore, we need the matview
refreshes to still be parallelizable. Hence, clean things up enough
so that both ACLs and matviews are handled by the parallel restore
infrastructure, instead of reverting back to serial restore for ACLs.
There is still a final serial step, but it shouldn't normally have to
do anything; it's only there to try to recover if we get stuck due to
some problem like unresolved circular dependencies.
Patch by me, but it owes something to an earlier attempt by Kevin Grittner.
Back-patch to 9.3 where materialized views were introduced.
Discussion: https://postgr.es/m/28572.1500912583@sss.pgh.pa.us
2017-08-03 23:36:23 +02:00
|
|
|
|
Fix pg_dump/pg_restore to restore event triggers later.
Previously, event triggers were restored just after regular triggers
(and FK constraints, which are basically triggers). This is risky
since an event trigger, once installed, could interfere with subsequent
restore commands. Worse, because event triggers don't have any
particular dependencies on any post-data objects, a parallel restore
would consider them eligible to be restored the moment the post-data
phase starts, allowing them to also interfere with restoration of a
whole bunch of objects that would have been restored before them in
a serial restore. There's no way to completely remove the risk of a
misguided event trigger breaking the restore, since if nothing else
it could break other event triggers. But we can certainly push them
to later in the process to minimize the hazard.
To fix, tweak the RestorePass mechanism introduced by commit 3eb9a5e7c
so that event triggers are handled as part of the post-ACL processing
pass (renaming the "REFRESH" pass to "POST_ACL" to reflect its more
general use). This will cause them to restore after everything except
matview refreshes, which seems OK since matview refreshes really ought
to run in the post-restore state of the database. In a parallel
restore, event triggers and matview refreshes might be intermixed,
but that seems all right as well.
Also update the code and comments in pg_dump_sort.c so that its idea
of how things are sorted agrees with what actually happens due to
the RestorePass mechanism. This is mostly cosmetic: it'll affect the
order of objects in a dump's TOC, but not the actual restore order.
But not changing that would be quite confusing to somebody reading
the code.
Back-patch to all supported branches.
Fabrízio de Royes Mello, tweaked a bit by me
Discussion: https://postgr.es/m/CAFcNs+ow1hmFox8P--3GSdtwz-S3Binb6ZmoP6Vk+Xg=K6eZNA@mail.gmail.com
2020-03-09 19:58:11 +01:00
|
|
|
#define RESTORE_PASS_LAST RESTORE_PASS_POST_ACL
|
Fix pg_dump/pg_restore to emit REFRESH MATERIALIZED VIEW commands last.
Because we push all ACL (i.e. GRANT/REVOKE) restore steps to the end,
materialized view refreshes were occurring while the permissions on
referenced objects were still at defaults. This led to failures if,
say, an MV owned by user A reads from a table owned by user B, even
if B had granted the necessary privileges to A. We've had multiple
complaints about that type of restore failure, most recently from
Jordan Gigov.
The ideal fix for this would be to start treating ACLs as dependency-
sortable objects, rather than hard-wiring anything about their dump order
(the existing approach is a messy kluge dating to commit dc0e76ca3).
But that's going to be a rather major change, and it certainly wouldn't
lead to a back-patchable fix. As a short-term solution, convert the
existing two-pass hack (ie, normal objects then ACLs) to a three-pass hack,
ie, normal objects then ACLs then matview refreshes. Because this happens
in RestoreArchive(), it will also fix the problem when restoring from an
existing archive-format dump.
(Note this means that if a matview refresh would have failed under the
permissions prevailing at dump time, it'll fail during restore as well.
We'll define that as user error rather than something we should try
to work around.)
To avoid performance loss in parallel restore, we need the matview
refreshes to still be parallelizable. Hence, clean things up enough
so that both ACLs and matviews are handled by the parallel restore
infrastructure, instead of reverting back to serial restore for ACLs.
There is still a final serial step, but it shouldn't normally have to
do anything; it's only there to try to recover if we get stuck due to
some problem like unresolved circular dependencies.
Patch by me, but it owes something to an earlier attempt by Kevin Grittner.
Back-patch to 9.3 where materialized views were introduced.
Discussion: https://postgr.es/m/28572.1500912583@sss.pgh.pa.us
2017-08-03 23:36:23 +02:00
|
|
|
} RestorePass;
|
|
|
|
|
2020-12-11 19:15:30 +01:00
|
|
|
#define REQ_SCHEMA 0x01 /* want schema */
|
|
|
|
#define REQ_DATA 0x02 /* want data */
|
|
|
|
#define REQ_SPECIAL 0x04 /* for special TOC entries */
|
2005-01-25 23:44:31 +01:00
|
|
|
|
2014-10-14 20:00:55 +02:00
|
|
|
struct _archiveHandle
|
2000-07-11 15:07:17 +02:00
|
|
|
{
|
2000-07-21 13:40:08 +02:00
|
|
|
Archive public; /* Public part of archive */
|
2016-10-25 18:00:00 +02:00
|
|
|
int version; /* Version of file */
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2004-11-06 20:36:02 +01:00
|
|
|
char *archiveRemoteVersion; /* When reading an archive, the
|
|
|
|
* version of the dumped DB */
|
|
|
|
char *archiveDumpVersion; /* When reading an archive, the version of
|
|
|
|
* the dumper */
|
|
|
|
|
2002-08-20 19:54:45 +02:00
|
|
|
size_t intSize; /* Size of an integer in the archive */
|
2002-10-22 21:15:23 +02:00
|
|
|
size_t offSize; /* Size of a file offset in the archive -
|
|
|
|
* Added V1.7 */
|
2000-07-11 15:07:17 +02:00
|
|
|
ArchiveFormat format; /* Archive format */
|
|
|
|
|
Fix pg_restore's direct-to-database mode for INSERT-style table data.
In commit 6545a901aaf84cb05212bb6a7674059908f527c3, I removed the mini SQL
lexer that was in pg_backup_db.c, thinking that it had no real purpose
beyond separating COPY data from SQL commands, which purpose had been
obsoleted by long-ago fixes in pg_dump's archive file format.
Unfortunately this was in error: that code was also used to identify
command boundaries in INSERT-style table data, which is run together as a
single string in the archive file for better compressibility. As a result,
direct-to-database restores from archive files made with --inserts or
--column-inserts fail in our latest releases, as reported by Dick Visser.
To fix, restore the mini SQL lexer, but simplify it by adjusting the
calling logic so that it's only required to cope with INSERT-style table
data, not arbitrary SQL commands. This allows us to not have to deal with
SQL comments, E'' strings, or dollar-quoted strings, none of which have
ever been emitted by dumpTableData_insert.
Also, fix the lexer to cope with standard-conforming strings, which was the
actual bug that the previous patch was meant to solve.
Back-patch to all supported branches. The previous patch went back to 8.2,
which unfortunately means that the EOL release of 8.2 contains this bug,
but I don't think we're doing another 8.2 release just because of that.
2012-01-06 19:04:09 +01:00
|
|
|
sqlparseInfo sqlparse; /* state for parsing INSERT data */
|
|
|
|
|
2000-07-21 13:40:08 +02:00
|
|
|
time_t createDate; /* Date archive created */
|
|
|
|
|
|
|
|
/*
|
2021-04-01 19:34:16 +02:00
|
|
|
* Fields used when discovering archive format. For tar format, we load
|
|
|
|
* the first block into the lookahead buffer, and verify that it looks
|
|
|
|
* like a tar header. The tar module must then consume bytes from the
|
|
|
|
* lookahead buffer before reading any more from the file. For custom
|
|
|
|
* format, we load only the "PGDMP" marker into the buffer, and then set
|
|
|
|
* readHeader after confirming it matches. The buffer is vestigial in
|
|
|
|
* this case, as the subsequent code just checks readHeader and doesn't
|
|
|
|
* examine the buffer.
|
2000-07-21 13:40:08 +02:00
|
|
|
*/
|
2021-04-01 19:34:16 +02:00
|
|
|
int readHeader; /* Set if we already read "PGDMP" marker */
|
2000-07-21 13:40:08 +02:00
|
|
|
char *lookahead; /* Buffer used when reading header to discover
|
|
|
|
* format */
|
2021-04-01 19:34:16 +02:00
|
|
|
size_t lookaheadSize; /* Allocated size of buffer */
|
|
|
|
size_t lookaheadLen; /* Length of valid data in lookahead */
|
|
|
|
size_t lookaheadPos; /* Current read position in lookahead buffer */
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2016-08-30 18:00:00 +02:00
|
|
|
ArchiveEntryPtrType ArchiveEntryPtr; /* Called for each metadata object */
|
|
|
|
StartDataPtrType StartDataPtr; /* Called when table data is about to be
|
2000-07-11 15:07:17 +02:00
|
|
|
* dumped */
|
2016-08-30 18:00:00 +02:00
|
|
|
WriteDataPtrType WriteDataPtr; /* Called to send some table data to the
|
2000-07-11 15:07:17 +02:00
|
|
|
* archive */
|
2016-08-30 18:00:00 +02:00
|
|
|
EndDataPtrType EndDataPtr; /* Called when table data dump is finished */
|
|
|
|
WriteBytePtrType WriteBytePtr; /* Write a byte to output */
|
|
|
|
ReadBytePtrType ReadBytePtr; /* Read a byte from an archive */
|
|
|
|
WriteBufPtrType WriteBufPtr; /* Write a buffer of output to the archive */
|
|
|
|
ReadBufPtrType ReadBufPtr; /* Read a buffer of input from the archive */
|
|
|
|
ClosePtrType ClosePtr; /* Close the archive */
|
|
|
|
ReopenPtrType ReopenPtr; /* Reopen the archive */
|
|
|
|
WriteExtraTocPtrType WriteExtraTocPtr; /* Write extra TOC entry data
|
2001-04-01 07:42:51 +02:00
|
|
|
* associated with the current
|
|
|
|
* archive format */
|
2017-05-17 02:36:35 +02:00
|
|
|
ReadExtraTocPtrType ReadExtraTocPtr; /* Read extra info associated with
|
|
|
|
* archive format */
|
2016-08-30 18:00:00 +02:00
|
|
|
PrintExtraTocPtrType PrintExtraTocPtr; /* Extra TOC info for format */
|
|
|
|
PrintTocDataPtrType PrintTocDataPtr;
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2022-12-05 08:52:11 +01:00
|
|
|
StartLOsPtrType StartLOsPtr;
|
|
|
|
EndLOsPtrType EndLOsPtr;
|
|
|
|
StartLOPtrType StartLOPtr;
|
|
|
|
EndLOPtrType EndLOPtr;
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2016-08-30 18:00:00 +02:00
|
|
|
SetupWorkerPtrType SetupWorkerPtr;
|
|
|
|
WorkerJobDumpPtrType WorkerJobDumpPtr;
|
|
|
|
WorkerJobRestorePtrType WorkerJobRestorePtr;
|
2013-03-24 16:27:20 +01:00
|
|
|
|
Improve parallel scheduling logic in pg_dump/pg_restore.
Previously, the way this worked was that a parallel pg_dump would
re-order the TABLE_DATA items in the dump's TOC into decreasing size
order, and separately re-order (some of) the INDEX items into decreasing
size order. Then pg_dump would dump the items in that order. Later,
parallel pg_restore just followed the TOC order. This method had lots
of deficiencies:
* TOC ordering randomly differed between parallel and non-parallel
dumps, and was hard to predict in the former case, causing problems
for building stable pg_dump test cases.
* Parallel restore only followed a well-chosen order if the dump had
been done in parallel; in particular, this never happened for restore
from custom-format dumps.
* The best order for restore isn't necessarily the same as for dump,
and it's not really static either because of locking considerations.
* TABLE_DATA and INDEX items aren't the only things that might take a lot
of work during restore. Scheduling was particularly stupid for the BLOBS
item, which might require lots of work during dump as well as restore,
but was left to the end in either case.
This patch removes the logic that changed the TOC order, fixing the
test instability problem. Instead, we sort the parallelizable items
just before processing them during a parallel dump. Independently
of that, parallel restore prioritizes the ready-to-execute tasks
based on the size of the underlying table. In the case of dependent
tasks such as index, constraint, or foreign key creation, the largest
relevant table is used as the metric for estimating the task length.
(This is pretty crude, but it should be enough to avoid the case we
want to avoid, which is ending the run with just a few large tasks
such that we can't make use of all N workers.)
Patch by me, responding to a complaint from Peter Eisentraut,
who also reviewed the patch.
Discussion: https://postgr.es/m/5137fe12-d0a2-4971-61b6-eb4e7e8875f8@2ndquadrant.com
2018-09-14 23:31:51 +02:00
|
|
|
PrepParallelRestorePtrType PrepParallelRestorePtr;
|
2016-08-30 18:00:00 +02:00
|
|
|
ClonePtrType ClonePtr; /* Clone format-specific fields */
|
|
|
|
DeClonePtrType DeClonePtr; /* Clean up cloned fields */
|
2009-02-02 21:07:37 +01:00
|
|
|
|
2016-08-30 18:00:00 +02:00
|
|
|
CustomOutPtrType CustomOutPtr; /* Alternative script output routine */
|
2000-07-21 13:40:08 +02:00
|
|
|
|
|
|
|
/* Stuff for direct DB connection */
|
|
|
|
char *archdbname; /* DB name *read* from archive */
|
2009-02-02 21:07:37 +01:00
|
|
|
char *savedPassword; /* password for ropt->username, if known */
|
2013-03-24 16:27:20 +01:00
|
|
|
char *use_role;
|
2000-07-21 13:40:08 +02:00
|
|
|
PGconn *connection;
|
Redesign handling of SIGTERM/control-C in parallel pg_dump/pg_restore.
Formerly, Unix builds of pg_dump/pg_restore would trap SIGINT and similar
signals and set a flag that was tested in various data-transfer loops.
This was prone to errors of omission (cf commit 3c8aa6654); and even if
the client-side response was prompt, we did nothing that would cause
long-running SQL commands (e.g. CREATE INDEX) to terminate early.
Also, the master process would effectively do nothing at all upon receipt
of SIGINT; the only reason it seemed to work was that in typical scenarios
the signal would also be delivered to the child processes. We should
support termination when a signal is delivered only to the master process,
though.
Windows builds had no console interrupt handler, so they would just fall
over immediately at control-C, again leaving long-running SQL commands to
finish unmolested.
To fix, remove the flag-checking approach altogether. Instead, allow the
Unix signal handler to send a cancel request directly and then exit(1).
In the master process, also have it forward the signal to the children.
On Windows, add a console interrupt handler that behaves approximately
the same. The main difference is that a single execution of the Windows
handler can send all the cancel requests since all the info is available
in one process, whereas on Unix each process sends a cancel only for its
own database connection.
In passing, fix an old problem that DisconnectDatabase tends to send a
cancel request before exiting a parallel worker, even if nothing went
wrong. This is at least a waste of cycles, and could lead to unexpected
log messages, or maybe even data loss if it happened in pg_restore (though
in the current code the problem seems to affect only pg_dump). The cause
was that after a COPY step, pg_dump was leaving libpq in PGASYNC_BUSY
state, causing PQtransactionStatus() to report PQTRANS_ACTIVE. That's
normally harmless because the next PQexec() will silently clear the
PGASYNC_BUSY state; but in a parallel worker we might exit without any
additional SQL commands after a COPY step. So add an extra PQgetResult()
call after a COPY to allow libpq to return to PGASYNC_IDLE state.
This is a bug fix, IMO, so back-patch to 9.3 where parallel dump/restore
were introduced.
Thanks to Kyotaro Horiguchi for Windows testing and code suggestions.
Original-Patch: <7005.1464657274@sss.pgh.pa.us>
Discussion: <20160602.174941.256342236.horiguchi.kyotaro@lab.ntt.co.jp>
2016-06-02 19:27:53 +02:00
|
|
|
/* If connCancel isn't NULL, SIGINT handler will send a cancel */
|
|
|
|
PGcancel *volatile connCancel;
|
|
|
|
|
2000-07-21 13:40:08 +02:00
|
|
|
int connectToDB; /* Flag to indicate if direct DB connection is
|
|
|
|
* required */
|
Fix pg_restore's direct-to-database mode for INSERT-style table data.
In commit 6545a901aaf84cb05212bb6a7674059908f527c3, I removed the mini SQL
lexer that was in pg_backup_db.c, thinking that it had no real purpose
beyond separating COPY data from SQL commands, which purpose had been
obsoleted by long-ago fixes in pg_dump's archive file format.
Unfortunately this was in error: that code was also used to identify
command boundaries in INSERT-style table data, which is run together as a
single string in the archive file for better compressibility. As a result,
direct-to-database restores from archive files made with --inserts or
--column-inserts fail in our latest releases, as reported by Dick Visser.
To fix, restore the mini SQL lexer, but simplify it by adjusting the
calling logic so that it's only required to cope with INSERT-style table
data, not arbitrary SQL commands. This allows us to not have to deal with
SQL comments, E'' strings, or dollar-quoted strings, none of which have
ever been emitted by dumpTableData_insert.
Also, fix the lexer to cope with standard-conforming strings, which was the
actual bug that the previous patch was meant to solve.
Back-patch to all supported branches. The previous patch went back to 8.2,
which unfortunately means that the EOL release of 8.2 contains this bug,
but I don't think we're doing another 8.2 release just because of that.
2012-01-06 19:04:09 +01:00
|
|
|
ArchiverOutput outputKind; /* Flag for what we're currently writing */
|
2006-02-05 21:58:47 +01:00
|
|
|
bool pgCopyIn; /* Currently in libpq 'COPY IN' mode. */
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2022-12-05 08:52:11 +01:00
|
|
|
int loFd;
|
|
|
|
bool writingLO;
|
|
|
|
int loCount; /* # of LOs restored */
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2000-07-21 13:40:08 +02:00
|
|
|
char *fSpec; /* Archive File Spec */
|
|
|
|
FILE *FH; /* General purpose file handle */
|
2023-02-23 15:38:14 +01:00
|
|
|
void *OF; /* Output file */
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2012-05-29 02:38:28 +02:00
|
|
|
struct _tocEntry *toc; /* Header of circular list of TOC entries */
|
2000-07-11 15:07:17 +02:00
|
|
|
int tocCount; /* Number of TOC entries */
|
2003-12-06 04:00:16 +01:00
|
|
|
DumpId maxDumpId; /* largest DumpId among all TOC entries */
|
|
|
|
|
2012-05-29 02:38:28 +02:00
|
|
|
/* arrays created after the TOC list is complete: */
|
|
|
|
struct _tocEntry **tocsByDumpId; /* TOCs indexed by dumpId */
|
|
|
|
DumpId *tableDataId; /* TABLE DATA ids, indexed by table dumpId */
|
|
|
|
|
2000-07-11 15:07:17 +02:00
|
|
|
struct _tocEntry *currToc; /* Used when dumping data */
|
Switch pg_dump to use compression specifications
Compression specifications are currently used by pg_basebackup and
pg_receivewal, and are able to let the user control in an extended way
the method and level of compression used. As an effect of this commit,
pg_dump's -Z/--compress is now able to use more than just an integer, as
of the grammar "method[:detail]".
The method can be either "none" or "gzip", and can optionally take a
detail string. If the detail string is only an integer, it defines the
compression level. A comma-separated list of keywords can also be used
method allows for more options, the only keyword supported now is
"level".
The change is backward-compatible, hence specifying only an integer
leads to no compression for a level of 0 and gzip compression when the
level is greater than 0.
Most of the code changes are straight-forward, as pg_dump was relying on
an integer tracking the compression level to check for gzip or no
compression. These are changed to use a compression specification and
the algorithm stored in it.
As of this change, note that the dump format is not bumped because there
is no need yet to track the compression algorithm in the TOC entries.
Hence, we still rely on the compression level to make the difference
when reading them. This will be mandatory once a new compression method
is added, though.
In order to keep the code simpler when parsing the compression
specification, the code is changed so as pg_dump now fails hard when
using gzip on -Z/--compress without its support compiled, rather than
enforcing no compression without the user knowing about it except
through a warning. Like before this commit, archive and custom formats
are compressed by default when the code is compiled with gzip, and left
uncompressed without gzip.
Author: Georgios Kokolatos
Reviewed-by: Michael Paquier
Discussion: https://postgr.es/m/O4mutIrCES8ZhlXJiMvzsivT7ztAMja2lkdL1LJx6O5f22I2W8PBIeLKz7mDLwxHoibcnRAYJXm1pH4tyUNC4a8eDzLn22a6Pb1S74Niexg=@pm.me
2022-12-02 02:45:02 +01:00
|
|
|
pg_compress_specification compression_spec; /* Requested specification for
|
|
|
|
* compression */
|
2017-03-22 15:00:30 +01:00
|
|
|
bool dosync; /* data requested to be synced on sight */
|
2023-09-07 01:27:00 +02:00
|
|
|
DataDirSyncMethod sync_method;
|
2000-07-11 15:07:17 +02:00
|
|
|
ArchiveMode mode; /* File mode - r or w */
|
|
|
|
void *formatData; /* Header data specific to file format */
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2003-10-03 22:10:59 +02:00
|
|
|
/* these vars track state to avoid sending redundant SET commands */
|
2009-02-02 21:07:37 +01:00
|
|
|
char *currUser; /* current username, or NULL if unknown */
|
|
|
|
char *currSchema; /* current schema, or NULL */
|
|
|
|
char *currTablespace; /* current tablespace, or NULL */
|
2019-03-06 18:54:38 +01:00
|
|
|
char *currTableAm; /* current table access method, or NULL */
|
2004-08-29 07:07:03 +02:00
|
|
|
|
2002-04-24 04:21:04 +02:00
|
|
|
void *lo_buf;
|
2002-08-20 19:54:45 +02:00
|
|
|
size_t lo_buf_used;
|
|
|
|
size_t lo_buf_size;
|
They are two different problems; the TOC entry is important for any
multiline command or to rerun the command easily later.
Whereas displaying the failed SQL command is a matter of fixing the
error
messages.
The latter is complicated by failed COPY commands which, with
die-on-errors off, result in the data being processed as a command,
so dumping the command will dump all of the data.
In the case of long commands, should the whole command be dumped?
(e.g. several pages of function definition).
In the case of the COPY command, I'm not sure what to do. Obviously, it
would be best to avoid sending the data, but the data and command are
combined (from memory). Also, the 'data' may be in the form of INSERT
statements.
Attached patch produces the first 125 chars of the command:
pg_restore: [archiver (db)] Error while PROCESSING TOC:
pg_restore: [archiver (db)] Error from TOC Entry 26; 1255 16449270
FUNCTION
plpgsql_call_handler() pjw
pg_restore: [archiver (db)] could not execute query: ERROR: function
"plpgsql_call_handler" already exists with same argument types
Command was: CREATE FUNCTION plpgsql_call_handler() RETURNS
language_handler
AS '/var/lib/pgsql-8.0b1/lib/plpgsql', 'plpgsql_call_han...
pg_restore: [archiver (db)] Error from TOC Entry 27; 1255 16449271
FUNCTION
plpgsql_validator(oid) pjw
pg_restore: [archiver (db)] could not execute query: ERROR: function
"plpgsql_validator" already exists with same argument types
Command was: CREATE FUNCTION plpgsql_validator(oid) RETURNS void
AS '/var/lib/pgsql-8.0b1/lib/plpgsql', 'plpgsql_validator'
LANGU...
Philip Warner
2004-08-20 22:00:34 +02:00
|
|
|
|
|
|
|
int noTocComments;
|
|
|
|
ArchiverStage stage;
|
|
|
|
ArchiverStage lastErrorStage;
|
Fix pg_dump/pg_restore to emit REFRESH MATERIALIZED VIEW commands last.
Because we push all ACL (i.e. GRANT/REVOKE) restore steps to the end,
materialized view refreshes were occurring while the permissions on
referenced objects were still at defaults. This led to failures if,
say, an MV owned by user A reads from a table owned by user B, even
if B had granted the necessary privileges to A. We've had multiple
complaints about that type of restore failure, most recently from
Jordan Gigov.
The ideal fix for this would be to start treating ACLs as dependency-
sortable objects, rather than hard-wiring anything about their dump order
(the existing approach is a messy kluge dating to commit dc0e76ca3).
But that's going to be a rather major change, and it certainly wouldn't
lead to a back-patchable fix. As a short-term solution, convert the
existing two-pass hack (ie, normal objects then ACLs) to a three-pass hack,
ie, normal objects then ACLs then matview refreshes. Because this happens
in RestoreArchive(), it will also fix the problem when restoring from an
existing archive-format dump.
(Note this means that if a matview refresh would have failed under the
permissions prevailing at dump time, it'll fail during restore as well.
We'll define that as user error rather than something we should try
to work around.)
To avoid performance loss in parallel restore, we need the matview
refreshes to still be parallelizable. Hence, clean things up enough
so that both ACLs and matviews are handled by the parallel restore
infrastructure, instead of reverting back to serial restore for ACLs.
There is still a final serial step, but it shouldn't normally have to
do anything; it's only there to try to recover if we get stuck due to
some problem like unresolved circular dependencies.
Patch by me, but it owes something to an earlier attempt by Kevin Grittner.
Back-patch to 9.3 where materialized views were introduced.
Discussion: https://postgr.es/m/28572.1500912583@sss.pgh.pa.us
2017-08-03 23:36:23 +02:00
|
|
|
RestorePass restorePass; /* used only during parallel restore */
|
They are two different problems; the TOC entry is important for any
multiline command or to rerun the command easily later.
Whereas displaying the failed SQL command is a matter of fixing the
error
messages.
The latter is complicated by failed COPY commands which, with
die-on-errors off, result in the data being processed as a command,
so dumping the command will dump all of the data.
In the case of long commands, should the whole command be dumped?
(e.g. several pages of function definition).
In the case of the COPY command, I'm not sure what to do. Obviously, it
would be best to avoid sending the data, but the data and command are
combined (from memory). Also, the 'data' may be in the form of INSERT
statements.
Attached patch produces the first 125 chars of the command:
pg_restore: [archiver (db)] Error while PROCESSING TOC:
pg_restore: [archiver (db)] Error from TOC Entry 26; 1255 16449270
FUNCTION
plpgsql_call_handler() pjw
pg_restore: [archiver (db)] could not execute query: ERROR: function
"plpgsql_call_handler" already exists with same argument types
Command was: CREATE FUNCTION plpgsql_call_handler() RETURNS
language_handler
AS '/var/lib/pgsql-8.0b1/lib/plpgsql', 'plpgsql_call_han...
pg_restore: [archiver (db)] Error from TOC Entry 27; 1255 16449271
FUNCTION
plpgsql_validator(oid) pjw
pg_restore: [archiver (db)] could not execute query: ERROR: function
"plpgsql_validator" already exists with same argument types
Command was: CREATE FUNCTION plpgsql_validator(oid) RETURNS void
AS '/var/lib/pgsql-8.0b1/lib/plpgsql', 'plpgsql_validator'
LANGU...
Philip Warner
2004-08-20 22:00:34 +02:00
|
|
|
struct _tocEntry *currentTE;
|
|
|
|
struct _tocEntry *lastErrorTE;
|
2014-10-14 20:00:55 +02:00
|
|
|
};
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2014-10-14 20:00:55 +02:00
|
|
|
struct _tocEntry
|
2000-07-11 15:07:17 +02:00
|
|
|
{
|
|
|
|
struct _tocEntry *prev;
|
|
|
|
struct _tocEntry *next;
|
2003-12-06 04:00:16 +01:00
|
|
|
CatalogId catalogId;
|
|
|
|
DumpId dumpId;
|
2009-02-02 21:07:37 +01:00
|
|
|
teSection section;
|
2003-12-06 04:00:16 +01:00
|
|
|
bool hadDumper; /* Archiver was passed a dumper routine (used
|
2000-07-11 15:07:17 +02:00
|
|
|
* in restore) */
|
2002-07-04 17:35:07 +02:00
|
|
|
char *tag; /* index tag */
|
2002-05-11 00:36:27 +02:00
|
|
|
char *namespace; /* null or empty string if not in a schema */
|
2004-11-06 20:36:02 +01:00
|
|
|
char *tablespace; /* null if not in a tablespace; empty string
|
|
|
|
* means use database default */
|
2019-03-06 18:54:38 +01:00
|
|
|
char *tableam; /* table access method, only for TABLE tags */
|
2002-05-11 00:36:27 +02:00
|
|
|
char *owner;
|
2000-07-11 15:07:17 +02:00
|
|
|
char *desc;
|
|
|
|
char *defn;
|
|
|
|
char *dropStmt;
|
2000-07-21 13:40:08 +02:00
|
|
|
char *copyStmt;
|
2003-12-06 04:00:16 +01:00
|
|
|
DumpId *dependencies; /* dumpIds of objects this one depends on */
|
|
|
|
int nDeps; /* number of dependencies */
|
2001-04-01 07:42:51 +02:00
|
|
|
|
2000-07-11 15:07:17 +02:00
|
|
|
DataDumperPtr dataDumper; /* Routine to dump data for object */
|
2021-02-10 13:08:13 +01:00
|
|
|
const void *dataDumperArg; /* Arg for above routine */
|
2000-07-11 15:07:17 +02:00
|
|
|
void *formatData; /* TOC Entry data specific to file format */
|
2009-02-02 21:07:37 +01:00
|
|
|
|
Rewrite --section option to decouple it from --schema-only/--data-only.
The initial implementation of pg_dump's --section option supposed that the
existing --schema-only and --data-only options could be made equivalent to
--section settings. This is wrong, though, due to dubious but long since
set-in-stone decisions about where to dump SEQUENCE SET items, as seen in
bug report from Martin Pitt. (And I'm not totally convinced there weren't
other bugs, either.) Undo that coupling and instead drive --section
filtering off current-section state tracked as we scan through the TOC
list to call _tocEntryRequired().
To make sure those decisions don't shift around and hopefully save a few
cycles, run _tocEntryRequired() only once per TOC entry and save the result
in a new TOC field. This required minor rejiggering of ACL handling but
also allows a far cleaner implementation of inhibit_data_for_failed_table.
Also, to ensure that pg_dump and pg_restore have the same behavior with
respect to the --section switches, add _tocEntryRequired() filtering to
WriteToc() and WriteDataChunks(), rather than trying to implement section
filtering in an entirely orthogonal way in dumpDumpableObject(). This
required adjusting the handling of the special ENCODING and STDSTRINGS
items, but they were pretty weird before anyway.
Minor other code review for the patch, too.
2012-05-30 05:22:14 +02:00
|
|
|
/* working state while dumping/restoring */
|
Improve parallel scheduling logic in pg_dump/pg_restore.
Previously, the way this worked was that a parallel pg_dump would
re-order the TABLE_DATA items in the dump's TOC into decreasing size
order, and separately re-order (some of) the INDEX items into decreasing
size order. Then pg_dump would dump the items in that order. Later,
parallel pg_restore just followed the TOC order. This method had lots
of deficiencies:
* TOC ordering randomly differed between parallel and non-parallel
dumps, and was hard to predict in the former case, causing problems
for building stable pg_dump test cases.
* Parallel restore only followed a well-chosen order if the dump had
been done in parallel; in particular, this never happened for restore
from custom-format dumps.
* The best order for restore isn't necessarily the same as for dump,
and it's not really static either because of locking considerations.
* TABLE_DATA and INDEX items aren't the only things that might take a lot
of work during restore. Scheduling was particularly stupid for the BLOBS
item, which might require lots of work during dump as well as restore,
but was left to the end in either case.
This patch removes the logic that changed the TOC order, fixing the
test instability problem. Instead, we sort the parallelizable items
just before processing them during a parallel dump. Independently
of that, parallel restore prioritizes the ready-to-execute tasks
based on the size of the underlying table. In the case of dependent
tasks such as index, constraint, or foreign key creation, the largest
relevant table is used as the metric for estimating the task length.
(This is pretty crude, but it should be enough to avoid the case we
want to avoid, which is ending the run with just a few large tasks
such that we can't make use of all N workers.)
Patch by me, responding to a complaint from Peter Eisentraut,
who also reviewed the patch.
Discussion: https://postgr.es/m/5137fe12-d0a2-4971-61b6-eb4e7e8875f8@2ndquadrant.com
2018-09-14 23:31:51 +02:00
|
|
|
pgoff_t dataLength; /* item's data size; 0 if none or unknown */
|
2020-12-11 19:15:30 +01:00
|
|
|
int reqs; /* do we need schema and/or data of object
|
|
|
|
* (REQ_* bit mask) */
|
Rewrite --section option to decouple it from --schema-only/--data-only.
The initial implementation of pg_dump's --section option supposed that the
existing --schema-only and --data-only options could be made equivalent to
--section settings. This is wrong, though, due to dubious but long since
set-in-stone decisions about where to dump SEQUENCE SET items, as seen in
bug report from Martin Pitt. (And I'm not totally convinced there weren't
other bugs, either.) Undo that coupling and instead drive --section
filtering off current-section state tracked as we scan through the TOC
list to call _tocEntryRequired().
To make sure those decisions don't shift around and hopefully save a few
cycles, run _tocEntryRequired() only once per TOC entry and save the result
in a new TOC field. This required minor rejiggering of ACL handling but
also allows a far cleaner implementation of inhibit_data_for_failed_table.
Also, to ensure that pg_dump and pg_restore have the same behavior with
respect to the --section switches, add _tocEntryRequired() filtering to
WriteToc() and WriteDataChunks(), rather than trying to implement section
filtering in an entirely orthogonal way in dumpDumpableObject(). This
required adjusting the handling of the special ENCODING and STDSTRINGS
items, but they were pretty weird before anyway.
Minor other code review for the patch, too.
2012-05-30 05:22:14 +02:00
|
|
|
bool created; /* set for DATA member if TABLE was created */
|
2011-12-17 01:09:38 +01:00
|
|
|
|
2009-02-02 21:07:37 +01:00
|
|
|
/* working state (needed only for parallel restore) */
|
Improve parallel scheduling logic in pg_dump/pg_restore.
Previously, the way this worked was that a parallel pg_dump would
re-order the TABLE_DATA items in the dump's TOC into decreasing size
order, and separately re-order (some of) the INDEX items into decreasing
size order. Then pg_dump would dump the items in that order. Later,
parallel pg_restore just followed the TOC order. This method had lots
of deficiencies:
* TOC ordering randomly differed between parallel and non-parallel
dumps, and was hard to predict in the former case, causing problems
for building stable pg_dump test cases.
* Parallel restore only followed a well-chosen order if the dump had
been done in parallel; in particular, this never happened for restore
from custom-format dumps.
* The best order for restore isn't necessarily the same as for dump,
and it's not really static either because of locking considerations.
* TABLE_DATA and INDEX items aren't the only things that might take a lot
of work during restore. Scheduling was particularly stupid for the BLOBS
item, which might require lots of work during dump as well as restore,
but was left to the end in either case.
This patch removes the logic that changed the TOC order, fixing the
test instability problem. Instead, we sort the parallelizable items
just before processing them during a parallel dump. Independently
of that, parallel restore prioritizes the ready-to-execute tasks
based on the size of the underlying table. In the case of dependent
tasks such as index, constraint, or foreign key creation, the largest
relevant table is used as the metric for estimating the task length.
(This is pretty crude, but it should be enough to avoid the case we
want to avoid, which is ending the run with just a few large tasks
such that we can't make use of all N workers.)
Patch by me, responding to a complaint from Peter Eisentraut,
who also reviewed the patch.
Discussion: https://postgr.es/m/5137fe12-d0a2-4971-61b6-eb4e7e8875f8@2ndquadrant.com
2018-09-14 23:31:51 +02:00
|
|
|
struct _tocEntry *pending_prev; /* list links for pending-items list; */
|
|
|
|
struct _tocEntry *pending_next; /* NULL if not in that list */
|
2009-02-02 21:07:37 +01:00
|
|
|
int depCount; /* number of dependencies not yet restored */
|
2010-12-09 19:03:11 +01:00
|
|
|
DumpId *revDeps; /* dumpIds of objects depending on this one */
|
|
|
|
int nRevDeps; /* number of such dependencies */
|
2009-02-02 21:07:37 +01:00
|
|
|
DumpId *lockDeps; /* dumpIds of objects this one needs lock on */
|
|
|
|
int nLockDeps; /* number of such dependencies */
|
2014-10-14 20:00:55 +02:00
|
|
|
};
|
2000-07-11 15:07:17 +02:00
|
|
|
|
Redesign parallel dump/restore's wait-for-workers logic.
The ListenToWorkers/ReapWorkerStatus APIs were messy and hard to use.
Instead, make DispatchJobForTocEntry register a callback function that
will take care of state cleanup, doing whatever had been done by the caller
of ReapWorkerStatus in the old design. (This callback is essentially just
the old mark_work_done function in the restore case, and a trivial test for
worker failure in the dump case.) Then we can have ListenToWorkers call
the callback immediately on receipt of a status message, and return the
worker to WRKR_IDLE state; so the WRKR_FINISHED state goes away.
This allows us to design a unified wait-for-worker-messages loop:
WaitForWorkers replaces EnsureIdleWorker and EnsureWorkersFinished as well
as the mess in restore_toc_entries_parallel. Also, we no longer need the
fragile API spec that the caller of DispatchJobForTocEntry is responsible
for ensuring there's an idle worker, since DispatchJobForTocEntry can just
wait until there is one.
In passing, I got rid of the ParallelArgs struct, which was a net negative
in terms of notational verboseness, and didn't seem to be providing any
noticeable amount of abstraction either.
Tom Lane, reviewed by Kevin Grittner
Discussion: <1188.1464544443@sss.pgh.pa.us>
2016-09-27 19:22:39 +02:00
|
|
|
extern int parallel_restore(ArchiveHandle *AH, TocEntry *te);
|
2012-03-20 22:38:11 +01:00
|
|
|
extern void on_exit_close_archive(Archive *AHX);
|
2003-12-06 04:00:16 +01:00
|
|
|
|
Unified logging system for command-line programs
This unifies the various ad hoc logging (message printing, error
printing) systems used throughout the command-line programs.
Features:
- Program name is automatically prefixed.
- Message string does not end with newline. This removes a common
source of inconsistencies and omissions.
- Additionally, a final newline is automatically stripped, simplifying
use of PQerrorMessage() etc., another common source of mistakes.
- I converted error message strings to use %m where possible.
- As a result of the above several points, more translatable message
strings can be shared between different components and between
frontends and backend, without gratuitous punctuation or whitespace
differences.
- There is support for setting a "log level". This is not meant to be
user-facing, but can be used internally to implement debug or
verbose modes.
- Lazy argument evaluation, so no significant overhead if logging at
some level is disabled.
- Some color in the messages, similar to gcc and clang. Set
PG_COLOR=auto to try it out. Some colors are predefined, but can be
customized by setting PG_COLORS.
- Common files (common/, fe_utils/, etc.) can handle logging much more
simply by just using one API without worrying too much about the
context of the calling program, requiring callbacks, or having to
pass "progname" around everywhere.
- Some programs called setvbuf() to make sure that stderr is
unbuffered, even on Windows. But not all programs did that. This
is now done centrally.
Soft goals:
- Reduces vertical space use and visual complexity of error reporting
in the source code.
- Encourages more deliberate classification of messages. For example,
in some cases it wasn't clear without analyzing the surrounding code
whether a message was meant as an error or just an info.
- Concepts and terms are vaguely aligned with popular logging
frameworks such as log4j and Python logging.
This is all just about printing stuff out. Nothing affects program
flow (e.g., fatal exits). The uses are just too varied to do that.
Some existing code had wrappers that do some kind of print-and-exit,
and I adapted those.
I tried to keep the output mostly the same, but there is a lot of
historical baggage to unwind and special cases to consider, and I
might not always have succeeded. One significant change is that
pg_rewind used to write all error messages to stdout. That is now
changed to stderr.
Reviewed-by: Donald Dong <xdong@csumb.edu>
Reviewed-by: Arthur Zakirov <a.zakirov@postgrespro.ru>
Discussion: https://www.postgresql.org/message-id/flat/6a609b43-4f57-7348-6480-bd022f924310@2ndquadrant.com
2019-04-01 14:24:37 +02:00
|
|
|
extern void warn_or_exit_horribly(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3);
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2019-02-01 15:29:42 +01:00
|
|
|
/* Options for ArchiveEntry */
|
|
|
|
typedef struct _archiveOpts
|
|
|
|
{
|
|
|
|
const char *tag;
|
|
|
|
const char *namespace;
|
|
|
|
const char *tablespace;
|
2019-03-06 18:54:38 +01:00
|
|
|
const char *tableam;
|
2019-02-01 15:29:42 +01:00
|
|
|
const char *owner;
|
|
|
|
const char *description;
|
|
|
|
teSection section;
|
|
|
|
const char *createStmt;
|
|
|
|
const char *dropStmt;
|
|
|
|
const char *copyStmt;
|
|
|
|
const DumpId *deps;
|
|
|
|
int nDeps;
|
|
|
|
DataDumperPtr dumpFn;
|
2021-02-10 13:08:13 +01:00
|
|
|
const void *dumpArg;
|
2019-02-01 15:29:42 +01:00
|
|
|
} ArchiveOpts;
|
|
|
|
#define ARCHIVE_OPTS(...) &(ArchiveOpts){__VA_ARGS__}
|
Improve parallel scheduling logic in pg_dump/pg_restore.
Previously, the way this worked was that a parallel pg_dump would
re-order the TABLE_DATA items in the dump's TOC into decreasing size
order, and separately re-order (some of) the INDEX items into decreasing
size order. Then pg_dump would dump the items in that order. Later,
parallel pg_restore just followed the TOC order. This method had lots
of deficiencies:
* TOC ordering randomly differed between parallel and non-parallel
dumps, and was hard to predict in the former case, causing problems
for building stable pg_dump test cases.
* Parallel restore only followed a well-chosen order if the dump had
been done in parallel; in particular, this never happened for restore
from custom-format dumps.
* The best order for restore isn't necessarily the same as for dump,
and it's not really static either because of locking considerations.
* TABLE_DATA and INDEX items aren't the only things that might take a lot
of work during restore. Scheduling was particularly stupid for the BLOBS
item, which might require lots of work during dump as well as restore,
but was left to the end in either case.
This patch removes the logic that changed the TOC order, fixing the
test instability problem. Instead, we sort the parallelizable items
just before processing them during a parallel dump. Independently
of that, parallel restore prioritizes the ready-to-execute tasks
based on the size of the underlying table. In the case of dependent
tasks such as index, constraint, or foreign key creation, the largest
relevant table is used as the metric for estimating the task length.
(This is pretty crude, but it should be enough to avoid the case we
want to avoid, which is ending the run with just a few large tasks
such that we can't make use of all N workers.)
Patch by me, responding to a complaint from Peter Eisentraut,
who also reviewed the patch.
Discussion: https://postgr.es/m/5137fe12-d0a2-4971-61b6-eb4e7e8875f8@2ndquadrant.com
2018-09-14 23:31:51 +02:00
|
|
|
/* Called to add a TOC entry */
|
2019-02-01 15:29:42 +01:00
|
|
|
extern TocEntry *ArchiveEntry(Archive *AHX, CatalogId catalogId,
|
|
|
|
DumpId dumpId, ArchiveOpts *opts);
|
Improve parallel scheduling logic in pg_dump/pg_restore.
Previously, the way this worked was that a parallel pg_dump would
re-order the TABLE_DATA items in the dump's TOC into decreasing size
order, and separately re-order (some of) the INDEX items into decreasing
size order. Then pg_dump would dump the items in that order. Later,
parallel pg_restore just followed the TOC order. This method had lots
of deficiencies:
* TOC ordering randomly differed between parallel and non-parallel
dumps, and was hard to predict in the former case, causing problems
for building stable pg_dump test cases.
* Parallel restore only followed a well-chosen order if the dump had
been done in parallel; in particular, this never happened for restore
from custom-format dumps.
* The best order for restore isn't necessarily the same as for dump,
and it's not really static either because of locking considerations.
* TABLE_DATA and INDEX items aren't the only things that might take a lot
of work during restore. Scheduling was particularly stupid for the BLOBS
item, which might require lots of work during dump as well as restore,
but was left to the end in either case.
This patch removes the logic that changed the TOC order, fixing the
test instability problem. Instead, we sort the parallelizable items
just before processing them during a parallel dump. Independently
of that, parallel restore prioritizes the ready-to-execute tasks
based on the size of the underlying table. In the case of dependent
tasks such as index, constraint, or foreign key creation, the largest
relevant table is used as the metric for estimating the task length.
(This is pretty crude, but it should be enough to avoid the case we
want to avoid, which is ending the run with just a few large tasks
such that we can't make use of all N workers.)
Patch by me, responding to a complaint from Peter Eisentraut,
who also reviewed the patch.
Discussion: https://postgr.es/m/5137fe12-d0a2-4971-61b6-eb4e7e8875f8@2ndquadrant.com
2018-09-14 23:31:51 +02:00
|
|
|
|
2000-07-11 15:07:17 +02:00
|
|
|
extern void WriteHead(ArchiveHandle *AH);
|
|
|
|
extern void ReadHead(ArchiveHandle *AH);
|
|
|
|
extern void WriteToc(ArchiveHandle *AH);
|
|
|
|
extern void ReadToc(ArchiveHandle *AH);
|
2016-01-13 23:48:33 +01:00
|
|
|
extern void WriteDataChunks(ArchiveHandle *AH, struct ParallelState *pstate);
|
|
|
|
extern void WriteDataChunksForTocEntry(ArchiveHandle *AH, TocEntry *te);
|
2013-03-24 16:27:20 +01:00
|
|
|
extern ArchiveHandle *CloneArchive(ArchiveHandle *AH);
|
|
|
|
extern void DeCloneArchive(ArchiveHandle *AH);
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2020-12-11 19:15:30 +01:00
|
|
|
extern int TocIDRequired(ArchiveHandle *AH, DumpId id);
|
2013-03-24 16:27:20 +01:00
|
|
|
TocEntry *getTocEntryByDumpId(ArchiveHandle *AH, DumpId id);
|
2002-10-25 03:33:17 +02:00
|
|
|
extern bool checkSeek(FILE *fp);
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2006-05-28 23:13:54 +02:00
|
|
|
#define appendStringLiteralAHX(buf,str,AH) \
|
|
|
|
appendStringLiteral(buf, str, (AH)->public.encoding, (AH)->public.std_strings)
|
|
|
|
|
2009-08-04 23:56:09 +02:00
|
|
|
#define appendByteaLiteralAHX(buf,str,len,AH) \
|
|
|
|
appendByteaLiteral(buf, str, len, (AH)->public.std_strings)
|
|
|
|
|
2000-07-11 15:07:17 +02:00
|
|
|
/*
|
|
|
|
* Mandatory routines for each supported format
|
|
|
|
*/
|
|
|
|
|
2002-08-20 19:54:45 +02:00
|
|
|
extern size_t WriteInt(ArchiveHandle *AH, int i);
|
2000-07-21 13:40:08 +02:00
|
|
|
extern int ReadInt(ArchiveHandle *AH);
|
|
|
|
extern char *ReadStr(ArchiveHandle *AH);
|
2022-09-23 01:41:23 +02:00
|
|
|
extern size_t WriteStr(ArchiveHandle *AH, const char *c);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
2007-02-19 16:05:06 +01:00
|
|
|
int ReadOffset(ArchiveHandle *, pgoff_t *);
|
|
|
|
size_t WriteOffset(ArchiveHandle *, pgoff_t, int);
|
2002-10-22 21:15:23 +02:00
|
|
|
|
2022-12-05 08:52:11 +01:00
|
|
|
extern void StartRestoreLOs(ArchiveHandle *AH);
|
|
|
|
extern void StartRestoreLO(ArchiveHandle *AH, Oid oid, bool drop);
|
|
|
|
extern void EndRestoreLO(ArchiveHandle *AH, Oid oid);
|
|
|
|
extern void EndRestoreLOs(ArchiveHandle *AH);
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2000-07-21 13:40:08 +02:00
|
|
|
extern void InitArchiveFmt_Custom(ArchiveHandle *AH);
|
|
|
|
extern void InitArchiveFmt_Null(ArchiveHandle *AH);
|
2011-01-23 22:10:15 +01:00
|
|
|
extern void InitArchiveFmt_Directory(ArchiveHandle *AH);
|
2000-07-21 13:40:08 +02:00
|
|
|
extern void InitArchiveFmt_Tar(ArchiveHandle *AH);
|
|
|
|
|
2003-02-01 20:29:16 +01:00
|
|
|
extern bool isValidTarHeader(char *header);
|
2000-07-11 15:07:17 +02:00
|
|
|
|
Fix handling of -d "connection string" in pg_dump/pg_restore.
Parallel pg_dump failed if its -d parameter was a connection string
containing any essential information other than host, port, or username.
The same was true for pg_restore with --create.
The reason is that these scenarios failed to preserve the connection
string from the command line; the code felt free to replace that with
just the database name when reconnecting from a pg_dump parallel worker
or after creating the target database. By chance, parallel pg_restore
did not suffer this defect, as long as you didn't say --create.
In practice it seems that the error would be obvious only if the
connstring included essential, non-default SSL or GSS parameters.
This may explain why it took us so long to notice. (It also makes
it very difficult to craft a regression test case illustrating the
problem, since the test would fail in builds without those options.)
Fix by refactoring so that ConnectDatabase always receives all the
relevant options directly from the command line, rather than
reconstructed values. Inject a different database name, when necessary,
by relying on libpq's rules for handling multiple "dbname" parameters.
While here, let's get rid of the essentially duplicate _connectDB
function, as well as some obsolete nearby cruft.
Per bug #16604 from Zsolt Ero. Back-patch to all supported branches.
Discussion: https://postgr.es/m/16604-933f4b8791227b15@postgresql.org
2020-09-25 00:19:38 +02:00
|
|
|
extern void ReconnectToServer(ArchiveHandle *AH, const char *dbname);
|
2022-12-05 08:52:11 +01:00
|
|
|
extern void DropLOIfExists(ArchiveHandle *AH, Oid oid);
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2014-05-06 02:27:16 +02:00
|
|
|
void ahwrite(const void *ptr, size_t size, size_t nmemb, ArchiveHandle *AH);
|
2015-03-11 14:19:54 +01:00
|
|
|
int ahprintf(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
2000-07-11 15:07:17 +02:00
|
|
|
#endif
|