2000-07-11 15:07:17 +02:00
|
|
|
/*-------------------------------------------------------------------------
 *
 * pg_backup_archiver.h
 *
 *	Private interface to the pg_dump archiver routines.
 *	It is NOT intended that these routines be called by any
 *	dumper directly.
 *
 *	See the headers to pg_restore for more details.
 *
 * Copyright (c) 2000, Philip Warner
 *	Rights are granted to use this software in any way so long
 *	as this notice is not removed.
 *
 *	The author is not responsible for loss or damages that may
 *	result from its use.
 *
 *
 * IDENTIFICATION
 *		src/bin/pg_dump/pg_backup_archiver.h
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
/*
 * Multiple-inclusion guard.
 *
 * NOTE(review): the guard name starts with a double underscore, which is
 * reserved for the implementation (C11 7.1.3).  Kept unchanged here because
 * the macro name is externally visible; renaming to PG_BACKUP_ARCHIVER_H
 * should be done as a coordinated change (including the matching #endif).
 */
#ifndef __PG_BACKUP_ARCHIVE__
#define __PG_BACKUP_ARCHIVE__
|
|
|
|
|
2000-07-21 13:40:08 +02:00
|
|
|
#include <time.h>
|
|
|
|
|
2003-12-08 17:39:05 +01:00
|
|
|
#include "libpq-fe.h"
|
2019-11-25 03:38:57 +01:00
|
|
|
#include "pg_backup.h"
|
2000-07-21 13:40:08 +02:00
|
|
|
#include "pqexpbuffer.h"
|
/* Buffer size used when transferring large-object (BLOB) data */
#define LOBBUFSIZE 16384

/*
 * Note: zlib.h must be included *after* libpq-fe.h, because the latter may
 * include ssl.h, which has a naming conflict with zlib.h.
 */
|
2000-07-11 15:07:17 +02:00
|
|
|
#ifdef HAVE_LIBZ
#include <zlib.h>
/* With zlib: the GZ* wrappers map straight onto the gz* functions */
#define GZCLOSE(fh) gzclose(fh)
#define GZWRITE(p, s, n, fh) gzwrite(fh, p, (n) * (s))
#define GZREAD(p, s, n, fh) gzread(fh, p, (n) * (s))
#define GZEOF(fh) gzeof(fh)
#else
/* Without zlib: fall back to plain stdio on an uncompressed file */
#define GZCLOSE(fh) fclose(fh)
/* fwrite returns an item count; scale by item size to mimic gzwrite's
 * byte-count return value */
#define GZWRITE(p, s, n, fh) (fwrite(p, s, n, fh) * (s))
#define GZREAD(p, s, n, fh) fread(p, s, n, fh)
#define GZEOF(fh) feof(fh)

/* this is just the redefinition of a libz constant */
#define Z_DEFAULT_COMPRESSION (-1)

/*
 * Minimal stand-in for zlib's z_stream, so that code can declare the type
 * even when compression support is compiled out.
 */
typedef struct _z_stream
{
	void	   *next_in;
	void	   *next_out;
	size_t		avail_in;
	size_t		avail_out;
} z_stream;
typedef z_stream *z_streamp;
#endif							/* HAVE_LIBZ */
|
|
|
|
|
/* Data block types */
#define BLK_DATA 1
#define BLK_BLOBS 3
|
2000-07-11 15:07:17 +02:00
|
|
|
|
/*
 * Encode version components into a convenient integer <maj><min><rev>,
 * one byte per component.
 */
#define MAKE_ARCHIVE_VERSION(major, minor, rev) (((major) * 256 + (minor)) * 256 + (rev))

/* Extract the individual components back out of an encoded version */
#define ARCHIVE_MAJOR(version)	(((version) >> 16) & 255)
#define ARCHIVE_MINOR(version)	(((version) >> 8) & 255)
#define ARCHIVE_REV(version)	(((version)		 ) & 255)
|
2016-10-25 18:00:00 +02:00
|
|
|
|
/* Historical version numbers (checked in code) */
#define K_VERS_1_0	MAKE_ARCHIVE_VERSION(1, 0, 0)
#define K_VERS_1_2	MAKE_ARCHIVE_VERSION(1, 2, 0)	/* Allow No ZLIB */
#define K_VERS_1_3	MAKE_ARCHIVE_VERSION(1, 3, 0)	/* BLOBs */
#define K_VERS_1_4	MAKE_ARCHIVE_VERSION(1, 4, 0)	/* Date & name in header */
#define K_VERS_1_5	MAKE_ARCHIVE_VERSION(1, 5, 0)	/* Handle dependencies */
#define K_VERS_1_6	MAKE_ARCHIVE_VERSION(1, 6, 0)	/* Schema field in TOCs */
#define K_VERS_1_7	MAKE_ARCHIVE_VERSION(1, 7, 0)	/* File Offset size in
													 * header */
#define K_VERS_1_8	MAKE_ARCHIVE_VERSION(1, 8, 0)	/* change interpretation
													 * of ID numbers and
													 * dependencies */
#define K_VERS_1_9	MAKE_ARCHIVE_VERSION(1, 9, 0)	/* add default_with_oids
													 * tracking */
#define K_VERS_1_10 MAKE_ARCHIVE_VERSION(1, 10, 0)	/* add tablespace */
#define K_VERS_1_11 MAKE_ARCHIVE_VERSION(1, 11, 0)	/* add toc section
													 * indicator */
#define K_VERS_1_12 MAKE_ARCHIVE_VERSION(1, 12, 0)	/* add separate BLOB
													 * entries */
#define K_VERS_1_13 MAKE_ARCHIVE_VERSION(1, 13, 0)	/* change search_path
													 * behavior */
#define K_VERS_1_14 MAKE_ARCHIVE_VERSION(1, 14, 0)	/* add tableam */
|
2016-10-25 18:00:00 +02:00
|
|
|
|
/* Current archive version number (the format we can output) */
#define K_VERS_MAJOR 1
#define K_VERS_MINOR 14
#define K_VERS_REV 0

/*
 * Encoded form of the current version.
 *
 * Bug fix: the original definition ended with a stray semicolon, which was
 * pasted into every expansion.  That is harmless only when the macro is
 * used as a full statement; in any expression context (comparisons,
 * initializer lists, function arguments) it is a syntax error.  Object-like
 * macros must never carry a trailing semicolon.
 */
#define K_VERS_SELF MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, K_VERS_REV)

/* Newest format we can read */
#define K_VERS_MAX	MAKE_ARCHIVE_VERSION(K_VERS_MAJOR, K_VERS_MINOR, 255)
|
2000-07-11 15:07:17 +02:00
|
|
|
|
/* Flags to indicate disposition of offsets stored in files */
#define K_OFFSET_POS_NOT_SET 1
#define K_OFFSET_POS_SET 2
#define K_OFFSET_NO_DATA 3
|
|
|
|
|
/*
 * Special exit values from worker children.  We reserve 0 for normal
 * success; 1 and other small values should be interpreted as crashes.
 */
#define WORKER_OK 0
#define WORKER_CREATE_DONE 10
#define WORKER_INHIBIT_DATA 11
#define WORKER_IGNORED_ERRORS 12
|
|
|
|
|
/*
 * Forward declarations.  struct _archiveHandle is defined later in this
 * file; the other two are defined elsewhere.
 */
typedef struct _archiveHandle ArchiveHandle;
typedef struct _tocEntry TocEntry;
struct ParallelState;
|
2013-03-24 20:35:37 +01:00
|
|
|
|
/*
 * Report a failed read on fd and terminate, distinguishing premature EOF
 * from an I/O error.  ("%m" is expanded by the frontend logging layer;
 * fatal() does not return.)
 */
#define READ_ERROR_EXIT(fd) \
	do { \
		if (feof(fd)) \
			fatal("could not read from input file: end of file"); \
		else \
			fatal("could not read from input file: %m"); \
	} while (0)
|
|
|
|
|
|
|
|
/*
 * Report a failed write and terminate.  ("%m" is expanded by the frontend
 * logging layer; fatal() does not return.)
 */
#define WRITE_ERROR_EXIT \
	do { \
		fatal("could not write to output file: %m"); \
	} while (0)
|
2014-05-06 18:12:18 +02:00
|
|
|
|
/* Overall action the archiver is performing */
typedef enum T_Action
{
	ACT_DUMP,
	ACT_RESTORE
} T_Action;
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2016-08-30 18:00:00 +02:00
|
|
|
typedef void (*ClosePtrType) (ArchiveHandle *AH);
|
|
|
|
typedef void (*ReopenPtrType) (ArchiveHandle *AH);
|
|
|
|
typedef void (*ArchiveEntryPtrType) (ArchiveHandle *AH, TocEntry *te);
|
2014-10-14 20:00:55 +02:00
|
|
|
|
2016-08-30 18:00:00 +02:00
|
|
|
typedef void (*StartDataPtrType) (ArchiveHandle *AH, TocEntry *te);
|
|
|
|
typedef void (*WriteDataPtrType) (ArchiveHandle *AH, const void *data, size_t dLen);
|
|
|
|
typedef void (*EndDataPtrType) (ArchiveHandle *AH, TocEntry *te);
|
2014-10-14 20:00:55 +02:00
|
|
|
|
2016-08-30 18:00:00 +02:00
|
|
|
typedef void (*StartBlobsPtrType) (ArchiveHandle *AH, TocEntry *te);
|
|
|
|
typedef void (*StartBlobPtrType) (ArchiveHandle *AH, TocEntry *te, Oid oid);
|
|
|
|
typedef void (*EndBlobPtrType) (ArchiveHandle *AH, TocEntry *te, Oid oid);
|
|
|
|
typedef void (*EndBlobsPtrType) (ArchiveHandle *AH, TocEntry *te);
|
2014-10-14 20:00:55 +02:00
|
|
|
|
2016-08-30 18:00:00 +02:00
|
|
|
typedef int (*WriteBytePtrType) (ArchiveHandle *AH, const int i);
|
|
|
|
typedef int (*ReadBytePtrType) (ArchiveHandle *AH);
|
|
|
|
typedef void (*WriteBufPtrType) (ArchiveHandle *AH, const void *c, size_t len);
|
|
|
|
typedef void (*ReadBufPtrType) (ArchiveHandle *AH, void *buf, size_t len);
|
|
|
|
typedef void (*WriteExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te);
|
|
|
|
typedef void (*ReadExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te);
|
|
|
|
typedef void (*PrintExtraTocPtrType) (ArchiveHandle *AH, TocEntry *te);
|
|
|
|
typedef void (*PrintTocDataPtrType) (ArchiveHandle *AH, TocEntry *te);
|
2014-10-14 20:00:55 +02:00
|
|
|
|
Improve parallel scheduling logic in pg_dump/pg_restore.
Previously, the way this worked was that a parallel pg_dump would
re-order the TABLE_DATA items in the dump's TOC into decreasing size
order, and separately re-order (some of) the INDEX items into decreasing
size order. Then pg_dump would dump the items in that order. Later,
parallel pg_restore just followed the TOC order. This method had lots
of deficiencies:
* TOC ordering randomly differed between parallel and non-parallel
dumps, and was hard to predict in the former case, causing problems
for building stable pg_dump test cases.
* Parallel restore only followed a well-chosen order if the dump had
been done in parallel; in particular, this never happened for restore
from custom-format dumps.
* The best order for restore isn't necessarily the same as for dump,
and it's not really static either because of locking considerations.
* TABLE_DATA and INDEX items aren't the only things that might take a lot
of work during restore. Scheduling was particularly stupid for the BLOBS
item, which might require lots of work during dump as well as restore,
but was left to the end in either case.
This patch removes the logic that changed the TOC order, fixing the
test instability problem. Instead, we sort the parallelizable items
just before processing them during a parallel dump. Independently
of that, parallel restore prioritizes the ready-to-execute tasks
based on the size of the underlying table. In the case of dependent
tasks such as index, constraint, or foreign key creation, the largest
relevant table is used as the metric for estimating the task length.
(This is pretty crude, but it should be enough to avoid the case we
want to avoid, which is ending the run with just a few large tasks
such that we can't make use of all N workers.)
Patch by me, responding to a complaint from Peter Eisentraut,
who also reviewed the patch.
Discussion: https://postgr.es/m/5137fe12-d0a2-4971-61b6-eb4e7e8875f8@2ndquadrant.com
2018-09-14 23:31:51 +02:00
|
|
|
typedef void (*PrepParallelRestorePtrType) (ArchiveHandle *AH);
|
2016-08-30 18:00:00 +02:00
|
|
|
typedef void (*ClonePtrType) (ArchiveHandle *AH);
|
|
|
|
typedef void (*DeClonePtrType) (ArchiveHandle *AH);
|
2014-10-14 20:00:55 +02:00
|
|
|
|
2016-08-30 18:00:00 +02:00
|
|
|
typedef int (*WorkerJobDumpPtrType) (ArchiveHandle *AH, TocEntry *te);
|
|
|
|
typedef int (*WorkerJobRestorePtrType) (ArchiveHandle *AH, TocEntry *te);
|
2013-03-24 16:27:20 +01:00
|
|
|
|
2016-08-30 18:00:00 +02:00
|
|
|
typedef size_t (*CustomOutPtrType) (ArchiveHandle *AH, const void *buf, size_t len);
|
2001-03-22 05:01:46 +01:00
|
|
|
|
/*
 * State of the mini SQL lexer used when restoring INSERT-style table data
 * directly to a database connection.
 */
typedef enum
{
	SQL_SCAN = 0,				/* normal */
	SQL_IN_SINGLE_QUOTE,		/* '...' literal */
	SQL_IN_DOUBLE_QUOTE			/* "..." identifier */
} sqlparseState;
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
sqlparseState state; /* see above */
|
|
|
|
bool backSlash; /* next char is backslash quoted? */
|
|
|
|
PQExpBuffer curCmd; /* incomplete line (NULL if not created) */
|
|
|
|
} sqlparseInfo;
|
|
|
|
|
/*
 * Coarse phase of an archiver run; presumably used to label where an error
 * occurred (e.g. "Error while PROCESSING TOC") -- confirm against the error
 * reporting code, which is outside this chunk.
 */
typedef enum
{
	STAGE_NONE = 0,
	STAGE_INITIALIZING,
	STAGE_PROCESSING,
	STAGE_FINALIZING
} ArchiverStage;
|
|
|
|
|
/* What kind of text the archiver is currently emitting */
typedef enum
{
	OUTPUT_SQLCMDS = 0,			/* emitting general SQL commands */
	OUTPUT_COPYDATA,			/* writing COPY data */
	OUTPUT_OTHERDATA			/* writing data as INSERT commands */
} ArchiverOutput;
|
|
|
|
|
/*
 * For historical reasons, ACL items are interspersed with everything else in
 * a dump file's TOC; typically they're right after the object they're for.
 * However, we need to restore data before ACLs, as otherwise a read-only
 * table (ie one where the owner has revoked her own INSERT privilege) causes
 * data restore failures.  On the other hand, matview REFRESH commands should
 * come out after ACLs, as otherwise non-superuser-owned matviews might not
 * be able to execute.  (If the permissions at the time of dumping would not
 * allow a REFRESH, too bad; we won't fix that for you.)  These considerations
 * force us to make three passes over the TOC, restoring the appropriate
 * subset of items in each pass.  We assume that the dependency sort resulted
 * in an appropriate ordering of items within each subset.
 *
 * XXX This mechanism should be superseded by tracking dependencies on ACLs
 * properly; but we'll still need it for old dump files even after that.
 */
typedef enum
{
	RESTORE_PASS_MAIN = 0,		/* Main pass (most TOC item types) */
	RESTORE_PASS_ACL,			/* ACL item types */
	RESTORE_PASS_REFRESH		/* Matview REFRESH items */

#define RESTORE_PASS_LAST RESTORE_PASS_REFRESH
} RestorePass;
|
|
|
|
|
/*
 * Bit flags returned by the TOC-entry filtering logic, saying which parts
 * of an entry are wanted for the current dump/restore operation.  Values
 * are ORed together, so each must be a distinct power of two.
 */
typedef enum
{
	REQ_SCHEMA = 0x01,			/* want schema */
	REQ_DATA = 0x02,			/* want data */
	REQ_SPECIAL = 0x04			/* for special TOC entries */
} teReqs;
2014-10-14 20:00:55 +02:00
|
|
|
struct _archiveHandle
|
2001-03-22 05:01:46 +01:00
|
|
|
{
|
|
|
|
Archive public; /* Public part of archive */
|
2016-10-25 18:00:00 +02:00
|
|
|
int version; /* Version of file */
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2005-10-15 04:49:52 +02:00
|
|
|
char *archiveRemoteVersion; /* When reading an archive, the
|
|
|
|
* version of the dumped DB */
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
char *archiveDumpVersion; /* When reading an archive, the version of
|
|
|
|
* the dumper */
|
2004-11-06 20:36:02 +01:00
|
|
|
|
2002-08-20 19:54:45 +02:00
|
|
|
size_t intSize; /* Size of an integer in the archive */
|
2003-08-04 02:43:34 +02:00
|
|
|
size_t offSize; /* Size of a file offset in the archive -
|
|
|
|
* Added V1.7 */
|
2001-03-22 05:01:46 +01:00
|
|
|
ArchiveFormat format; /* Archive format */
|
2000-07-11 15:07:17 +02:00
|
|
|
|
Fix pg_restore's direct-to-database mode for INSERT-style table data.
In commit 6545a901aaf84cb05212bb6a7674059908f527c3, I removed the mini SQL
lexer that was in pg_backup_db.c, thinking that it had no real purpose
beyond separating COPY data from SQL commands, which purpose had been
obsoleted by long-ago fixes in pg_dump's archive file format.
Unfortunately this was in error: that code was also used to identify
command boundaries in INSERT-style table data, which is run together as a
single string in the archive file for better compressibility. As a result,
direct-to-database restores from archive files made with --inserts or
--column-inserts fail in our latest releases, as reported by Dick Visser.
To fix, restore the mini SQL lexer, but simplify it by adjusting the
calling logic so that it's only required to cope with INSERT-style table
data, not arbitrary SQL commands. This allows us to not have to deal with
SQL comments, E'' strings, or dollar-quoted strings, none of which have
ever been emitted by dumpTableData_insert.
Also, fix the lexer to cope with standard-conforming strings, which was the
actual bug that the previous patch was meant to solve.
Back-patch to all supported branches. The previous patch went back to 8.2,
which unfortunately means that the EOL release of 8.2 contains this bug,
but I don't think we're doing another 8.2 release just because of that.
2012-01-06 19:04:09 +01:00
|
|
|
sqlparseInfo sqlparse; /* state for parsing INSERT data */
|
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
time_t createDate; /* Date archive created */
|
2000-07-21 13:40:08 +02:00
|
|
|
|
|
|
|
/*
|
2001-03-22 05:01:46 +01:00
|
|
|
* Fields used when discovering header. A format can always get the
|
|
|
|
* previous read bytes from here...
|
2000-07-21 13:40:08 +02:00
|
|
|
*/
|
2005-10-15 04:49:52 +02:00
|
|
|
int readHeader; /* Used if file header has been read already */
|
|
|
|
char *lookahead; /* Buffer used when reading header to discover
|
|
|
|
* format */
|
2002-08-20 19:54:45 +02:00
|
|
|
size_t lookaheadSize; /* Size of allocated buffer */
|
|
|
|
size_t lookaheadLen; /* Length of data in lookahead */
|
2007-02-19 16:05:06 +01:00
|
|
|
pgoff_t lookaheadPos; /* Current read position in lookahead buffer */
|
2001-03-22 05:01:46 +01:00
|
|
|
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
ArchiveEntryPtrType ArchiveEntryPtr; /* Called for each metadata object */
|
|
|
|
StartDataPtrType StartDataPtr; /* Called when table data is about to be
|
|
|
|
* dumped */
|
|
|
|
WriteDataPtrType WriteDataPtr; /* Called to send some table data to the
|
|
|
|
* archive */
|
2017-05-17 22:31:56 +02:00
|
|
|
EndDataPtrType EndDataPtr; /* Called when table data dump is finished */
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
WriteBytePtrType WriteBytePtr; /* Write a byte to output */
|
2016-08-30 18:00:00 +02:00
|
|
|
ReadBytePtrType ReadBytePtr; /* Read a byte from an archive */
|
|
|
|
WriteBufPtrType WriteBufPtr; /* Write a buffer of output to the archive */
|
2017-05-17 22:31:56 +02:00
|
|
|
ReadBufPtrType ReadBufPtr; /* Read a buffer of input from the archive */
|
|
|
|
ClosePtrType ClosePtr; /* Close the archive */
|
|
|
|
ReopenPtrType ReopenPtr; /* Reopen the archive */
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
WriteExtraTocPtrType WriteExtraTocPtr; /* Write extra TOC entry data
|
|
|
|
* associated with the current
|
|
|
|
* archive format */
|
|
|
|
ReadExtraTocPtrType ReadExtraTocPtr; /* Read extra info associated with
|
|
|
|
* archive format */
|
|
|
|
PrintExtraTocPtrType PrintExtraTocPtr; /* Extra TOC info for format */
|
2016-08-30 18:00:00 +02:00
|
|
|
PrintTocDataPtrType PrintTocDataPtr;
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2016-08-30 18:00:00 +02:00
|
|
|
StartBlobsPtrType StartBlobsPtr;
|
|
|
|
EndBlobsPtrType EndBlobsPtr;
|
|
|
|
StartBlobPtrType StartBlobPtr;
|
|
|
|
EndBlobPtrType EndBlobPtr;
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2016-08-30 18:00:00 +02:00
|
|
|
SetupWorkerPtrType SetupWorkerPtr;
|
|
|
|
WorkerJobDumpPtrType WorkerJobDumpPtr;
|
|
|
|
WorkerJobRestorePtrType WorkerJobRestorePtr;
|
2013-03-24 16:27:20 +01:00
|
|
|
|
Improve parallel scheduling logic in pg_dump/pg_restore.
Previously, the way this worked was that a parallel pg_dump would
re-order the TABLE_DATA items in the dump's TOC into decreasing size
order, and separately re-order (some of) the INDEX items into decreasing
size order. Then pg_dump would dump the items in that order. Later,
parallel pg_restore just followed the TOC order. This method had lots
of deficiencies:
* TOC ordering randomly differed between parallel and non-parallel
dumps, and was hard to predict in the former case, causing problems
for building stable pg_dump test cases.
* Parallel restore only followed a well-chosen order if the dump had
been done in parallel; in particular, this never happened for restore
from custom-format dumps.
* The best order for restore isn't necessarily the same as for dump,
and it's not really static either because of locking considerations.
* TABLE_DATA and INDEX items aren't the only things that might take a lot
of work during restore. Scheduling was particularly stupid for the BLOBS
item, which might require lots of work during dump as well as restore,
but was left to the end in either case.
This patch removes the logic that changed the TOC order, fixing the
test instability problem. Instead, we sort the parallelizable items
just before processing them during a parallel dump. Independently
of that, parallel restore prioritizes the ready-to-execute tasks
based on the size of the underlying table. In the case of dependent
tasks such as index, constraint, or foreign key creation, the largest
relevant table is used as the metric for estimating the task length.
(This is pretty crude, but it should be enough to avoid the case we
want to avoid, which is ending the run with just a few large tasks
such that we can't make use of all N workers.)
Patch by me, responding to a complaint from Peter Eisentraut,
who also reviewed the patch.
Discussion: https://postgr.es/m/5137fe12-d0a2-4971-61b6-eb4e7e8875f8@2ndquadrant.com
2018-09-14 23:31:51 +02:00
|
|
|
PrepParallelRestorePtrType PrepParallelRestorePtr;
|
2017-05-17 22:31:56 +02:00
|
|
|
ClonePtrType ClonePtr; /* Clone format-specific fields */
|
|
|
|
DeClonePtrType DeClonePtr; /* Clean up cloned fields */
|
2009-02-02 21:07:37 +01:00
|
|
|
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
CustomOutPtrType CustomOutPtr; /* Alternative script output routine */
|
2000-07-21 13:40:08 +02:00
|
|
|
|
|
|
|
/* Stuff for direct DB connection */
|
2001-03-22 05:01:46 +01:00
|
|
|
char *archdbname; /* DB name *read* from archive */
|
2014-10-14 20:00:55 +02:00
|
|
|
trivalue promptPassword;
|
2009-06-11 16:49:15 +02:00
|
|
|
char *savedPassword; /* password for ropt->username, if known */
|
2013-03-24 16:27:20 +01:00
|
|
|
char *use_role;
|
2001-03-22 05:01:46 +01:00
|
|
|
PGconn *connection;
|
Redesign handling of SIGTERM/control-C in parallel pg_dump/pg_restore.
Formerly, Unix builds of pg_dump/pg_restore would trap SIGINT and similar
signals and set a flag that was tested in various data-transfer loops.
This was prone to errors of omission (cf commit 3c8aa6654); and even if
the client-side response was prompt, we did nothing that would cause
long-running SQL commands (e.g. CREATE INDEX) to terminate early.
Also, the master process would effectively do nothing at all upon receipt
of SIGINT; the only reason it seemed to work was that in typical scenarios
the signal would also be delivered to the child processes. We should
support termination when a signal is delivered only to the master process,
though.
Windows builds had no console interrupt handler, so they would just fall
over immediately at control-C, again leaving long-running SQL commands to
finish unmolested.
To fix, remove the flag-checking approach altogether. Instead, allow the
Unix signal handler to send a cancel request directly and then exit(1).
In the master process, also have it forward the signal to the children.
On Windows, add a console interrupt handler that behaves approximately
the same. The main difference is that a single execution of the Windows
handler can send all the cancel requests since all the info is available
in one process, whereas on Unix each process sends a cancel only for its
own database connection.
In passing, fix an old problem that DisconnectDatabase tends to send a
cancel request before exiting a parallel worker, even if nothing went
wrong. This is at least a waste of cycles, and could lead to unexpected
log messages, or maybe even data loss if it happened in pg_restore (though
in the current code the problem seems to affect only pg_dump). The cause
was that after a COPY step, pg_dump was leaving libpq in PGASYNC_BUSY
state, causing PQtransactionStatus() to report PQTRANS_ACTIVE. That's
normally harmless because the next PQexec() will silently clear the
PGASYNC_BUSY state; but in a parallel worker we might exit without any
additional SQL commands after a COPY step. So add an extra PQgetResult()
call after a COPY to allow libpq to return to PGASYNC_IDLE state.
This is a bug fix, IMO, so back-patch to 9.3 where parallel dump/restore
were introduced.
Thanks to Kyotaro Horiguchi for Windows testing and code suggestions.
Original-Patch: <7005.1464657274@sss.pgh.pa.us>
Discussion: <20160602.174941.256342236.horiguchi.kyotaro@lab.ntt.co.jp>
2016-06-02 19:27:53 +02:00
|
|
|
/* If connCancel isn't NULL, SIGINT handler will send a cancel */
|
|
|
|
PGcancel *volatile connCancel;
|
|
|
|
|
2005-10-15 04:49:52 +02:00
|
|
|
int connectToDB; /* Flag to indicate if direct DB connection is
|
|
|
|
* required */
|
Fix pg_restore's direct-to-database mode for INSERT-style table data.
In commit 6545a901aaf84cb05212bb6a7674059908f527c3, I removed the mini SQL
lexer that was in pg_backup_db.c, thinking that it had no real purpose
beyond separating COPY data from SQL commands, which purpose had been
obsoleted by long-ago fixes in pg_dump's archive file format.
Unfortunately this was in error: that code was also used to identify
command boundaries in INSERT-style table data, which is run together as a
single string in the archive file for better compressibility. As a result,
direct-to-database restores from archive files made with --inserts or
--column-inserts fail in our latest releases, as reported by Dick Visser.
To fix, restore the mini SQL lexer, but simplify it by adjusting the
calling logic so that it's only required to cope with INSERT-style table
data, not arbitrary SQL commands. This allows us to not have to deal with
SQL comments, E'' strings, or dollar-quoted strings, none of which have
ever been emitted by dumpTableData_insert.
Also, fix the lexer to cope with standard-conforming strings, which was the
actual bug that the previous patch was meant to solve.
Back-patch to all supported branches. The previous patch went back to 8.2,
which unfortunately means that the EOL release of 8.2 contains this bug,
but I don't think we're doing another 8.2 release just because of that.
2012-01-06 19:04:09 +01:00
|
|
|
ArchiverOutput outputKind; /* Flag for what we're currently writing */
|
2006-02-05 21:58:47 +01:00
|
|
|
bool pgCopyIn; /* Currently in libpq 'COPY IN' mode. */
|
2001-03-22 05:01:46 +01:00
|
|
|
|
|
|
|
int loFd; /* BLOB fd */
|
|
|
|
int writingBlob; /* Flag */
|
|
|
|
int blobCount; /* # of blobs restored */
|
|
|
|
|
|
|
|
char *fSpec; /* Archive File Spec */
|
|
|
|
FILE *FH; /* General purpose file handle */
|
|
|
|
void *OF;
|
|
|
|
int gzOut; /* Output file */
|
|
|
|
|
2012-05-29 02:38:28 +02:00
|
|
|
struct _tocEntry *toc; /* Header of circular list of TOC entries */
|
2001-03-22 05:01:46 +01:00
|
|
|
int tocCount; /* Number of TOC entries */
|
2003-12-06 04:00:16 +01:00
|
|
|
DumpId maxDumpId; /* largest DumpId among all TOC entries */
|
|
|
|
|
2012-05-29 02:38:28 +02:00
|
|
|
/* arrays created after the TOC list is complete: */
|
2012-06-10 21:20:04 +02:00
|
|
|
struct _tocEntry **tocsByDumpId; /* TOCs indexed by dumpId */
|
2012-05-29 02:38:28 +02:00
|
|
|
DumpId *tableDataId; /* TABLE DATA ids, indexed by table dumpId */
|
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
struct _tocEntry *currToc; /* Used when dumping data */
|
2011-04-10 17:42:00 +02:00
|
|
|
int compression; /* Compression requested on open Possible
|
|
|
|
* values for compression: -1
|
|
|
|
* Z_DEFAULT_COMPRESSION 0 COMPRESSION_NONE
|
|
|
|
* 1-9 levels for gzip compression */
|
2017-03-22 15:00:30 +01:00
|
|
|
bool dosync; /* data requested to be synced on sight */
|
2001-03-22 05:01:46 +01:00
|
|
|
ArchiveMode mode; /* File mode - r or w */
|
|
|
|
void *formatData; /* Header data specific to file format */
|
|
|
|
|
2003-10-03 22:10:59 +02:00
|
|
|
/* these vars track state to avoid sending redundant SET commands */
|
2009-02-02 21:07:37 +01:00
|
|
|
char *currUser; /* current username, or NULL if unknown */
|
|
|
|
char *currSchema; /* current schema, or NULL */
|
|
|
|
char *currTablespace; /* current tablespace, or NULL */
|
2019-03-06 18:54:38 +01:00
|
|
|
char *currTableAm; /* current table access method, or NULL */
|
2004-08-29 07:07:03 +02:00
|
|
|
|
2002-09-04 22:31:48 +02:00
|
|
|
void *lo_buf;
|
2002-08-20 19:54:45 +02:00
|
|
|
size_t lo_buf_used;
|
|
|
|
size_t lo_buf_size;
|
They are two different problems; the TOC entry is important for any
multiline command or to rerun the command easily later.
Whereas displaying the failed SQL command is a matter of fixing the
error
messages.
The latter is complicated by failed COPY commands which, with
die-on-errors
off, results in the data being processed as a command, so dumping the
command will dump all of the data.
In the case of long commands, should the whole command be dumped? eg.
(eg.
several pages of function definition).
In the case of the COPY command, I'm not sure what to do. Obviously, it
would be best to avoid sending the data, but the data and command are
combined (from memory). Also, the 'data' may be in the form of INSERT
statements.
Attached patch produces the first 125 chars of the command:
pg_restore: [archiver (db)] Error while PROCESSING TOC:
pg_restore: [archiver (db)] Error from TOC Entry 26; 1255 16449270
FUNCTION
plpgsql_call_handler() pjw
pg_restore: [archiver (db)] could not execute query: ERROR: function
"plpgsql_call_handler" already exists with same argument types
Command was: CREATE FUNCTION plpgsql_call_handler() RETURNS
language_handler
AS '/var/lib/pgsql-8.0b1/lib/plpgsql', 'plpgsql_call_han...
pg_restore: [archiver (db)] Error from TOC Entry 27; 1255 16449271
FUNCTION
plpgsql_validator(oid) pjw
pg_restore: [archiver (db)] could not execute query: ERROR: function
"plpgsql_validator" already exists with same argument types
Command was: CREATE FUNCTION plpgsql_validator(oid) RETURNS void
AS '/var/lib/pgsql-8.0b1/lib/plpgsql', 'plpgsql_validator'
LANGU...
Philip Warner
2004-08-20 22:00:34 +02:00
|
|
|
|
2004-08-29 07:07:03 +02:00
|
|
|
int noTocComments;
|
|
|
|
ArchiverStage stage;
|
|
|
|
ArchiverStage lastErrorStage;
|
Fix pg_dump/pg_restore to emit REFRESH MATERIALIZED VIEW commands last.
Because we push all ACL (i.e. GRANT/REVOKE) restore steps to the end,
materialized view refreshes were occurring while the permissions on
referenced objects were still at defaults. This led to failures if,
say, an MV owned by user A reads from a table owned by user B, even
if B had granted the necessary privileges to A. We've had multiple
complaints about that type of restore failure, most recently from
Jordan Gigov.
The ideal fix for this would be to start treating ACLs as dependency-
sortable objects, rather than hard-wiring anything about their dump order
(the existing approach is a messy kluge dating to commit dc0e76ca3).
But that's going to be a rather major change, and it certainly wouldn't
lead to a back-patchable fix. As a short-term solution, convert the
existing two-pass hack (ie, normal objects then ACLs) to a three-pass hack,
ie, normal objects then ACLs then matview refreshes. Because this happens
in RestoreArchive(), it will also fix the problem when restoring from an
existing archive-format dump.
(Note this means that if a matview refresh would have failed under the
permissions prevailing at dump time, it'll fail during restore as well.
We'll define that as user error rather than something we should try
to work around.)
To avoid performance loss in parallel restore, we need the matview
refreshes to still be parallelizable. Hence, clean things up enough
so that both ACLs and matviews are handled by the parallel restore
infrastructure, instead of reverting back to serial restore for ACLs.
There is still a final serial step, but it shouldn't normally have to
do anything; it's only there to try to recover if we get stuck due to
some problem like unresolved circular dependencies.
Patch by me, but it owes something to an earlier attempt by Kevin Grittner.
Back-patch to 9.3 where materialized views were introduced.
Discussion: https://postgr.es/m/28572.1500912583@sss.pgh.pa.us
2017-08-03 23:36:23 +02:00
|
|
|
RestorePass restorePass; /* used only during parallel restore */
|
2004-08-29 07:07:03 +02:00
|
|
|
struct _tocEntry *currentTE;
|
|
|
|
struct _tocEntry *lastErrorTE;
|
2014-10-14 20:00:55 +02:00
|
|
|
};
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2014-10-14 20:00:55 +02:00
|
|
|
struct _tocEntry
|
2001-03-22 05:01:46 +01:00
|
|
|
{
|
|
|
|
struct _tocEntry *prev;
|
|
|
|
struct _tocEntry *next;
|
2003-12-06 04:00:16 +01:00
|
|
|
CatalogId catalogId;
|
|
|
|
DumpId dumpId;
|
2009-06-11 16:49:15 +02:00
|
|
|
teSection section;
|
2005-10-15 04:49:52 +02:00
|
|
|
bool hadDumper; /* Archiver was passed a dumper routine (used
|
|
|
|
* in restore) */
|
2002-09-04 22:31:48 +02:00
|
|
|
char *tag; /* index tag */
|
2002-05-11 00:36:27 +02:00
|
|
|
char *namespace; /* null or empty string if not in a schema */
|
2004-11-06 20:36:02 +01:00
|
|
|
char *tablespace; /* null if not in a tablespace; empty string
|
|
|
|
* means use database default */
|
2019-03-06 18:54:38 +01:00
|
|
|
char *tableam; /* table access method, only for TABLE tags */
|
2002-05-11 00:36:27 +02:00
|
|
|
char *owner;
|
2001-03-22 05:01:46 +01:00
|
|
|
char *desc;
|
|
|
|
char *defn;
|
|
|
|
char *dropStmt;
|
|
|
|
char *copyStmt;
|
2003-12-06 04:00:16 +01:00
|
|
|
DumpId *dependencies; /* dumpIds of objects this one depends on */
|
|
|
|
int nDeps; /* number of dependencies */
|
2001-04-01 07:42:51 +02:00
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
DataDumperPtr dataDumper; /* Routine to dump data for object */
|
|
|
|
void *dataDumperArg; /* Arg for above routine */
|
|
|
|
void *formatData; /* TOC Entry data specific to file format */
|
2009-02-02 21:07:37 +01:00
|
|
|
|
Rewrite --section option to decouple it from --schema-only/--data-only.
The initial implementation of pg_dump's --section option supposed that the
existing --schema-only and --data-only options could be made equivalent to
--section settings. This is wrong, though, due to dubious but long since
set-in-stone decisions about where to dump SEQUENCE SET items, as seen in
bug report from Martin Pitt. (And I'm not totally convinced there weren't
other bugs, either.) Undo that coupling and instead drive --section
filtering off current-section state tracked as we scan through the TOC
list to call _tocEntryRequired().
To make sure those decisions don't shift around and hopefully save a few
cycles, run _tocEntryRequired() only once per TOC entry and save the result
in a new TOC field. This required minor rejiggering of ACL handling but
also allows a far cleaner implementation of inhibit_data_for_failed_table.
Also, to ensure that pg_dump and pg_restore have the same behavior with
respect to the --section switches, add _tocEntryRequired() filtering to
WriteToc() and WriteDataChunks(), rather than trying to implement section
filtering in an entirely orthogonal way in dumpDumpableObject(). This
required adjusting the handling of the special ENCODING and STDSTRINGS
items, but they were pretty weird before anyway.
Minor other code review for the patch, too.
2012-05-30 05:22:14 +02:00
|
|
|
/* working state while dumping/restoring */
|
Improve parallel scheduling logic in pg_dump/pg_restore.
Previously, the way this worked was that a parallel pg_dump would
re-order the TABLE_DATA items in the dump's TOC into decreasing size
order, and separately re-order (some of) the INDEX items into decreasing
size order. Then pg_dump would dump the items in that order. Later,
parallel pg_restore just followed the TOC order. This method had lots
of deficiencies:
* TOC ordering randomly differed between parallel and non-parallel
dumps, and was hard to predict in the former case, causing problems
for building stable pg_dump test cases.
* Parallel restore only followed a well-chosen order if the dump had
been done in parallel; in particular, this never happened for restore
from custom-format dumps.
* The best order for restore isn't necessarily the same as for dump,
and it's not really static either because of locking considerations.
* TABLE_DATA and INDEX items aren't the only things that might take a lot
of work during restore. Scheduling was particularly stupid for the BLOBS
item, which might require lots of work during dump as well as restore,
but was left to the end in either case.
This patch removes the logic that changed the TOC order, fixing the
test instability problem. Instead, we sort the parallelizable items
just before processing them during a parallel dump. Independently
of that, parallel restore prioritizes the ready-to-execute tasks
based on the size of the underlying table. In the case of dependent
tasks such as index, constraint, or foreign key creation, the largest
relevant table is used as the metric for estimating the task length.
(This is pretty crude, but it should be enough to avoid the case we
want to avoid, which is ending the run with just a few large tasks
such that we can't make use of all N workers.)
Patch by me, responding to a complaint from Peter Eisentraut,
who also reviewed the patch.
Discussion: https://postgr.es/m/5137fe12-d0a2-4971-61b6-eb4e7e8875f8@2ndquadrant.com
2018-09-14 23:31:51 +02:00
|
|
|
pgoff_t dataLength; /* item's data size; 0 if none or unknown */
|
Rewrite --section option to decouple it from --schema-only/--data-only.
The initial implementation of pg_dump's --section option supposed that the
existing --schema-only and --data-only options could be made equivalent to
--section settings. This is wrong, though, due to dubious but long since
set-in-stone decisions about where to dump SEQUENCE SET items, as seen in
bug report from Martin Pitt. (And I'm not totally convinced there weren't
other bugs, either.) Undo that coupling and instead drive --section
filtering off current-section state tracked as we scan through the TOC
list to call _tocEntryRequired().
To make sure those decisions don't shift around and hopefully save a few
cycles, run _tocEntryRequired() only once per TOC entry and save the result
in a new TOC field. This required minor rejiggering of ACL handling but
also allows a far cleaner implementation of inhibit_data_for_failed_table.
Also, to ensure that pg_dump and pg_restore have the same behavior with
respect to the --section switches, add _tocEntryRequired() filtering to
WriteToc() and WriteDataChunks(), rather than trying to implement section
filtering in an entirely orthogonal way in dumpDumpableObject(). This
required adjusting the handling of the special ENCODING and STDSTRINGS
items, but they were pretty weird before anyway.
Minor other code review for the patch, too.
2012-05-30 05:22:14 +02:00
|
|
|
teReqs reqs; /* do we need schema and/or data of object */
|
|
|
|
bool created; /* set for DATA member if TABLE was created */
|
2011-12-17 01:09:38 +01:00
|
|
|
|
2009-02-02 21:07:37 +01:00
|
|
|
/* working state (needed only for parallel restore) */
|
Improve parallel scheduling logic in pg_dump/pg_restore.
Previously, the way this worked was that a parallel pg_dump would
re-order the TABLE_DATA items in the dump's TOC into decreasing size
order, and separately re-order (some of) the INDEX items into decreasing
size order. Then pg_dump would dump the items in that order. Later,
parallel pg_restore just followed the TOC order. This method had lots
of deficiencies:
* TOC ordering randomly differed between parallel and non-parallel
dumps, and was hard to predict in the former case, causing problems
for building stable pg_dump test cases.
* Parallel restore only followed a well-chosen order if the dump had
been done in parallel; in particular, this never happened for restore
from custom-format dumps.
* The best order for restore isn't necessarily the same as for dump,
and it's not really static either because of locking considerations.
* TABLE_DATA and INDEX items aren't the only things that might take a lot
of work during restore. Scheduling was particularly stupid for the BLOBS
item, which might require lots of work during dump as well as restore,
but was left to the end in either case.
This patch removes the logic that changed the TOC order, fixing the
test instability problem. Instead, we sort the parallelizable items
just before processing them during a parallel dump. Independently
of that, parallel restore prioritizes the ready-to-execute tasks
based on the size of the underlying table. In the case of dependent
tasks such as index, constraint, or foreign key creation, the largest
relevant table is used as the metric for estimating the task length.
(This is pretty crude, but it should be enough to avoid the case we
want to avoid, which is ending the run with just a few large tasks
such that we can't make use of all N workers.)
Patch by me, responding to a complaint from Peter Eisentraut,
who also reviewed the patch.
Discussion: https://postgr.es/m/5137fe12-d0a2-4971-61b6-eb4e7e8875f8@2ndquadrant.com
2018-09-14 23:31:51 +02:00
|
|
|
struct _tocEntry *pending_prev; /* list links for pending-items list; */
|
|
|
|
struct _tocEntry *pending_next; /* NULL if not in that list */
|
2009-02-02 21:07:37 +01:00
|
|
|
int depCount; /* number of dependencies not yet restored */
|
2010-12-09 19:03:11 +01:00
|
|
|
DumpId *revDeps; /* dumpIds of objects depending on this one */
|
|
|
|
int nRevDeps; /* number of such dependencies */
|
2009-02-02 21:07:37 +01:00
|
|
|
DumpId *lockDeps; /* dumpIds of objects this one needs lock on */
|
|
|
|
int nLockDeps; /* number of such dependencies */
|
2014-10-14 20:00:55 +02:00
|
|
|
};
|
2000-07-11 15:07:17 +02:00
|
|
|
|
Redesign parallel dump/restore's wait-for-workers logic.
The ListenToWorkers/ReapWorkerStatus APIs were messy and hard to use.
Instead, make DispatchJobForTocEntry register a callback function that
will take care of state cleanup, doing whatever had been done by the caller
of ReapWorkerStatus in the old design. (This callback is essentially just
the old mark_work_done function in the restore case, and a trivial test for
worker failure in the dump case.) Then we can have ListenToWorkers call
the callback immediately on receipt of a status message, and return the
worker to WRKR_IDLE state; so the WRKR_FINISHED state goes away.
This allows us to design a unified wait-for-worker-messages loop:
WaitForWorkers replaces EnsureIdleWorker and EnsureWorkersFinished as well
as the mess in restore_toc_entries_parallel. Also, we no longer need the
fragile API spec that the caller of DispatchJobForTocEntry is responsible
for ensuring there's an idle worker, since DispatchJobForTocEntry can just
wait until there is one.
In passing, I got rid of the ParallelArgs struct, which was a net negative
in terms of notational verboseness, and didn't seem to be providing any
noticeable amount of abstraction either.
Tom Lane, reviewed by Kevin Grittner
Discussion: <1188.1464544443@sss.pgh.pa.us>
2016-09-27 19:22:39 +02:00
|
|
|
/* Restore one TOC entry in a parallel-restore worker */
extern int	parallel_restore(ArchiveHandle *AH, TocEntry *te);
/* Arrange for the archive to be closed automatically at process exit */
extern void on_exit_close_archive(Archive *AHX);
|
2003-12-06 04:00:16 +01:00
|
|
|
|
Unified logging system for command-line programs
This unifies the various ad hoc logging (message printing, error
printing) systems used throughout the command-line programs.
Features:
- Program name is automatically prefixed.
- Message string does not end with newline. This removes a common
source of inconsistencies and omissions.
- Additionally, a final newline is automatically stripped, simplifying
use of PQerrorMessage() etc., another common source of mistakes.
- I converted error message strings to use %m where possible.
- As a result of the above several points, more translatable message
strings can be shared between different components and between
frontends and backend, without gratuitous punctuation or whitespace
differences.
- There is support for setting a "log level". This is not meant to be
user-facing, but can be used internally to implement debug or
verbose modes.
- Lazy argument evaluation, so no significant overhead if logging at
some level is disabled.
- Some color in the messages, similar to gcc and clang. Set
PG_COLOR=auto to try it out. Some colors are predefined, but can be
customized by setting PG_COLORS.
- Common files (common/, fe_utils/, etc.) can handle logging much more
simply by just using one API without worrying too much about the
context of the calling program, requiring callbacks, or having to
pass "progname" around everywhere.
- Some programs called setvbuf() to make sure that stderr is
unbuffered, even on Windows. But not all programs did that. This
is now done centrally.
Soft goals:
- Reduces vertical space use and visual complexity of error reporting
in the source code.
- Encourages more deliberate classification of messages. For example,
in some cases it wasn't clear without analyzing the surrounding code
whether a message was meant as an error or just an info.
- Concepts and terms are vaguely aligned with popular logging
frameworks such as log4j and Python logging.
This is all just about printing stuff out. Nothing affects program
flow (e.g., fatal exits). The uses are just too varied to do that.
Some existing code had wrappers that do some kind of print-and-exit,
and I adapted those.
I tried to keep the output mostly the same, but there is a lot of
historical baggage to unwind and special cases to consider, and I
might not always have succeeded. One significant change is that
pg_rewind used to write all error messages to stdout. That is now
changed to stderr.
Reviewed-by: Donald Dong <xdong@csumb.edu>
Reviewed-by: Arthur Zakirov <a.zakirov@postgrespro.ru>
Discussion: https://www.postgresql.org/message-id/flat/6a609b43-4f57-7348-6480-bd022f924310@2ndquadrant.com
2019-04-01 14:24:37 +02:00
|
|
|
/*
 * Report a printf-style message and either warn or terminate -- presumably
 * depending on archiver error state; confirm exact policy in
 * pg_backup_archiver.c.  Format checking enabled via pg_attribute_printf.
 */
extern void warn_or_exit_horribly(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3);
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2019-02-01 15:29:42 +01:00
|
|
|
/* Options for ArchiveEntry */
typedef struct _archiveOpts
{
	const char *tag;			/* name/identifier for the TOC entry */
	const char *namespace;		/* schema, if any */
	const char *tablespace;		/* tablespace, if any */
	const char *tableam;		/* table access method, if relevant */
	const char *owner;			/* owning role name */
	const char *description;	/* object kind label -- TODO confirm values */
	teSection section;			/* dump section this entry belongs to */
	const char *createStmt;		/* SQL to create the object */
	const char *dropStmt;		/* SQL to drop the object */
	const char *copyStmt;		/* COPY statement for table data, if any */
	const DumpId *deps;			/* dumpIds this entry depends on */
	int			nDeps;			/* number of entries in deps[] */
	DataDumperPtr dumpFn;		/* callback invoked to write data, if any */
	void	   *dumpArg;		/* opaque argument passed through to dumpFn */
} ArchiveOpts;

/*
 * Build a pointer to a temporary ArchiveOpts from designated initializers,
 * e.g. ARCHIVE_OPTS(.tag = name, .section = SECTION_PRE_DATA).
 * Unmentioned fields are zero/NULL (C99 compound-literal semantics).
 */
#define ARCHIVE_OPTS(...) &(ArchiveOpts){__VA_ARGS__}
|
Improve parallel scheduling logic in pg_dump/pg_restore.
Previously, the way this worked was that a parallel pg_dump would
re-order the TABLE_DATA items in the dump's TOC into decreasing size
order, and separately re-order (some of) the INDEX items into decreasing
size order. Then pg_dump would dump the items in that order. Later,
parallel pg_restore just followed the TOC order. This method had lots
of deficiencies:
* TOC ordering randomly differed between parallel and non-parallel
dumps, and was hard to predict in the former case, causing problems
for building stable pg_dump test cases.
* Parallel restore only followed a well-chosen order if the dump had
been done in parallel; in particular, this never happened for restore
from custom-format dumps.
* The best order for restore isn't necessarily the same as for dump,
and it's not really static either because of locking considerations.
* TABLE_DATA and INDEX items aren't the only things that might take a lot
of work during restore. Scheduling was particularly stupid for the BLOBS
item, which might require lots of work during dump as well as restore,
but was left to the end in either case.
This patch removes the logic that changed the TOC order, fixing the
test instability problem. Instead, we sort the parallelizable items
just before processing them during a parallel dump. Independently
of that, parallel restore prioritizes the ready-to-execute tasks
based on the size of the underlying table. In the case of dependent
tasks such as index, constraint, or foreign key creation, the largest
relevant table is used as the metric for estimating the task length.
(This is pretty crude, but it should be enough to avoid the case we
want to avoid, which is ending the run with just a few large tasks
such that we can't make use of all N workers.)
Patch by me, responding to a complaint from Peter Eisentraut,
who also reviewed the patch.
Discussion: https://postgr.es/m/5137fe12-d0a2-4971-61b6-eb4e7e8875f8@2ndquadrant.com
2018-09-14 23:31:51 +02:00
|
|
|
/* Called to add a TOC entry; most properties are supplied via ArchiveOpts */
extern TocEntry *ArchiveEntry(Archive *AHX, CatalogId catalogId,
							  DumpId dumpId, ArchiveOpts *opts);
|
Improve parallel scheduling logic in pg_dump/pg_restore.
Previously, the way this worked was that a parallel pg_dump would
re-order the TABLE_DATA items in the dump's TOC into decreasing size
order, and separately re-order (some of) the INDEX items into decreasing
size order. Then pg_dump would dump the items in that order. Later,
parallel pg_restore just followed the TOC order. This method had lots
of deficiencies:
* TOC ordering randomly differed between parallel and non-parallel
dumps, and was hard to predict in the former case, causing problems
for building stable pg_dump test cases.
* Parallel restore only followed a well-chosen order if the dump had
been done in parallel; in particular, this never happened for restore
from custom-format dumps.
* The best order for restore isn't necessarily the same as for dump,
and it's not really static either because of locking considerations.
* TABLE_DATA and INDEX items aren't the only things that might take a lot
of work during restore. Scheduling was particularly stupid for the BLOBS
item, which might require lots of work during dump as well as restore,
but was left to the end in either case.
This patch removes the logic that changed the TOC order, fixing the
test instability problem. Instead, we sort the parallelizable items
just before processing them during a parallel dump. Independently
of that, parallel restore prioritizes the ready-to-execute tasks
based on the size of the underlying table. In the case of dependent
tasks such as index, constraint, or foreign key creation, the largest
relevant table is used as the metric for estimating the task length.
(This is pretty crude, but it should be enough to avoid the case we
want to avoid, which is ending the run with just a few large tasks
such that we can't make use of all N workers.)
Patch by me, responding to a complaint from Peter Eisentraut,
who also reviewed the patch.
Discussion: https://postgr.es/m/5137fe12-d0a2-4971-61b6-eb4e7e8875f8@2ndquadrant.com
2018-09-14 23:31:51 +02:00
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
/* Write/read the archive header */
extern void WriteHead(ArchiveHandle *AH);
extern void ReadHead(ArchiveHandle *AH);
/* Write/read the table of contents */
extern void WriteToc(ArchiveHandle *AH);
extern void ReadToc(ArchiveHandle *AH);
/* Write data items; pstate is used when dumping in parallel */
extern void WriteDataChunks(ArchiveHandle *AH, struct ParallelState *pstate);
extern void WriteDataChunksForTocEntry(ArchiveHandle *AH, TocEntry *te);

/* Clone an archive handle for use by a parallel worker, and free the clone */
extern ArchiveHandle *CloneArchive(ArchiveHandle *AH);
extern void DeCloneArchive(ArchiveHandle *AH);
|
2000-07-11 15:07:17 +02:00
|
|
|
|
Rewrite --section option to decouple it from --schema-only/--data-only.
The initial implementation of pg_dump's --section option supposed that the
existing --schema-only and --data-only options could be made equivalent to
--section settings. This is wrong, though, due to dubious but long since
set-in-stone decisions about where to dump SEQUENCE SET items, as seen in
bug report from Martin Pitt. (And I'm not totally convinced there weren't
other bugs, either.) Undo that coupling and instead drive --section
filtering off current-section state tracked as we scan through the TOC
list to call _tocEntryRequired().
To make sure those decisions don't shift around and hopefully save a few
cycles, run _tocEntryRequired() only once per TOC entry and save the result
in a new TOC field. This required minor rejiggering of ACL handling but
also allows a far cleaner implementation of inhibit_data_for_failed_table.
Also, to ensure that pg_dump and pg_restore have the same behavior with
respect to the --section switches, add _tocEntryRequired() filtering to
WriteToc() and WriteDataChunks(), rather than trying to implement section
filtering in an entirely orthogonal way in dumpDumpableObject(). This
required adjusting the handling of the special ENCODING and STDSTRINGS
items, but they were pretty weird before anyway.
Minor other code review for the patch, too.
2012-05-30 05:22:14 +02:00
|
|
|
/* Report whether schema and/or data are required for the given dump ID */
extern teReqs TocIDRequired(ArchiveHandle *AH, DumpId id);
/* Find a TOC entry by dump ID; NOTE(review): lacks 'extern' unlike siblings */
TocEntry   *getTocEntryByDumpId(ArchiveHandle *AH, DumpId id);
/* Test whether the given stdio stream supports seeking */
extern bool checkSeek(FILE *fp);
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2006-05-28 23:13:54 +02:00
|
|
|
/*
 * Append str to buf as a SQL string literal, using the archive's
 * encoding and standard_conforming_strings settings.
 */
#define appendStringLiteralAHX(buf,str,AH) \
	appendStringLiteral(buf, str, (AH)->public.encoding, (AH)->public.std_strings)
|
|
|
|
|
2009-08-04 23:56:09 +02:00
|
|
|
/*
 * Append len bytes starting at str to buf as a bytea literal, using the
 * archive's standard_conforming_strings setting.
 */
#define appendByteaLiteralAHX(buf,str,len,AH) \
	appendByteaLiteral(buf, str, len, (AH)->public.std_strings)
|
|
|
|
|
2000-07-11 15:07:17 +02:00
|
|
|
/*
 * Mandatory routines for each supported format
 */

/* Write/read an integer in the archive's portable representation */
extern size_t WriteInt(ArchiveHandle *AH, int i);
extern int	ReadInt(ArchiveHandle *AH);
/* Read a string from the archive; presumably palloc'd/malloc'd --
 * confirm ownership in pg_backup_archiver.c */
extern char *ReadStr(ArchiveHandle *AH);
/* Write a (possibly NULL) string to the archive */
extern size_t WriteStr(ArchiveHandle *AH, const char *s);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
2007-02-19 16:05:06 +01:00
|
|
|
/* Read/write a file offset (pgoff_t) in the archive's portable format.
 * NOTE(review): parameter names are omitted in these prototypes; consider
 * adding them for readability, matching the rest of this header. */
int			ReadOffset(ArchiveHandle *, pgoff_t *);
size_t		WriteOffset(ArchiveHandle *, pgoff_t, int);
|
2002-10-22 21:15:23 +02:00
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
/* Hooks bracketing large-object (blob) restoration */
extern void StartRestoreBlobs(ArchiveHandle *AH);
/* Begin restoring one blob; 'drop' presumably requests dropping any
 * pre-existing object first -- confirm in implementation */
extern void StartRestoreBlob(ArchiveHandle *AH, Oid oid, bool drop);
extern void EndRestoreBlob(ArchiveHandle *AH, Oid oid);
extern void EndRestoreBlobs(ArchiveHandle *AH);
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2001-03-22 05:01:46 +01:00
|
|
|
/* Per-format initializers; each sets up format-specific state in AH */
extern void InitArchiveFmt_Custom(ArchiveHandle *AH);
extern void InitArchiveFmt_Null(ArchiveHandle *AH);
extern void InitArchiveFmt_Directory(ArchiveHandle *AH);
extern void InitArchiveFmt_Tar(ArchiveHandle *AH);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
2003-02-01 20:29:16 +01:00
|
|
|
/* Check whether the given buffer looks like a valid tar file header */
extern bool isValidTarHeader(char *header);
|
2000-07-11 15:07:17 +02:00
|
|
|
|
In pg_dump, force reconnection after issuing ALTER DATABASE SET command(s).
The folly of not doing this was exposed by the buildfarm: in some cases,
the GUC settings applied through ALTER DATABASE SET may be essential to
interpreting the reloaded data correctly. Another argument why we can't
really get away with the scheme proposed in commit b3f840120 is that it
cannot work for parallel restore: even if the parent process manages to
hang onto the previous GUC state, worker processes would see the state
post-ALTER-DATABASE. (Perhaps we could have dodged that bullet by
delaying DATABASE PROPERTIES restoration to the end of the run, but
that does nothing for the data semantics problem.)
This leaves us with no solution for the default_transaction_read_only issue
that commit 4bd371f6f intended to work around, other than "you gotta remove
such settings before dumping/upgrading". However, in view of the fact that
parallel restore broke that hack years ago and no one has noticed, it's
fair to question how many people care. I'm unexcited about adding a large
dollop of new complexity to handle that corner case.
This would be a one-liner fix, except it turns out that ReconnectToServer
tries to optimize away "redundant" reconnections. While that may have been
valuable when coded, a quick survey of current callers shows that there are
no cases where that's actually useful, so just remove that check. While at
it, remove the function's useless return value.
Discussion: https://postgr.es/m/12453.1516655001@sss.pgh.pa.us
2018-01-23 16:55:08 +01:00
|
|
|
/*
 * Close the current connection and reconnect to dbname as newUser.
 * Always reconnects -- the former "redundant reconnection" optimization was
 * removed (see 2018-01-23 history above), along with the return value.
 */
extern void ReconnectToServer(ArchiveHandle *AH, const char *dbname, const char *newUser);
/* Drop the large object with the given OID, if it exists */
extern void DropBlobIfExists(ArchiveHandle *AH, Oid oid);
|
2000-07-11 15:07:17 +02:00
|
|
|
|
2014-05-06 02:27:16 +02:00
|
|
|
/* fwrite()-style output routed through the archiver's output machinery;
 * NOTE(review): exact destination/compression handling is defined in
 * pg_backup_archiver.c -- confirm there */
void		ahwrite(const void *ptr, size_t size, size_t nmemb, ArchiveHandle *AH);
/* printf-style output through the archiver; format-checked */
int			ahprintf(ArchiveHandle *AH, const char *fmt,...) pg_attribute_printf(2, 3);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
2000-07-11 15:07:17 +02:00
|
|
|
#endif
|