/*-------------------------------------------------------------------------
 *
 * pg_backup.h
 *
 *	Public interface to the pg_dump archiver routines.
 *
 *	See the headers to pg_restore for more details.
 *
 * Copyright (c) 2000, Philip Warner
 *		Rights are granted to use this software in any way so long
 *		as this notice is not removed.
 *
 *	The author is not responsible for loss or damages that may
 *	result from its use.
 *
 *
 * IDENTIFICATION
 *		src/bin/pg_dump/pg_backup.h
 *
 *-------------------------------------------------------------------------
 */
|
#ifndef PG_BACKUP_H
#define PG_BACKUP_H
#include "common/compression.h"
#include "common/file_utils.h"
#include "fe_utils/simple_list.h"
#include "libpq-fe.h"
2003-12-06 04:00:16 +01:00
|
|
|
|
2014-10-14 20:00:55 +02:00
|
|
|
typedef enum trivalue
|
2009-02-26 17:02:39 +01:00
|
|
|
{
|
|
|
|
TRI_DEFAULT,
|
|
|
|
TRI_NO,
|
|
|
|
TRI_YES,
|
2014-10-14 20:00:55 +02:00
|
|
|
} trivalue;
|
2009-02-26 17:02:39 +01:00
|
|
|
|
2000-07-21 13:40:08 +02:00
|
|
|
typedef enum _archiveFormat
|
|
|
|
{
|
|
|
|
archUnknown = 0,
|
|
|
|
archCustom = 1,
|
|
|
|
archTar = 3,
|
2011-01-23 22:10:15 +01:00
|
|
|
archNull = 4,
|
|
|
|
archDirectory = 5,
|
2000-07-21 13:40:08 +02:00
|
|
|
} ArchiveFormat;
|
|
|
|
|
2007-01-25 04:30:43 +01:00
|
|
|
typedef enum _archiveMode
|
|
|
|
{
|
|
|
|
archModeAppend,
|
|
|
|
archModeWrite,
|
|
|
|
archModeRead,
|
|
|
|
} ArchiveMode;
|
|
|
|
|
2009-02-02 21:07:37 +01:00
|
|
|
typedef enum _teSection
|
|
|
|
{
|
2022-12-05 08:52:11 +01:00
|
|
|
SECTION_NONE = 1, /* comments, ACLs, etc; can be anywhere */
|
2009-02-02 21:07:37 +01:00
|
|
|
SECTION_PRE_DATA, /* stuff to be processed before data */
|
2022-12-05 08:52:11 +01:00
|
|
|
SECTION_DATA, /* table data, large objects, LO comments */
|
2009-02-02 21:07:37 +01:00
|
|
|
SECTION_POST_DATA, /* stuff to be processed after data */
|
|
|
|
} teSection;
|
|
|
|
|
2021-12-06 19:14:29 +01:00
|
|
|
/* We need one enum entry per prepared query in pg_dump */
|
|
|
|
enum _dumpPreparedQueries
|
|
|
|
{
|
|
|
|
PREPQUERY_DUMPAGG,
|
|
|
|
PREPQUERY_DUMPBASETYPE,
|
|
|
|
PREPQUERY_DUMPCOMPOSITETYPE,
|
|
|
|
PREPQUERY_DUMPDOMAIN,
|
|
|
|
PREPQUERY_DUMPENUMTYPE,
|
|
|
|
PREPQUERY_DUMPFUNC,
|
|
|
|
PREPQUERY_DUMPOPR,
|
|
|
|
PREPQUERY_DUMPRANGETYPE,
|
|
|
|
PREPQUERY_DUMPTABLEATTACH,
|
|
|
|
PREPQUERY_GETCOLUMNACLS,
|
|
|
|
PREPQUERY_GETDOMAINCONSTRAINTS,
|
|
|
|
NUM_PREP_QUERIES /* must be last */
|
|
|
|
};
|
|
|
|
|
Fix handling of -d "connection string" in pg_dump/pg_restore.
Parallel pg_dump failed if its -d parameter was a connection string
containing any essential information other than host, port, or username.
The same was true for pg_restore with --create.
The reason is that these scenarios failed to preserve the connection
string from the command line; the code felt free to replace that with
just the database name when reconnecting from a pg_dump parallel worker
or after creating the target database. By chance, parallel pg_restore
did not suffer this defect, as long as you didn't say --create.
In practice it seems that the error would be obvious only if the
connstring included essential, non-default SSL or GSS parameters.
This may explain why it took us so long to notice. (It also makes
it very difficult to craft a regression test case illustrating the
problem, since the test would fail in builds without those options.)
Fix by refactoring so that ConnectDatabase always receives all the
relevant options directly from the command line, rather than
reconstructed values. Inject a different database name, when necessary,
by relying on libpq's rules for handling multiple "dbname" parameters.
While here, let's get rid of the essentially duplicate _connectDB
function, as well as some obsolete nearby cruft.
Per bug #16604 from Zsolt Ero. Back-patch to all supported branches.
Discussion: https://postgr.es/m/16604-933f4b8791227b15@postgresql.org
2020-09-25 00:19:38 +02:00
|
|
|
/* Parameters needed by ConnectDatabase; same for dump and restore */
|
|
|
|
typedef struct _connParams
|
|
|
|
{
|
|
|
|
/* These fields record the actual command line parameters */
|
|
|
|
char *dbname; /* this may be a connstring! */
|
|
|
|
char *pgport;
|
|
|
|
char *pghost;
|
|
|
|
char *username;
|
|
|
|
trivalue promptPassword;
|
|
|
|
/* If not NULL, this overrides the dbname obtained from command line */
|
|
|
|
/* (but *only* the DB name, not anything else in the connstring) */
|
|
|
|
char *override_dbname;
|
|
|
|
} ConnParams;
|
|
|
|
|
2000-07-21 13:40:08 +02:00
|
|
|
typedef struct _restoreOptions
|
|
|
|
{
|
2010-05-15 23:41:16 +02:00
|
|
|
int createDB; /* Issue commands to create the database */
|
2003-09-24 00:48:53 +02:00
|
|
|
int noOwner; /* Don't try to match original object owner */
|
2022-01-17 06:51:46 +01:00
|
|
|
int noTableAm; /* Don't issue table-AM-related commands */
|
2008-03-20 18:36:58 +01:00
|
|
|
int noTablespace; /* Don't issue tablespace-related commands */
|
2002-05-11 00:36:27 +02:00
|
|
|
int disable_triggers; /* disable triggers during data-only
|
|
|
|
* restore */
|
2004-07-13 05:00:17 +02:00
|
|
|
int use_setsessauth; /* Use SET SESSION AUTHORIZATION commands
|
|
|
|
* instead of OWNER TO */
|
2000-08-01 17:51:45 +02:00
|
|
|
char *superuser; /* Username to use as superuser */
|
2009-01-05 17:54:37 +01:00
|
|
|
char *use_role; /* Issue SET ROLE to this */
|
2000-07-21 13:40:08 +02:00
|
|
|
int dropSchema;
|
2014-10-14 20:00:55 +02:00
|
|
|
int disable_dollar_quoting;
|
2020-07-08 20:52:39 +02:00
|
|
|
int dump_inserts; /* 0 = COPY, otherwise rows per INSERT */
|
2014-10-14 20:00:55 +02:00
|
|
|
int column_inserts;
|
2014-03-03 19:02:18 +01:00
|
|
|
int if_exists;
|
Support --no-comments in pg_dump, pg_dumpall, pg_restore.
We have switches already to suppress other subsidiary object properties,
such as ACLs, security labels, ownership, and tablespaces, so just on
the grounds of symmetry we should allow suppressing comments as well.
Also, commit 0d4e6ed30 added a positive reason to have this feature,
i.e. to allow obtaining the old behavior of selective pg_restore should
anyone desire that.
Recent commits have removed the cases where pg_dump emitted comments on
built-in objects that the restoring user might not have privileges to
comment on, so the original primary motivation for this feature is gone,
but it still seems at least somewhat useful in its own right.
Robins Tharakan, reviewed by Fabrízio Mello
Discussion: https://postgr.es/m/CAEP4nAx22Z4ch74oJGzr5RyyjcyUSbpiFLyeYXX8pehfou92ug@mail.gmail.com
2018-01-25 21:27:24 +01:00
|
|
|
int no_comments; /* Skip comments */
|
2017-05-12 15:15:40 +02:00
|
|
|
int no_publications; /* Skip publication entries */
|
2014-10-14 20:00:55 +02:00
|
|
|
int no_security_labels; /* Skip security label entries */
|
2017-05-09 16:58:06 +02:00
|
|
|
int no_subscriptions; /* Skip subscription entries */
|
2015-09-14 15:19:49 +02:00
|
|
|
int strict_names;
|
2014-10-14 20:00:55 +02:00
|
|
|
|
2012-02-07 22:20:29 +01:00
|
|
|
const char *filename;
|
Rewrite --section option to decouple it from --schema-only/--data-only.
The initial implementation of pg_dump's --section option supposed that the
existing --schema-only and --data-only options could be made equivalent to
--section settings. This is wrong, though, due to dubious but long since
set-in-stone decisions about where to dump SEQUENCE SET items, as seen in
bug report from Martin Pitt. (And I'm not totally convinced there weren't
other bugs, either.) Undo that coupling and instead drive --section
filtering off current-section state tracked as we scan through the TOC
list to call _tocEntryRequired().
To make sure those decisions don't shift around and hopefully save a few
cycles, run _tocEntryRequired() only once per TOC entry and save the result
in a new TOC field. This required minor rejiggering of ACL handling but
also allows a far cleaner implementation of inhibit_data_for_failed_table.
Also, to ensure that pg_dump and pg_restore have the same behavior with
respect to the --section switches, add _tocEntryRequired() filtering to
WriteToc() and WriteDataChunks(), rather than trying to implement section
filtering in an entirely orthogonal way in dumpDumpableObject(). This
required adjusting the handling of the special ENCODING and STDSTRINGS
items, but they were pretty weird before anyway.
Minor other code review for the patch, too.
2012-05-30 05:22:14 +02:00
|
|
|
int dataOnly;
|
2000-07-21 13:40:08 +02:00
|
|
|
int schemaOnly;
|
2011-12-17 01:09:38 +01:00
|
|
|
int dumpSections;
|
2000-07-21 13:40:08 +02:00
|
|
|
int verbose;
|
|
|
|
int aclsSkip;
|
2014-10-14 20:00:55 +02:00
|
|
|
const char *lockWaitTimeout;
|
|
|
|
int include_everything;
|
|
|
|
|
2000-07-21 13:40:08 +02:00
|
|
|
int tocSummary;
|
|
|
|
char *tocFile;
|
|
|
|
int format;
|
|
|
|
char *formatName;
|
|
|
|
|
|
|
|
int selTypes;
|
|
|
|
int selIndex;
|
|
|
|
int selFunction;
|
|
|
|
int selTrigger;
|
|
|
|
int selTable;
|
2013-08-28 08:43:34 +02:00
|
|
|
SimpleStringList indexNames;
|
|
|
|
SimpleStringList functionNames;
|
|
|
|
SimpleStringList schemaNames;
|
2016-09-20 18:00:00 +02:00
|
|
|
SimpleStringList schemaExcludeNames;
|
2013-08-28 08:43:34 +02:00
|
|
|
SimpleStringList triggerNames;
|
2013-01-17 11:24:47 +01:00
|
|
|
SimpleStringList tableNames;
|
2000-07-21 13:40:08 +02:00
|
|
|
|
|
|
|
int useDB;
|
Fix handling of -d "connection string" in pg_dump/pg_restore.
Parallel pg_dump failed if its -d parameter was a connection string
containing any essential information other than host, port, or username.
The same was true for pg_restore with --create.
The reason is that these scenarios failed to preserve the connection
string from the command line; the code felt free to replace that with
just the database name when reconnecting from a pg_dump parallel worker
or after creating the target database. By chance, parallel pg_restore
did not suffer this defect, as long as you didn't say --create.
In practice it seems that the error would be obvious only if the
connstring included essential, non-default SSL or GSS parameters.
This may explain why it took us so long to notice. (It also makes
it very difficult to craft a regression test case illustrating the
problem, since the test would fail in builds without those options.)
Fix by refactoring so that ConnectDatabase always receives all the
relevant options directly from the command line, rather than
reconstructed values. Inject a different database name, when necessary,
by relying on libpq's rules for handling multiple "dbname" parameters.
While here, let's get rid of the essentially duplicate _connectDB
function, as well as some obsolete nearby cruft.
Per bug #16604 from Zsolt Ero. Back-patch to all supported branches.
Discussion: https://postgr.es/m/16604-933f4b8791227b15@postgresql.org
2020-09-25 00:19:38 +02:00
|
|
|
ConnParams cparams; /* parameters to use if useDB */
|
|
|
|
|
2006-08-01 20:21:44 +02:00
|
|
|
int noDataForFailedTables;
|
2004-08-20 06:20:23 +02:00
|
|
|
int exit_on_error;
|
Switch pg_dump to use compression specifications
Compression specifications are currently used by pg_basebackup and
pg_receivewal, and are able to let the user control in an extended way
the method and level of compression used. As an effect of this commit,
pg_dump's -Z/--compress is now able to use more than just an integer, as
of the grammar "method[:detail]".
The method can be either "none" or "gzip", and can optionally take a
detail string. If the detail string is only an integer, it defines the
compression level. A comma-separated list of keywords can also be used
method allows for more options, the only keyword supported now is
"level".
The change is backward-compatible, hence specifying only an integer
leads to no compression for a level of 0 and gzip compression when the
level is greater than 0.
Most of the code changes are straight-forward, as pg_dump was relying on
an integer tracking the compression level to check for gzip or no
compression. These are changed to use a compression specification and
the algorithm stored in it.
As of this change, note that the dump format is not bumped because there
is no need yet to track the compression algorithm in the TOC entries.
Hence, we still rely on the compression level to make the difference
when reading them. This will be mandatory once a new compression method
is added, though.
In order to keep the code simpler when parsing the compression
specification, the code is changed so as pg_dump now fails hard when
using gzip on -Z/--compress without its support compiled, rather than
enforcing no compression without the user knowing about it except
through a warning. Like before this commit, archive and custom formats
are compressed by default when the code is compiled with gzip, and left
uncompressed without gzip.
Author: Georgios Kokolatos
Reviewed-by: Michael Paquier
Discussion: https://postgr.es/m/O4mutIrCES8ZhlXJiMvzsivT7ztAMja2lkdL1LJx6O5f22I2W8PBIeLKz7mDLwxHoibcnRAYJXm1pH4tyUNC4a8eDzLn22a6Pb1S74Niexg=@pm.me
2022-12-02 02:45:02 +01:00
|
|
|
pg_compress_specification compression_spec; /* Specification for
|
|
|
|
* compression */
|
2001-04-25 09:03:20 +02:00
|
|
|
int suppressDumpWarnings; /* Suppress output of WARNING entries
|
|
|
|
* to stderr */
|
2024-04-01 22:46:24 +02:00
|
|
|
|
|
|
|
bool single_txn; /* restore all TOCs in one transaction */
|
|
|
|
int txn_size; /* restore this many TOCs per txn, if > 0 */
|
2006-02-12 05:04:32 +01:00
|
|
|
|
2006-10-15 01:07:22 +02:00
|
|
|
bool *idWanted; /* array showing which dump IDs to emit */
|
Row-Level Security Policies (RLS)
Building on the updatable security-barrier views work, add the
ability to define policies on tables to limit the set of rows
which are returned from a query and which are allowed to be added
to a table. Expressions defined by the policy for filtering are
added to the security barrier quals of the query, while expressions
defined to check records being added to a table are added to the
with-check options of the query.
New top-level commands are CREATE/ALTER/DROP POLICY and are
controlled by the table owner. Row Security is able to be enabled
and disabled by the owner on a per-table basis using
ALTER TABLE .. ENABLE/DISABLE ROW SECURITY.
Per discussion, ROW SECURITY is disabled on tables by default and
must be enabled for policies on the table to be used. If no
policies exist on a table with ROW SECURITY enabled, a default-deny
policy is used and no records will be visible.
By default, row security is applied at all times except for the
table owner and the superuser. A new GUC, row_security, is added
which can be set to ON, OFF, or FORCE. When set to FORCE, row
security will be applied even for the table owner and superusers.
When set to OFF, row security will be disabled when allowed and an
error will be thrown if the user does not have rights to bypass row
security.
Per discussion, pg_dump sets row_security = OFF by default to ensure
that exports and backups will have all data in the table or will
error if there are insufficient privileges to bypass row security.
A new option has been added to pg_dump, --enable-row-security, to
ask pg_dump to export with row security enabled.
A new role capability, BYPASSRLS, which can only be set by the
superuser, is added to allow other users to be able to bypass row
security using row_security = OFF.
Many thanks to the various individuals who have helped with the
design, particularly Robert Haas for his feedback.
Authors include Craig Ringer, KaiGai Kohei, Adam Brightwell, Dean
Rasheed, with additional changes and rework by me.
Reviewers have included all of the above, Greg Smith,
Jeff McCormick, and Robert Haas.
2014-09-19 17:18:35 +02:00
|
|
|
int enable_row_security;
|
2016-08-23 18:00:00 +02:00
|
|
|
int sequence_data; /* dump sequence data even in schema-only mode */
|
pg_upgrade: Fix large object COMMENTS, SECURITY LABELS
When performing a pg_upgrade, we copy the files behind pg_largeobject
and pg_largeobject_metadata, allowing us to avoid having to dump out and
reload the actual data for large objects and their ACLs.
Unfortunately, that isn't all of the information which can be associated
with large objects. Currently, we also support COMMENTs and SECURITY
LABELs with large objects and these were being silently dropped during a
pg_upgrade as pg_dump would skip everything having to do with a large
object and pg_upgrade only copied the tables mentioned to the new
cluster.
As the file copies happen after the catalog dump and reload, we can't
simply include the COMMENTs and SECURITY LABELs in pg_dump's binary-mode
output but we also have to include the actual large object definition as
well. With the definition, comments, and security labels in the pg_dump
output and the file copies performed by pg_upgrade, all of the data and
metadata associated with large objects is able to be successfully pulled
forward across a pg_upgrade.
In 9.6 and master, we can simply adjust the dump bitmask to indicate
which components we don't want. In 9.5 and earlier, we have to put
explciit checks in in dumpBlob() and dumpBlobs() to not include the ACL
or the data when in binary-upgrade mode.
Adjustments made to the privileges regression test to allow another test
(large_object.sql) to be added which explicitly leaves a large object
with a comment in place to provide coverage of that case with
pg_upgrade.
Back-patch to all supported branches.
Discussion: https://postgr.es/m/20170221162655.GE9812@tamriel.snowman.net
2017-03-06 23:03:57 +01:00
|
|
|
int binary_upgrade;
|
2000-07-21 13:40:08 +02:00
|
|
|
} RestoreOptions;
|
|
|
|
|
2014-10-14 20:00:55 +02:00
|
|
|
typedef struct _dumpOptions
|
|
|
|
{
|
Fix handling of -d "connection string" in pg_dump/pg_restore.
Parallel pg_dump failed if its -d parameter was a connection string
containing any essential information other than host, port, or username.
The same was true for pg_restore with --create.
The reason is that these scenarios failed to preserve the connection
string from the command line; the code felt free to replace that with
just the database name when reconnecting from a pg_dump parallel worker
or after creating the target database. By chance, parallel pg_restore
did not suffer this defect, as long as you didn't say --create.
In practice it seems that the error would be obvious only if the
connstring included essential, non-default SSL or GSS parameters.
This may explain why it took us so long to notice. (It also makes
it very difficult to craft a regression test case illustrating the
problem, since the test would fail in builds without those options.)
Fix by refactoring so that ConnectDatabase always receives all the
relevant options directly from the command line, rather than
reconstructed values. Inject a different database name, when necessary,
by relying on libpq's rules for handling multiple "dbname" parameters.
While here, let's get rid of the essentially duplicate _connectDB
function, as well as some obsolete nearby cruft.
Per bug #16604 from Zsolt Ero. Back-patch to all supported branches.
Discussion: https://postgr.es/m/16604-933f4b8791227b15@postgresql.org
2020-09-25 00:19:38 +02:00
|
|
|
ConnParams cparams;
|
2014-10-14 20:00:55 +02:00
|
|
|
|
|
|
|
int binary_upgrade;
|
|
|
|
|
|
|
|
/* various user-settable parameters */
|
|
|
|
bool schemaOnly;
|
|
|
|
bool dataOnly;
|
|
|
|
int dumpSections; /* bitmask of chosen sections */
|
|
|
|
bool aclsSkip;
|
|
|
|
const char *lockWaitTimeout;
|
2019-03-07 13:26:14 +01:00
|
|
|
int dump_inserts; /* 0 = COPY, otherwise rows per INSERT */
|
2014-10-14 20:00:55 +02:00
|
|
|
|
|
|
|
/* flags for various command-line long options */
|
|
|
|
int disable_dollar_quoting;
|
|
|
|
int column_inserts;
|
|
|
|
int if_exists;
|
Support --no-comments in pg_dump, pg_dumpall, pg_restore.
We have switches already to suppress other subsidiary object properties,
such as ACLs, security labels, ownership, and tablespaces, so just on
the grounds of symmetry we should allow suppressing comments as well.
Also, commit 0d4e6ed30 added a positive reason to have this feature,
i.e. to allow obtaining the old behavior of selective pg_restore should
anyone desire that.
Recent commits have removed the cases where pg_dump emitted comments on
built-in objects that the restoring user might not have privileges to
comment on, so the original primary motivation for this feature is gone,
but it still seems at least somewhat useful in its own right.
Robins Tharakan, reviewed by Fabrízio Mello
Discussion: https://postgr.es/m/CAEP4nAx22Z4ch74oJGzr5RyyjcyUSbpiFLyeYXX8pehfou92ug@mail.gmail.com
2018-01-25 21:27:24 +01:00
|
|
|
int no_comments;
|
2014-10-14 20:00:55 +02:00
|
|
|
int no_security_labels;
|
2017-05-12 15:15:40 +02:00
|
|
|
int no_publications;
|
2017-05-09 16:58:06 +02:00
|
|
|
int no_subscriptions;
|
Allow configurable LZ4 TOAST compression.
There is now a per-column COMPRESSION option which can be set to pglz
(the default, and the only option in up until now) or lz4. Or, if you
like, you can set the new default_toast_compression GUC to lz4, and
then that will be the default for new table columns for which no value
is specified. We don't have lz4 support in the PostgreSQL code, so
to use lz4 compression, PostgreSQL must be built --with-lz4.
In general, TOAST compression means compression of individual column
values, not the whole tuple, and those values can either be compressed
inline within the tuple or compressed and then stored externally in
the TOAST table, so those properties also apply to this feature.
Prior to this commit, a TOAST pointer has two unused bits as part of
the va_extsize field, and a compessed datum has two unused bits as
part of the va_rawsize field. These bits are unused because the length
of a varlena is limited to 1GB; we now use them to indicate the
compression type that was used. This means we only have bit space for
2 more built-in compresison types, but we could work around that
problem, if necessary, by introducing a new vartag_external value for
any further types we end up wanting to add. Hopefully, it won't be
too important to offer a wide selection of algorithms here, since
each one we add not only takes more coding but also adds a build
dependency for every packager. Nevertheless, it seems worth doing
at least this much, because LZ4 gets better compression than PGLZ
with less CPU usage.
It's possible for LZ4-compressed datums to leak into composite type
values stored on disk, just as it is for PGLZ. It's also possible for
LZ4-compressed attributes to be copied into a different table via SQL
commands such as CREATE TABLE AS or INSERT .. SELECT. It would be
expensive to force such values to be decompressed, so PostgreSQL has
never done so. For the same reasons, we also don't force recompression
of already-compressed values even if the target table prefers a
different compression method than was used for the source data. These
architectural decisions are perhaps arguable but revisiting them is
well beyond the scope of what seemed possible to do as part of this
project. However, it's relatively cheap to recompress as part of
VACUUM FULL or CLUSTER, so this commit adjusts those commands to do
so, if the configured compression method of the table happens not to
match what was used for some column value stored therein.
Dilip Kumar. The original patches on which this work was based were
written by Ildus Kurbangaliev, and those were patches were based on
even earlier work by Nikita Glukhov, but the design has since changed
very substantially, since allow a potentially large number of
compression methods that could be added and dropped on a running
system proved too problematic given some of the architectural issues
mentioned above; the choice of which specific compression method to
add first is now different; and a lot of the code has been heavily
refactored. More recently, Justin Przyby helped quite a bit with
testing and reviewing and this version also includes some code
contributions from him. Other design input and review from Tomas
Vondra, Álvaro Herrera, Andres Freund, Oleg Bartunov, Alexander
Korotkov, and me.
Discussion: http://postgr.es/m/20170907194236.4cefce96%40wp.localdomain
Discussion: http://postgr.es/m/CAFiTN-uUpX3ck%3DK0mLEk-G_kUQY%3DSNOTeqdaNRR9FMdQrHKebw%40mail.gmail.com
2021-03-19 20:10:38 +01:00
|
|
|
int no_toast_compression;
|
2021-03-20 20:01:10 +01:00
|
|
|
int no_unlogged_table_data;
|
2014-10-14 20:00:55 +02:00
|
|
|
int serializable_deferrable;
|
|
|
|
int disable_triggers;
|
2022-01-17 06:51:46 +01:00
|
|
|
int outputNoTableAm;
|
2014-10-14 20:00:55 +02:00
|
|
|
int outputNoTablespaces;
|
|
|
|
int use_setsessauth;
|
|
|
|
int enable_row_security;
|
2017-08-15 04:54:41 +02:00
|
|
|
int load_via_partition_root;
|
2014-10-14 20:00:55 +02:00
|
|
|
|
|
|
|
/* default, if no "inclusion" switches appear, is to dump everything */
|
|
|
|
bool include_everything;
|
|
|
|
|
|
|
|
int outputClean;
|
|
|
|
int outputCreateDB;
|
2022-12-05 08:52:11 +01:00
|
|
|
bool outputLOs;
|
|
|
|
bool dontOutputLOs;
|
2014-10-14 20:00:55 +02:00
|
|
|
int outputNoOwner;
|
|
|
|
char *outputSuperuser;
|
2016-08-23 18:00:00 +02:00
|
|
|
|
|
|
|
int sequence_data; /* dump sequence data even in schema-only mode */
|
2018-07-13 03:57:03 +02:00
|
|
|
int do_nothing;
|
2014-10-14 20:00:55 +02:00
|
|
|
} DumpOptions;
|
|
|
|
|
2016-01-13 23:48:33 +01:00
|
|
|
/*
|
|
|
|
* We may want to have some more user-readable data, but in the mean
|
|
|
|
* time this gives us some abstraction and type checking.
|
|
|
|
*/
|
|
|
|
typedef struct Archive
|
|
|
|
{
|
|
|
|
DumpOptions *dopt; /* options, if dumping */
|
|
|
|
RestoreOptions *ropt; /* options, if restoring */
|
|
|
|
|
|
|
|
int verbose;
|
|
|
|
char *remoteVersionStr; /* server's version string */
|
|
|
|
int remoteVersion; /* same in numeric form */
|
2016-05-26 22:14:23 +02:00
|
|
|
bool isStandby; /* is server a standby node */
|
2016-01-13 23:48:33 +01:00
|
|
|
|
|
|
|
int minRemoteVersion; /* allowable range */
|
|
|
|
int maxRemoteVersion;
|
|
|
|
|
|
|
|
int numWorkers; /* number of parallel processes */
|
|
|
|
char *sync_snapshot_id; /* sync snapshot id for parallel operation */
|
|
|
|
|
|
|
|
/* info needed for string escaping */
|
|
|
|
int encoding; /* libpq code for client_encoding */
|
|
|
|
bool std_strings; /* standard_conforming_strings */
|
Avoid using unsafe search_path settings during dump and restore.
Historically, pg_dump has "set search_path = foo, pg_catalog" when
dumping an object in schema "foo", and has also caused that setting
to be used while restoring the object. This is problematic because
functions and operators in schema "foo" could capture references meant
to refer to pg_catalog entries, both in the queries issued by pg_dump
and those issued during the subsequent restore run. That could
result in dump/restore misbehavior, or in privilege escalation if a
nefarious user installs trojan-horse functions or operators.
This patch changes pg_dump so that it does not change the search_path
dynamically. The emitted restore script sets the search_path to what
was used at dump time, and then leaves it alone thereafter. Created
objects are placed in the correct schema, regardless of the active
search_path, by dint of schema-qualifying their names in the CREATE
commands, as well as in subsequent ALTER and ALTER-like commands.
Since this change requires a change in the behavior of pg_restore
when processing an archive file made according to this new convention,
bump the archive file version number; old versions of pg_restore will
therefore refuse to process files made with new versions of pg_dump.
Security: CVE-2018-1058
2018-02-26 16:18:21 +01:00
|
|
|
|
|
|
|
/* other important stuff */
|
|
|
|
char *searchpath; /* search_path to set during restore */
|
2016-01-13 23:48:33 +01:00
|
|
|
char *use_role; /* Issue SET ROLE to this */
|
|
|
|
|
|
|
|
/* error handling */
|
|
|
|
bool exit_on_error; /* whether to exit on SQL errors... */
|
|
|
|
int n_errors; /* number of errors (if no die) */
|
|
|
|
|
2021-12-06 19:14:29 +01:00
|
|
|
/* prepared-query status */
|
|
|
|
bool *is_prepared; /* indexed by enum _dumpPreparedQueries */
|
|
|
|
|
2016-01-13 23:48:33 +01:00
|
|
|
/* The rest is private */
|
|
|
|
} Archive;
|
|
|
|
|
2014-10-14 20:00:55 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* pg_dump uses two different mechanisms for identifying database objects:
|
|
|
|
*
|
|
|
|
* CatalogId represents an object by the tableoid and oid of its defining
|
|
|
|
* entry in the system catalogs. We need this to interpret pg_depend entries,
|
|
|
|
* for instance.
|
|
|
|
*
|
|
|
|
* DumpId is a simple sequential integer counter assigned as dumpable objects
|
|
|
|
* are identified during a pg_dump run. We use DumpId internally in preference
|
|
|
|
* to CatalogId for two reasons: it's more compact, and we can assign DumpIds
|
|
|
|
* to "objects" that don't have a separate CatalogId. For example, it is
|
|
|
|
* convenient to consider a table, its data, and its ACL as three separate
|
|
|
|
* dumpable "objects" with distinct DumpIds --- this lets us reason about the
|
|
|
|
* order in which to dump these things.
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
2021-10-22 23:19:03 +02:00
|
|
|
/* Note: this struct must not contain any unused bytes */
|
2014-10-14 20:00:55 +02:00
|
|
|
Oid tableoid;
|
|
|
|
Oid oid;
|
|
|
|
} CatalogId;
|
|
|
|
|
|
|
|
/*
 * Sequential integer ID assigned to each dumpable object as it is
 * identified during a pg_dump run (see comment on CatalogId above).
 */
typedef int DumpId;

/* Zero is reserved to mean "no/unassigned DumpId" */
#define InvalidDumpId 0
|
|
|
|
|
|
|
|
/*
 * Function pointer prototypes for assorted callback methods.
 */

/*
 * Called to dump the data for one archive entry.  userArg is an opaque
 * pointer passed through from whoever registered the callback.
 * NOTE(review): the precise userArg contract is defined at the call
 * sites, not here — confirm against the registering code.
 */
typedef int (*DataDumperPtr) (Archive *AH, const void *userArg);

/* Called to initialize state in each parallel worker process/thread */
typedef void (*SetupWorkerPtrType) (Archive *AH);
|
2013-03-24 16:27:20 +01:00
|
|
|
|
2000-07-21 13:40:08 +02:00
|
|
|
/*
|
|
|
|
* Main archiver interface.
|
|
|
|
*/
|
|
|
|
|
Fix handling of -d "connection string" in pg_dump/pg_restore.
Parallel pg_dump failed if its -d parameter was a connection string
containing any essential information other than host, port, or username.
The same was true for pg_restore with --create.
The reason is that these scenarios failed to preserve the connection
string from the command line; the code felt free to replace that with
just the database name when reconnecting from a pg_dump parallel worker
or after creating the target database. By chance, parallel pg_restore
did not suffer this defect, as long as you didn't say --create.
In practice it seems that the error would be obvious only if the
connstring included essential, non-default SSL or GSS parameters.
This may explain why it took us so long to notice. (It also makes
it very difficult to craft a regression test case illustrating the
problem, since the test would fail in builds without those options.)
Fix by refactoring so that ConnectDatabase always receives all the
relevant options directly from the command line, rather than
reconstructed values. Inject a different database name, when necessary,
by relying on libpq's rules for handling multiple "dbname" parameters.
While here, let's get rid of the essentially duplicate _connectDB
function, as well as some obsolete nearby cruft.
Per bug #16604 from Zsolt Ero. Back-patch to all supported branches.
Discussion: https://postgr.es/m/16604-933f4b8791227b15@postgresql.org
2020-09-25 00:19:38 +02:00
|
|
|
/*
 * Establish the archive's database connection.  cparams carries the
 * connection options taken directly from the command line; isReconnect
 * is true when re-establishing a connection rather than making the first
 * one (e.g. in a parallel worker, or after creating the target database).
 */
extern void ConnectDatabase(Archive *AHX,
							const ConnParams *cparams,
							bool isReconnect);
extern void DisconnectDatabase(Archive *AHX);
/* Fetch the archive's underlying libpq connection object */
extern PGconn *GetConnection(Archive *AHX);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
|
|
|
/* Called to write *data* (dLen bytes) to the archive */
extern void WriteData(Archive *AHX, const void *data, size_t dLen);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
2022-12-05 08:52:11 +01:00
|
|
|
/* Called to begin/end writing the data of one large object (LO) */
extern int	StartLO(Archive *AHX, Oid oid);
extern int	EndLO(Archive *AHX, Oid oid);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
2022-09-23 01:41:23 +02:00
|
|
|
/* Called to close the archive when done with it */
extern void CloseArchive(Archive *AHX);
|
2016-01-13 23:48:33 +01:00
|
|
|
|
|
|
|
/* Attach the dump and restore option structs to the archive handle */
extern void SetArchiveOptions(Archive *AH, DumpOptions *dopt, RestoreOptions *ropt);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
2022-09-23 01:41:23 +02:00
|
|
|
/* Process the restore options attached via SetArchiveOptions */
extern void ProcessArchiveRestoreOptions(Archive *AHX);
|
Rewrite --section option to decouple it from --schema-only/--data-only.
The initial implementation of pg_dump's --section option supposed that the
existing --schema-only and --data-only options could be made equivalent to
--section settings. This is wrong, though, due to dubious but long since
set-in-stone decisions about where to dump SEQUENCE SET items, as seen in
bug report from Martin Pitt. (And I'm not totally convinced there weren't
other bugs, either.) Undo that coupling and instead drive --section
filtering off current-section state tracked as we scan through the TOC
list to call _tocEntryRequired().
To make sure those decisions don't shift around and hopefully save a few
cycles, run _tocEntryRequired() only once per TOC entry and save the result
in a new TOC field. This required minor rejiggering of ACL handling but
also allows a far cleaner implementation of inhibit_data_for_failed_table.
Also, to ensure that pg_dump and pg_restore have the same behavior with
respect to the --section switches, add _tocEntryRequired() filtering to
WriteToc() and WriteDataChunks(), rather than trying to implement section
filtering in an entirely orthogonal way in dumpDumpableObject(). This
required adjusting the handling of the special ENCODING and STDSTRINGS
items, but they were pretty weird before anyway.
Minor other code review for the patch, too.
2012-05-30 05:22:14 +02:00
|
|
|
|
2022-09-23 01:41:23 +02:00
|
|
|
/* Restore the archive's contents, per the options previously set */
extern void RestoreArchive(Archive *AHX);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
|
|
|
/* Open an existing archive file in the given format */
extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt);
|
|
|
|
|
|
|
|
/*
 * Create a new archive.
 *
 * compression_spec selects the compression method and level, per the
 * -Z/--compress grammar "method[:detail]".  setupDumpWorker is invoked
 * to initialize each parallel dump worker.
 * NOTE(review): dosync/sync_method presumably control whether and how
 * output files are synced to disk — confirm against the implementation.
 */
extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
							  const pg_compress_specification compression_spec,
							  bool dosync, ArchiveMode mode,
							  SetupWorkerPtrType setupDumpWorker,
							  DataDirSyncMethod sync_method);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
|
|
|
/* The --list option: print a summary of the archive's table of contents */
extern void PrintTOCSummary(Archive *AHX);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
|
|
|
/* Allocate a new RestoreOptions struct */
extern RestoreOptions *NewRestoreOptions(void);

/* Allocate / initialize a DumpOptions struct */
extern DumpOptions *NewDumpOptions(void);
extern void InitDumpOptions(DumpOptions *opts);
/* Derive equivalent dump options from a set of restore options */
extern DumpOptions *dumpOptionsFromRestoreOptions(RestoreOptions *ropt);
|
|
|
|
|
2006-10-15 01:07:22 +02:00
|
|
|
/*
 * Rearrange and filter TOC entries.
 * NOTE(review): the ordering presumably comes from a user-supplied TOC
 * list file (pg_restore -L) — confirm at the call site.
 */
extern void SortTocFromFile(Archive *AHX);
|
2000-07-21 13:40:08 +02:00
|
|
|
|
|
|
|
/* Convenience functions used only when writing DATA */
/* fputs/printf analogues directed at the archive's data stream */
extern void archputs(const char *s, Archive *AH);
extern int	archprintf(Archive *AH, const char *fmt,...) pg_attribute_printf(2, 3);
|
2001-10-28 07:26:15 +01:00
|
|
|
|
2006-05-28 23:13:54 +02:00
|
|
|
/*
 * Append str to buf as a SQL string literal, quoted according to the
 * archive's client encoding and standard_conforming_strings setting.
 */
#define appendStringLiteralAH(buf,str,AH) \
	appendStringLiteral(buf, str, (AH)->encoding, (AH)->std_strings)
|
|
|
|
|
2003-12-06 04:00:16 +01:00
|
|
|
#endif /* PG_BACKUP_H */
|