/*-------------------------------------------------------------------------
*
* pg_backup.h
*
* Public interface to the pg_dump archiver routines.
*
* See the headers to pg_restore for more details.
*
* Copyright (c) 2000, Philip Warner
* Rights are granted to use this software in any way so long
* as this notice is not removed.
*
* The author is not responsible for loss or damages that may
* result from its use.
*
*
* IDENTIFICATION
* src/bin/pg_dump/pg_backup.h
*
*-------------------------------------------------------------------------
*/
#ifndef PG_BACKUP_H
#define PG_BACKUP_H
#include "common/compression.h"
#include "fe_utils/simple_list.h"
#include "libpq-fe.h"

typedef enum trivalue
{
    TRI_DEFAULT,
    TRI_NO,
    TRI_YES
} trivalue;

typedef enum _archiveFormat
{
    archUnknown = 0,
    archCustom = 1,
    archTar = 3,
    archNull = 4,
    archDirectory = 5
} ArchiveFormat;

typedef enum _archiveMode
{
    archModeAppend,
    archModeWrite,
    archModeRead
} ArchiveMode;

typedef enum _teSection
{
    SECTION_NONE = 1,           /* comments, ACLs, etc; can be anywhere */
    SECTION_PRE_DATA,           /* stuff to be processed before data */
    SECTION_DATA,               /* table data, large objects, LO comments */
    SECTION_POST_DATA           /* stuff to be processed after data */
} teSection;

/* We need one enum entry per prepared query in pg_dump */
enum _dumpPreparedQueries
{
    PREPQUERY_DUMPAGG,
    PREPQUERY_DUMPBASETYPE,
    PREPQUERY_DUMPCOMPOSITETYPE,
    PREPQUERY_DUMPDOMAIN,
    PREPQUERY_DUMPENUMTYPE,
    PREPQUERY_DUMPFUNC,
    PREPQUERY_DUMPOPR,
    PREPQUERY_DUMPRANGETYPE,
    PREPQUERY_DUMPTABLEATTACH,
    PREPQUERY_GETCOLUMNACLS,
    PREPQUERY_GETDOMAINCONSTRAINTS,
    NUM_PREP_QUERIES            /* must be last */
};

/* Parameters needed by ConnectDatabase; same for dump and restore */
typedef struct _connParams
{
    /* These fields record the actual command line parameters */
    char       *dbname;         /* this may be a connstring! */
    char       *pgport;
    char       *pghost;
    char       *username;
    trivalue    promptPassword;

    /* If not NULL, this overrides the dbname obtained from command line */
    /* (but *only* the DB name, not anything else in the connstring) */
    char       *override_dbname;
} ConnParams;
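
/*
 * For illustration (a sketch, not part of the interface): dbname may carry
 * a full libpq connection string, which ConnectDatabase passes through
 * as-is.  pg_strdup() is the frontend strdup wrapper from
 * common/fe_memutils.h.
 *
 *      ConnParams  cparams = {0};
 *
 *      cparams.dbname = pg_strdup("host=localhost dbname=mydb");
 *      cparams.promptPassword = TRI_DEFAULT;
 */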

typedef struct _restoreOptions
{
    int         createDB;       /* Issue commands to create the database */
    int         noOwner;        /* Don't try to match original object owner */
    int         noTableAm;      /* Don't issue table-AM-related commands */
    int         noTablespace;   /* Don't issue tablespace-related commands */
    int         disable_triggers;   /* disable triggers during data-only
                                     * restore */
    int         use_setsessauth;    /* Use SET SESSION AUTHORIZATION commands
                                     * instead of OWNER TO */
    char       *superuser;      /* Username to use as superuser */
    char       *use_role;       /* Issue SET ROLE to this */
    int         dropSchema;
    int         disable_dollar_quoting;
    int         dump_inserts;   /* 0 = COPY, otherwise rows per INSERT */
    int         column_inserts;
    int         if_exists;
    int         no_comments;    /* Skip comments */
    int         no_publications;    /* Skip publication entries */
    int         no_security_labels; /* Skip security label entries */
    int         no_subscriptions;   /* Skip subscription entries */
    int         strict_names;
    const char *filename;
    int         dataOnly;
    int         schemaOnly;
    int         dumpSections;
    int         verbose;
    int         aclsSkip;
    const char *lockWaitTimeout;
    int         include_everything;

    int         tocSummary;
    char       *tocFile;
    int         format;
    char       *formatName;

    int         selTypes;
    int         selIndex;
    int         selFunction;
    int         selTrigger;
    int         selTable;
    SimpleStringList indexNames;
    SimpleStringList functionNames;
    SimpleStringList schemaNames;
    SimpleStringList schemaExcludeNames;
    SimpleStringList triggerNames;
    SimpleStringList tableNames;

    int         useDB;
    ConnParams  cparams;        /* parameters to use if useDB */
    int         noDataForFailedTables;
    int         exit_on_error;
    pg_compress_specification compression_spec; /* Specification for
                                                 * compression */
    int         suppressDumpWarnings;   /* Suppress output of WARNING entries
                                         * to stderr */
    bool        single_txn;

    bool       *idWanted;       /* array showing which dump IDs to emit */
    int         enable_row_security;
    int         sequence_data;  /* dump sequence data even in schema-only mode */
    int         binary_upgrade;
} RestoreOptions;

typedef struct _dumpOptions
{
    ConnParams  cparams;

    int         binary_upgrade;

    /* various user-settable parameters */
    bool        schemaOnly;
    bool        dataOnly;
    int         dumpSections;   /* bitmask of chosen sections */
    bool        aclsSkip;
    const char *lockWaitTimeout;
    int         dump_inserts;   /* 0 = COPY, otherwise rows per INSERT */

    /* flags for various command-line long options */
    int         disable_dollar_quoting;
    int         column_inserts;
    int         if_exists;
    int         no_comments;
    int         no_security_labels;
    int         no_publications;
    int         no_subscriptions;
    int         no_toast_compression;
    int         no_unlogged_table_data;
    int         serializable_deferrable;
    int         disable_triggers;
    int         outputNoTableAm;
    int         outputNoTablespaces;
    int         use_setsessauth;
    int         enable_row_security;
    int         load_via_partition_root;

    /* default, if no "inclusion" switches appear, is to dump everything */
    bool        include_everything;

    int         outputClean;
    int         outputCreateDB;
    bool        outputLOs;
    bool        dontOutputLOs;
    int         outputNoOwner;
    char       *outputSuperuser;

    int         sequence_data;  /* dump sequence data even in schema-only mode */

    int         do_nothing;
} DumpOptions;

/*
 * We may want to have some more user-readable data, but in the mean
 * time this gives us some abstraction and type checking.
 */
typedef struct Archive
{
    DumpOptions *dopt;          /* options, if dumping */
    RestoreOptions *ropt;       /* options, if restoring */

    int         verbose;
    char       *remoteVersionStr;   /* server's version string */
    int         remoteVersion;  /* same in numeric form */
    bool        isStandby;      /* is server a standby node */

    int         minRemoteVersion;   /* allowable range */
    int         maxRemoteVersion;

    int         numWorkers;     /* number of parallel processes */
    char       *sync_snapshot_id;   /* sync snapshot id for parallel operation */

    /* info needed for string escaping */
    int         encoding;       /* libpq code for client_encoding */
    bool        std_strings;    /* standard_conforming_strings */

    /* other important stuff */
    char       *searchpath;     /* search_path to set during restore */
    char       *use_role;       /* Issue SET ROLE to this */

    /* error handling */
    bool        exit_on_error;  /* whether to exit on SQL errors... */
    int         n_errors;       /* number of errors (if no die) */

    /* prepared-query status */
    bool       *is_prepared;    /* indexed by enum _dumpPreparedQueries */

    /* The rest is private */
} Archive;

/*
* pg_dump uses two different mechanisms for identifying database objects:
*
* CatalogId represents an object by the tableoid and oid of its defining
* entry in the system catalogs. We need this to interpret pg_depend entries,
* for instance.
*
* DumpId is a simple sequential integer counter assigned as dumpable objects
* are identified during a pg_dump run. We use DumpId internally in preference
* to CatalogId for two reasons: it's more compact, and we can assign DumpIds
* to "objects" that don't have a separate CatalogId. For example, it is
* convenient to consider a table, its data, and its ACL as three separate
* dumpable "objects" with distinct DumpIds --- this lets us reason about the
* order in which to dump these things.
*/
typedef struct
{
    /* Note: this struct must not contain any unused bytes */
    Oid         tableoid;
    Oid         oid;
} CatalogId;

typedef int DumpId;

#define InvalidDumpId 0
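
/*
 * For illustration (a sketch; "nextDumpId" is a hypothetical counter, not
 * part of this header): a table, its data, and its ACL can be registered
 * as three dumpable objects that share one CatalogId but carry distinct
 * DumpIds, which is what lets the archiver order those steps independently.
 *
 *      CatalogId   catId = { RelationRelationId, table_oid };
 *      DumpId      tableDumpId = nextDumpId++;     (the table definition)
 *      DumpId      dataDumpId = nextDumpId++;      (its row data)
 *      DumpId      aclDumpId = nextDumpId++;       (its ACL)
 */
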
/*
* Function pointer prototypes for assorted callback methods.
*/
typedef int (*DataDumperPtr) (Archive *AH, const void *userArg);
typedef void (*SetupWorkerPtrType) (Archive *AH);
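
/*
 * A data-dumper callback might look like this (illustrative sketch;
 * "dumpOneRow" is hypothetical): it streams its output through WriteData()
 * and returns nonzero on success.
 *
 *      static int
 *      dumpOneRow(Archive *AH, const void *userArg)
 *      {
 *          const char *row = (const char *) userArg;
 *
 *          WriteData(AH, row, strlen(row));
 *          return 1;
 *      }
 */
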
/*
* Main archiver interface.
*/
extern void ConnectDatabase(Archive *AHX,
                            const ConnParams *cparams,
                            bool isReconnect);
extern void DisconnectDatabase(Archive *AHX);
extern PGconn *GetConnection(Archive *AHX);
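
/*
 * Typical connection lifecycle, sketched (error handling omitted; cparams
 * is assumed to have been filled in from command-line options):
 *
 *      ConnectDatabase(AH, &cparams, false);
 *      PGconn *conn = GetConnection(AH);
 *      ... run queries over conn ...
 *      DisconnectDatabase(AH);
 */
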
/* Called to write *data* to the archive */
extern void WriteData(Archive *AHX, const void *data, size_t dLen);
extern int StartLO(Archive *AHX, Oid oid);
extern int EndLO(Archive *AHX, Oid oid);
extern void CloseArchive(Archive *AHX);
extern void SetArchiveOptions(Archive *AH, DumpOptions *dopt, RestoreOptions *ropt);
extern void ProcessArchiveRestoreOptions(Archive *AHX);
extern void RestoreArchive(Archive *AHX);

/* Open an existing archive */
extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt);

/* Create a new archive */
extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
                              const pg_compress_specification compression_spec,
                              bool dosync, ArchiveMode mode,
                              SetupWorkerPtrType setupDumpWorker);
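
/*
 * For example (a sketch): creating a gzip-compressed custom-format archive,
 * assuming the parse_compress_specification() helper from
 * common/compression.h and a caller-supplied setupDumpWorker callback.
 *
 *      pg_compress_specification spec;
 *
 *      parse_compress_specification(PG_COMPRESSION_GZIP, "5", &spec);
 *      Archive *AH = CreateArchive("out.dump", archCustom, spec,
 *                                  true, archModeWrite, setupDumpWorker);
 */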

/* The --list option */
extern void PrintTOCSummary(Archive *AHX);
extern RestoreOptions *NewRestoreOptions(void);
extern DumpOptions *NewDumpOptions(void);
extern void InitDumpOptions(DumpOptions *opts);
extern DumpOptions *dumpOptionsFromRestoreOptions(RestoreOptions *ropt);

/* Rearrange and filter TOC entries */
extern void SortTocFromFile(Archive *AHX);

/* Convenience functions used only when writing DATA */
extern void archputs(const char *s, Archive *AH);
extern int archprintf(Archive *AH, const char *fmt,...) pg_attribute_printf(2, 3);

#define appendStringLiteralAH(buf,str,AH) \
    appendStringLiteral(buf, str, (AH)->encoding, (AH)->std_strings)
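
/*
 * For example (sketch), appending an escaped literal to a query buffer,
 * where "query" is a caller-owned PQExpBuffer:
 *
 *      appendPQExpBufferStr(query, "COMMENT ON TABLE mytable IS ");
 *      appendStringLiteralAH(query, comment_text, AH);
 */
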
#endif /* PG_BACKUP_H */