postgresql/src/bin/pg_dump/pg_backup_null.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

225 lines
5.0 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* pg_backup_null.c
*
* Implementation of an archive that is never saved; it is used by
* pg_dump to output a plain text SQL script instead of saving
* a real archive.
*
* See the headers to pg_restore for more details.
*
* Copyright (c) 2000, Philip Warner
* Rights are granted to use this software in any way so long
* as this notice is not removed.
*
* The author is not responsible for loss or damages that may
* result from its use.
*
*
* IDENTIFICATION
2010-09-20 22:08:53 +02:00
* src/bin/pg_dump/pg_backup_null.c
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include "fe_utils/string_utils.h"
#include "libpq/libpq-fs.h"
#include "pg_backup_archiver.h"
#include "pg_backup_utils.h"
static void _WriteData(ArchiveHandle *AH, const void *data, size_t dLen);
static void _WriteLOData(ArchiveHandle *AH, const void *data, size_t dLen);
static void _EndData(ArchiveHandle *AH, TocEntry *te);
static int _WriteByte(ArchiveHandle *AH, const int i);
static void _WriteBuf(ArchiveHandle *AH, const void *buf, size_t len);
static void _CloseArchive(ArchiveHandle *AH);
static void _PrintTocData(ArchiveHandle *AH, TocEntry *te);
static void _StartLOs(ArchiveHandle *AH, TocEntry *te);
static void _StartLO(ArchiveHandle *AH, TocEntry *te, Oid oid);
static void _EndLO(ArchiveHandle *AH, TocEntry *te, Oid oid);
static void _EndLOs(ArchiveHandle *AH, TocEntry *te);
/*
* Initializer
*/
void
InitArchiveFmt_Null(ArchiveHandle *AH)
{
/* Assuming static functions, this can be copied for each format. */
AH->WriteDataPtr = _WriteData;
AH->EndDataPtr = _EndData;
AH->WriteBytePtr = _WriteByte;
AH->WriteBufPtr = _WriteBuf;
AH->ClosePtr = _CloseArchive;
AH->ReopenPtr = NULL;
AH->PrintTocDataPtr = _PrintTocData;
AH->StartLOsPtr = _StartLOs;
AH->StartLOPtr = _StartLO;
AH->EndLOPtr = _EndLO;
AH->EndLOsPtr = _EndLOs;
AH->ClonePtr = NULL;
AH->DeClonePtr = NULL;
/*
* Now prevent reading...
*/
if (AH->mode == archModeRead)
pg_fatal("this format cannot be read");
}
/*
* - Start a new TOC entry
*/
/*
* Called by dumper via archiver from within a data dump routine
*/
static void
_WriteData(ArchiveHandle *AH, const void *data, size_t dLen)
{
/* Just send it to output, ahwrite() already errors on failure */
ahwrite(data, 1, dLen, AH);
}
/*
* Called by dumper via archiver from within a data dump routine
* We substitute this for _WriteData while emitting a LO
*/
static void
_WriteLOData(ArchiveHandle *AH, const void *data, size_t dLen)
{
if (dLen > 0)
{
PQExpBuffer buf = createPQExpBuffer();
appendByteaLiteralAHX(buf,
(const unsigned char *) data,
dLen,
AH);
ahprintf(AH, "SELECT pg_catalog.lowrite(0, %s);\n", buf->data);
destroyPQExpBuffer(buf);
}
}
static void
_EndData(ArchiveHandle *AH, TocEntry *te)
{
ahprintf(AH, "\n\n");
}
/*
Rearrange pg_dump's handling of large objects for better efficiency. Commit c0d5be5d6 caused pg_dump to create a separate BLOB metadata TOC entry for each large object (blob), but it did not touch the ancient decision to put all the blobs' data into a single "BLOBS" TOC entry. This is bad for a few reasons: for databases with millions of blobs, the TOC becomes unreasonably large, causing performance issues; selective restore of just some blobs is quite impossible; and we cannot parallelize either dump or restore of the blob data, since our architecture for that relies on farming out whole TOC entries to worker processes. To improve matters, let's group multiple blobs into each blob metadata TOC entry, and then make corresponding per-group blob data TOC entries. Selective restore using pg_restore's -l/-L switches is then possible, though only at the group level. (Perhaps we should provide a switch to allow forcing one-blob-per-group for users who need precise selective restore and don't have huge numbers of blobs. This patch doesn't do that, instead just hard-wiring the maximum number of blobs per entry at 1000.) The blobs in a group must all have the same owner, since the TOC entry format only allows one owner to be named. In this implementation we also require them to all share the same ACL (grants); the archive format wouldn't require that, but pg_dump's representation of DumpableObjects does. It seems unlikely that either restriction will be problematic for databases with huge numbers of blobs. The metadata TOC entries now have a "desc" string of "BLOB METADATA", and their "defn" string is just a newline-separated list of blob OIDs. The restore code has to generate creation commands, ALTER OWNER commands, and drop commands (for --clean mode) from that. We would need special-case code for ALTER OWNER and drop in any case, so the alternative of keeping the "defn" as directly executable SQL code for creation wouldn't buy much, and it seems like it'd bloat the archive to little purpose. Since we require the blobs of a metadata group to share the same ACL, we can furthermore store only one copy of that ACL, and then make pg_restore regenerate the appropriate commands for each blob. This saves space in the dump file not only by removing duplicative SQL command strings, but by not needing a separate TOC entry for each blob's ACL. In turn, that reduces client-side memory requirements for handling many blobs. ACL TOC entries that need this special processing are labeled as "ACL"/"LARGE OBJECTS nnn..nnn". If we have a blob with a unique ACL, continue to label it as "ACL"/"LARGE OBJECT nnn". We don't actually have to make such a distinction, but it saves a few cycles during restore for the easy case, and it seems like a good idea to not change the TOC contents unnecessarily. The data TOC entries ("BLOBS") are exactly the same as before, except that now there can be more than one, so we'd better give them identifying tag strings. Also, commit c0d5be5d6 put the new BLOB metadata TOC entries into SECTION_PRE_DATA, which perhaps is defensible in some ways, but it's a rather odd choice considering that we go out of our way to treat blobs as data. Moreover, because parallel restore handles the PRE_DATA section serially, this means we'd only get part of the parallelism speedup we could hope for. Move these entries into SECTION_DATA, letting us parallelize the lo_create calls not just the data loading when there are many blobs. Add dependencies to ensure that we won't try to load data for a blob we've not yet created. As this stands, we still generate a separate TOC entry for any comment or security label attached to a blob. I feel comfortable in believing that comments and security labels on blobs are rare, so this patch should be enough to get most of the useful TOC compression for blobs. We have to bump the archive file format version number, since existing versions of pg_restore wouldn't know they need to do something special for BLOB METADATA, plus they aren't going to work correctly with multiple BLOBS entries or multiple-large-object ACL entries. The directory and tar-file format handlers need some work for multiple BLOBS entries: they used to hard-wire the file name as "blobs.toc", which is replaced here with "blobs_<dumpid>.toc". The 002_pg_dump.pl test script also knows about that and requires minor updates. (I had to drop the test for manually-compressed blobs.toc files with LZ4, because lz4's obtuse command line design requires explicit specification of the output file name which seems impractical here. I don't think we're losing any useful test coverage thereby; that test stanza seems completely duplicative with the gzip and zstd cases anyway.) In passing, centralize management of the lo_buf used to hold data while restoring blobs. The code previously had each format handler create lo_buf, which seems rather pointless given that the format handlers all make it the same way. Moreover, the format handlers never use lo_buf directly, making this setup a failure from a separation-of-concerns standpoint. Let's move the responsibility into pg_backup_archiver.c, which is the only module concerned with lo_buf. The reason to do this in this patch is that it allows a centralized fix for the now-false assumption that we never restore blobs in parallel. Also, get rid of dead code in DropLOIfExists: it's been a long time since we had any need to be able to restore to a pre-9.0 server. Discussion: https://postgr.es/m/a9f9376f1c3343a6bb319dce294e20ac@EX13D05UWC001.ant.amazon.com
2024-04-01 22:25:56 +02:00
* Called by the archiver when starting to save BLOB DATA (not schema).
* This routine should save whatever format-specific information is needed
* to read the LOs back into memory.
*
* It is called just prior to the dumper's DataDumper routine.
*
* Optional, but strongly recommended.
*/
static void
_StartLOs(ArchiveHandle *AH, TocEntry *te)
{
ahprintf(AH, "BEGIN;\n\n");
}
/*
* Called by the archiver when the dumper calls StartLO.
*
* Mandatory.
*
* Must save the passed OID for retrieval at restore-time.
*/
static void
_StartLO(ArchiveHandle *AH, TocEntry *te, Oid oid)
{
bool old_lo_style = (AH->version < K_VERS_1_12);
if (oid == 0)
pg_fatal("invalid OID for large object");
/* With an old archive we must do drop and create logic here */
if (old_lo_style && AH->public.ropt->dropSchema)
DropLOIfExists(AH, oid);
if (old_lo_style)
ahprintf(AH, "SELECT pg_catalog.lo_open(pg_catalog.lo_create('%u'), %d);\n",
oid, INV_WRITE);
else
ahprintf(AH, "SELECT pg_catalog.lo_open('%u', %d);\n",
oid, INV_WRITE);
AH->WriteDataPtr = _WriteLOData;
}
/*
* Called by the archiver when the dumper calls EndLO.
*
* Optional.
*/
static void
_EndLO(ArchiveHandle *AH, TocEntry *te, Oid oid)
{
AH->WriteDataPtr = _WriteData;
2005-10-15 04:49:52 +02:00
ahprintf(AH, "SELECT pg_catalog.lo_close(0);\n\n");
}
/*
Rearrange pg_dump's handling of large objects for better efficiency. Commit c0d5be5d6 caused pg_dump to create a separate BLOB metadata TOC entry for each large object (blob), but it did not touch the ancient decision to put all the blobs' data into a single "BLOBS" TOC entry. This is bad for a few reasons: for databases with millions of blobs, the TOC becomes unreasonably large, causing performance issues; selective restore of just some blobs is quite impossible; and we cannot parallelize either dump or restore of the blob data, since our architecture for that relies on farming out whole TOC entries to worker processes. To improve matters, let's group multiple blobs into each blob metadata TOC entry, and then make corresponding per-group blob data TOC entries. Selective restore using pg_restore's -l/-L switches is then possible, though only at the group level. (Perhaps we should provide a switch to allow forcing one-blob-per-group for users who need precise selective restore and don't have huge numbers of blobs. This patch doesn't do that, instead just hard-wiring the maximum number of blobs per entry at 1000.) The blobs in a group must all have the same owner, since the TOC entry format only allows one owner to be named. In this implementation we also require them to all share the same ACL (grants); the archive format wouldn't require that, but pg_dump's representation of DumpableObjects does. It seems unlikely that either restriction will be problematic for databases with huge numbers of blobs. The metadata TOC entries now have a "desc" string of "BLOB METADATA", and their "defn" string is just a newline-separated list of blob OIDs. The restore code has to generate creation commands, ALTER OWNER commands, and drop commands (for --clean mode) from that. We would need special-case code for ALTER OWNER and drop in any case, so the alternative of keeping the "defn" as directly executable SQL code for creation wouldn't buy much, and it seems like it'd bloat the archive to little purpose. Since we require the blobs of a metadata group to share the same ACL, we can furthermore store only one copy of that ACL, and then make pg_restore regenerate the appropriate commands for each blob. This saves space in the dump file not only by removing duplicative SQL command strings, but by not needing a separate TOC entry for each blob's ACL. In turn, that reduces client-side memory requirements for handling many blobs. ACL TOC entries that need this special processing are labeled as "ACL"/"LARGE OBJECTS nnn..nnn". If we have a blob with a unique ACL, continue to label it as "ACL"/"LARGE OBJECT nnn". We don't actually have to make such a distinction, but it saves a few cycles during restore for the easy case, and it seems like a good idea to not change the TOC contents unnecessarily. The data TOC entries ("BLOBS") are exactly the same as before, except that now there can be more than one, so we'd better give them identifying tag strings. Also, commit c0d5be5d6 put the new BLOB metadata TOC entries into SECTION_PRE_DATA, which perhaps is defensible in some ways, but it's a rather odd choice considering that we go out of our way to treat blobs as data. Moreover, because parallel restore handles the PRE_DATA section serially, this means we'd only get part of the parallelism speedup we could hope for. Move these entries into SECTION_DATA, letting us parallelize the lo_create calls not just the data loading when there are many blobs. Add dependencies to ensure that we won't try to load data for a blob we've not yet created. As this stands, we still generate a separate TOC entry for any comment or security label attached to a blob. I feel comfortable in believing that comments and security labels on blobs are rare, so this patch should be enough to get most of the useful TOC compression for blobs. We have to bump the archive file format version number, since existing versions of pg_restore wouldn't know they need to do something special for BLOB METADATA, plus they aren't going to work correctly with multiple BLOBS entries or multiple-large-object ACL entries. The directory and tar-file format handlers need some work for multiple BLOBS entries: they used to hard-wire the file name as "blobs.toc", which is replaced here with "blobs_<dumpid>.toc". The 002_pg_dump.pl test script also knows about that and requires minor updates. (I had to drop the test for manually-compressed blobs.toc files with LZ4, because lz4's obtuse command line design requires explicit specification of the output file name which seems impractical here. I don't think we're losing any useful test coverage thereby; that test stanza seems completely duplicative with the gzip and zstd cases anyway.) In passing, centralize management of the lo_buf used to hold data while restoring blobs. The code previously had each format handler create lo_buf, which seems rather pointless given that the format handlers all make it the same way. Moreover, the format handlers never use lo_buf directly, making this setup a failure from a separation-of-concerns standpoint. Let's move the responsibility into pg_backup_archiver.c, which is the only module concerned with lo_buf. The reason to do this in this patch is that it allows a centralized fix for the now-false assumption that we never restore blobs in parallel. Also, get rid of dead code in DropLOIfExists: it's been a long time since we had any need to be able to restore to a pre-9.0 server. Discussion: https://postgr.es/m/a9f9376f1c3343a6bb319dce294e20ac@EX13D05UWC001.ant.amazon.com
2024-04-01 22:25:56 +02:00
* Called by the archiver when finishing saving BLOB DATA.
*
* Optional.
*/
static void
_EndLOs(ArchiveHandle *AH, TocEntry *te)
{
ahprintf(AH, "COMMIT;\n\n");
}
/*------
* Called as part of a RestoreArchive call; for the NULL archive, this
* just sends the data for a given TOC entry to the output.
*------
*/
static void
_PrintTocData(ArchiveHandle *AH, TocEntry *te)
{
2001-03-25 00:11:14 +01:00
if (te->dataDumper)
{
AH->currToc = te;
if (strcmp(te->desc, "BLOBS") == 0)
_StartLOs(AH, te);
te->dataDumper((Archive *) AH, te->dataDumperArg);
if (strcmp(te->desc, "BLOBS") == 0)
_EndLOs(AH, te);
AH->currToc = NULL;
}
}
static int
_WriteByte(ArchiveHandle *AH, const int i)
{
/* Don't do anything */
return 0;
}
static void
_WriteBuf(ArchiveHandle *AH, const void *buf, size_t len)
{
/* Don't do anything */
}
static void
_CloseArchive(ArchiveHandle *AH)
{
/* Nothing to do */
}