Use TRUNCATE to preserve relfilenode for pg_largeobject + index.
Commit 9a974cbcba
arranged to preserve the relfilenode of user tables across pg_upgrade, but failed to notice that pg_upgrade treats pg_largeobject as a user table and thus it needs the same treatment. Otherwise, large objects will appear to vanish after a pg_upgrade. Commit d498e052b4
fixed this problem by teaching pg_dump to UPDATE pg_class.relfilenode for pg_largeobject and its index. However, because an UPDATE on the catalog rows doesn't change anything on disk, this can leave stray files behind in the new cluster. They will normally be empty, but it's a little bit untidy. Hence, this commit arranges to do the same thing using DDL. Specifically, it makes TRUNCATE work for the pg_largeobject catalog when in binary-upgrade mode, and it then uses that command in binary-upgrade dumps as a way of setting pg_class.relfilenode for pg_largeobject and its index. That way, the old files are removed from the new cluster. Discussion: http://postgr.es/m/CA+TgmoYYMXGUJO5GZk1-MByJGu_bB8CbOL6GJQC8=Bzt6x6vDg@mail.gmail.com
This commit is contained in:
parent
e09d7a1262
commit
bbe08b8869
|
@ -40,6 +40,7 @@
|
|||
#include "catalog/pg_depend.h"
|
||||
#include "catalog/pg_foreign_table.h"
|
||||
#include "catalog/pg_inherits.h"
|
||||
#include "catalog/pg_largeobject.h"
|
||||
#include "catalog/pg_namespace.h"
|
||||
#include "catalog/pg_opclass.h"
|
||||
#include "catalog/pg_statistic_ext.h"
|
||||
|
@ -2185,7 +2186,15 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
|
|||
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
||||
errmsg("\"%s\" is not a table", relname)));
|
||||
|
||||
if (!allowSystemTableMods && IsSystemClass(relid, reltuple))
|
||||
/*
|
||||
* Most system catalogs can't be truncated at all, or at least not unless
|
||||
* allow_system_table_mods=on. As an exception, however, we allow
|
||||
* pg_largeobject to be truncated as part of pg_upgrade, because we need
|
||||
* to change its relfilenode to match the old cluster, and allowing a
|
||||
* TRUNCATE command to be executed is the easiest way of doing that.
|
||||
*/
|
||||
if (!allowSystemTableMods && IsSystemClass(relid, reltuple)
|
||||
&& (!IsBinaryUpgrade || relid != LargeObjectRelationId))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
||||
errmsg("permission denied: \"%s\" is a system catalog",
|
||||
|
|
|
@ -319,6 +319,7 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forkNum, bool isRedo)
|
|||
{
|
||||
char *path;
|
||||
int ret;
|
||||
BlockNumber segno = 0;
|
||||
|
||||
path = relpath(rlocator, forkNum);
|
||||
|
||||
|
@ -353,8 +354,22 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forkNum, bool isRedo)
|
|||
/* Prevent other backends' fds from holding on to the disk space */
|
||||
ret = do_truncate(path);
|
||||
|
||||
/* Register request to unlink first segment later */
|
||||
register_unlink_segment(rlocator, forkNum, 0 /* first seg */ );
|
||||
/*
|
||||
* Except during a binary upgrade, register request to unlink first
|
||||
* segment later, rather than now.
|
||||
*
|
||||
* If we're performing a binary upgrade, the dangers described in the
|
||||
* header comments for mdunlink() do not exist, since after a crash
|
||||
* or even a simple ERROR, the upgrade fails and the whole new cluster
|
||||
* must be recreated from scratch. And, on the other hand, it is
|
||||
* important to remove the files from disk immediately, because we
|
||||
* may be about to reuse the same relfilenumber.
|
||||
*/
|
||||
if (!IsBinaryUpgrade)
|
||||
{
|
||||
register_unlink_segment(rlocator, forkNum, 0 /* first seg */ );
|
||||
++segno;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -363,15 +378,17 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forkNum, bool isRedo)
|
|||
if (ret >= 0)
|
||||
{
|
||||
char *segpath = (char *) palloc(strlen(path) + 12);
|
||||
BlockNumber segno;
|
||||
|
||||
/*
|
||||
* Note that because we loop until getting ENOENT, we will correctly
|
||||
* remove all inactive segments as well as active ones.
|
||||
*/
|
||||
for (segno = 1;; segno++)
|
||||
for (;; segno++)
|
||||
{
|
||||
sprintf(segpath, "%s.%u", path, segno);
|
||||
if (segno == 0)
|
||||
strcpy(segpath, path);
|
||||
else
|
||||
sprintf(segpath, "%s.%u", path, segno);
|
||||
|
||||
if (!RelFileLocatorBackendIsTemp(rlocator))
|
||||
{
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#include "access/tupdesc_details.h"
|
||||
#include "access/xact.h"
|
||||
#include "access/xlog.h"
|
||||
#include "catalog/binary_upgrade.h"
|
||||
#include "catalog/catalog.h"
|
||||
#include "catalog/indexing.h"
|
||||
#include "catalog/namespace.h"
|
||||
|
@ -3707,9 +3708,36 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
|
|||
TransactionId freezeXid = InvalidTransactionId;
|
||||
RelFileLocator newrlocator;
|
||||
|
||||
/* Allocate a new relfilenumber */
|
||||
newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace,
|
||||
NULL, persistence);
|
||||
if (!IsBinaryUpgrade)
|
||||
{
|
||||
/* Allocate a new relfilenumber */
|
||||
newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace,
|
||||
NULL, persistence);
|
||||
}
|
||||
else if (relation->rd_rel->relkind == RELKIND_INDEX)
|
||||
{
|
||||
if (!OidIsValid(binary_upgrade_next_index_pg_class_relfilenumber))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("index relfilenumber value not set when in binary upgrade mode")));
|
||||
|
||||
newrelfilenumber = binary_upgrade_next_index_pg_class_relfilenumber;
|
||||
binary_upgrade_next_index_pg_class_relfilenumber = InvalidOid;
|
||||
}
|
||||
else if (relation->rd_rel->relkind == RELKIND_RELATION)
|
||||
{
|
||||
if (!OidIsValid(binary_upgrade_next_heap_pg_class_relfilenumber))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("heap relfilenumber value not set when in binary upgrade mode")));
|
||||
|
||||
newrelfilenumber = binary_upgrade_next_heap_pg_class_relfilenumber;
|
||||
binary_upgrade_next_heap_pg_class_relfilenumber = InvalidOid;
|
||||
}
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("unexpected request for new relfilenumber in binary upgrade mode")));
|
||||
|
||||
/*
|
||||
* Get a writable copy of the pg_class tuple for the given relation.
|
||||
|
@ -3724,9 +3752,37 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
|
|||
classform = (Form_pg_class) GETSTRUCT(tuple);
|
||||
|
||||
/*
|
||||
* Schedule unlinking of the old storage at transaction commit.
|
||||
* Schedule unlinking of the old storage at transaction commit, except
|
||||
* when performing a binary upgrade, when we must do it immediately.
|
||||
*/
|
||||
RelationDropStorage(relation);
|
||||
if (IsBinaryUpgrade)
|
||||
{
|
||||
SMgrRelation srel;
|
||||
|
||||
/*
|
||||
* During a binary upgrade, we use this code path to ensure that
|
||||
* pg_largeobject and its index have the same relfilenumbers as in
|
||||
* the old cluster. This is necessary because pg_upgrade treats
|
||||
* pg_largeobject like a user table, not a system table. It is however
|
||||
* possible that a table or index may need to end up with the same
|
||||
* relfilenumber in the new cluster as what it had in the old cluster.
|
||||
* Hence, we can't wait until commit time to remove the old storage.
|
||||
*
|
||||
* In general, this function needs to have transactional semantics,
|
||||
* and removing the old storage before commit time surely isn't.
|
||||
* However, it doesn't really matter, because if a binary upgrade
|
||||
* fails at this stage, the new cluster will need to be recreated
|
||||
* anyway.
|
||||
*/
|
||||
srel = smgropen(relation->rd_locator, relation->rd_backend);
|
||||
smgrdounlinkall(&srel, 1, false);
|
||||
smgrclose(srel);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Not a binary upgrade, so just schedule it to happen later. */
|
||||
RelationDropStorage(relation);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create storage for the main fork of the new relfilenumber. If it's a
|
||||
|
|
|
@ -3141,6 +3141,7 @@ dumpDatabase(Archive *fout)
|
|||
PGresult *lo_res;
|
||||
PQExpBuffer loFrozenQry = createPQExpBuffer();
|
||||
PQExpBuffer loOutQry = createPQExpBuffer();
|
||||
PQExpBuffer loVacQry = createPQExpBuffer();
|
||||
int i_relfrozenxid,
|
||||
i_relfilenode,
|
||||
i_oid,
|
||||
|
@ -3167,15 +3168,36 @@ dumpDatabase(Archive *fout)
|
|||
i_relfilenode = PQfnumber(lo_res, "relfilenode");
|
||||
i_oid = PQfnumber(lo_res, "oid");
|
||||
|
||||
appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, preserve values for pg_largeobject and its index\n");
|
||||
appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, set pg_largeobject relfrozenxid and relminmxid\n");
|
||||
appendPQExpBufferStr(loVacQry, "\n-- For binary upgrade, preserve pg_largeobject and index relfilenodes\n");
|
||||
for (int i = 0; i < PQntuples(lo_res); ++i)
|
||||
{
|
||||
Oid oid;
|
||||
RelFileNumber relfilenumber;
|
||||
|
||||
appendPQExpBuffer(loOutQry, "UPDATE pg_catalog.pg_class\n"
|
||||
"SET relfrozenxid = '%u', relminmxid = '%u', relfilenode = '%u'\n"
|
||||
"SET relfrozenxid = '%u', relminmxid = '%u'\n"
|
||||
"WHERE oid = %u;\n",
|
||||
atooid(PQgetvalue(lo_res, i, i_relfrozenxid)),
|
||||
atooid(PQgetvalue(lo_res, i, i_relminmxid)),
|
||||
atooid(PQgetvalue(lo_res, i, i_relfilenode)),
|
||||
atooid(PQgetvalue(lo_res, i, i_oid)));
|
||||
atooid(PQgetvalue(lo_res, i, i_relfilenode)));
|
||||
|
||||
oid = atooid(PQgetvalue(lo_res, i, i_oid));
|
||||
relfilenumber = atooid(PQgetvalue(lo_res, i, i_relfilenode));
|
||||
|
||||
if (oid == LargeObjectRelationId)
|
||||
appendPQExpBuffer(loVacQry,
|
||||
"SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n",
|
||||
relfilenumber);
|
||||
else if (oid == LargeObjectLOidPNIndexId)
|
||||
appendPQExpBuffer(loVacQry,
|
||||
"SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n",
|
||||
relfilenumber);
|
||||
}
|
||||
|
||||
appendPQExpBufferStr(loVacQry,
|
||||
"TRUNCATE pg_catalog.pg_largeobject;\n");
|
||||
appendPQExpBufferStr(loOutQry, loVacQry->data);
|
||||
|
||||
ArchiveEntry(fout, nilCatalogId, createDumpId(),
|
||||
ARCHIVE_OPTS(.tag = "pg_largeobject",
|
||||
|
@ -3187,6 +3209,7 @@ dumpDatabase(Archive *fout)
|
|||
|
||||
destroyPQExpBuffer(loFrozenQry);
|
||||
destroyPQExpBuffer(loOutQry);
|
||||
destroyPQExpBuffer(loVacQry);
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
|
Loading…
Reference in New Issue