Increase width of RelFileNumbers from 32 bits to 56 bits.

RelFileNumbers are now assigned using a separate counter, instead of
being assigned from the OID counter. This counter never wraps around:
if all 2^56 possible RelFileNumbers are used, an internal error
occurs. As the cluster is limited to 2^64 total bytes of WAL, this
limitation should not cause a problem in practice.

If the counter were 64 bits wide rather than 56 bits wide, we would
need to increase the width of the BufferTag, which might adversely
impact buffer lookup performance. Also, this lets us use bigint for
pg_class.relfilenode and other places where these values are exposed
at the SQL level without worrying about overflow.

This should remove the need to keep "tombstone" files around until
the next checkpoint when relations are removed. We do that to keep
RelFileNumbers from being recycled, but now that won't happen
anyway. However, this patch doesn't actually change anything in
this area; it just makes it possible for a future patch to do so.

Dilip Kumar, based on an idea from Andres Freund, who also reviewed
some earlier versions of the patch. Further review and some
wordsmithing by me. Also reviewed at various points by Ashutosh
Sharma, Vignesh C, Amul Sul, Álvaro Herrera, and Tom Lane.

Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
This commit is contained in:
Robert Haas 2022-09-27 13:25:21 -04:00
parent 2f47715cc8
commit 05d4cbf9b6
70 changed files with 693 additions and 289 deletions

View File

@ -6,8 +6,8 @@ OBJS = \
pg_buffercache_pages.o pg_buffercache_pages.o
EXTENSION = pg_buffercache EXTENSION = pg_buffercache
DATA = pg_buffercache--1.2.sql pg_buffercache--1.2--1.3.sql \ DATA = pg_buffercache--1.0--1.1.sql pg_buffercache--1.1--1.2.sql pg_buffercache--1.2.sql \
pg_buffercache--1.1--1.2.sql pg_buffercache--1.0--1.1.sql pg_buffercache--1.2--1.3.sql pg_buffercache--1.3--1.4.sql
PGFILEDESC = "pg_buffercache - monitoring of shared buffer cache in real-time" PGFILEDESC = "pg_buffercache - monitoring of shared buffer cache in real-time"
REGRESS = pg_buffercache REGRESS = pg_buffercache

View File

@ -0,0 +1,30 @@
/* contrib/pg_buffercache/pg_buffercache--1.3--1.4.sql */
--
-- Upgrade script for pg_buffercache 1.3 -> 1.4.
--
-- Re-creates pg_buffercache_pages() and the pg_buffercache view so that the
-- relfilenode column is declared int8.  The function is bound to the C entry
-- point pg_buffercache_pages_v1_4.
--
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION pg_buffercache UPDATE TO '1.4'" to load this file. \quit
/*
 * First we have to remove them from the extension; a member object of an
 * extension cannot be dropped on its own.  The view goes first because it
 * depends on the function.
 */
ALTER EXTENSION pg_buffercache DROP VIEW pg_buffercache;
ALTER EXTENSION pg_buffercache DROP FUNCTION pg_buffercache_pages();
/* Then we can drop them (view before the function it selects from) */
DROP VIEW pg_buffercache;
DROP FUNCTION pg_buffercache_pages();
/* Now redefine, pointing the SQL function at the v1_4 C entry point */
CREATE FUNCTION pg_buffercache_pages()
RETURNS SETOF RECORD
AS 'MODULE_PATHNAME', 'pg_buffercache_pages_v1_4'
LANGUAGE C PARALLEL SAFE;
-- The column definition list gives the record type returned by the function;
-- relfilenode is int8 here.
CREATE VIEW pg_buffercache AS
SELECT P.* FROM pg_buffercache_pages() AS P
(bufferid integer, relfilenode int8, reltablespace oid, reldatabase oid,
relforknumber int2, relblocknumber int8, isdirty bool, usagecount int2,
pinning_backends int4);
-- Don't want these to be available to public.  The objects above are newly
-- created, so their privileges have to be set up again here.
REVOKE ALL ON FUNCTION pg_buffercache_pages() FROM PUBLIC;
REVOKE ALL ON pg_buffercache FROM PUBLIC;
GRANT EXECUTE ON FUNCTION pg_buffercache_pages() TO pg_monitor;
GRANT SELECT ON pg_buffercache TO pg_monitor;

View File

@ -1,5 +1,5 @@
# pg_buffercache extension # pg_buffercache extension
comment = 'examine the shared buffer cache' comment = 'examine the shared buffer cache'
default_version = '1.3' default_version = '1.4'
module_pathname = '$libdir/pg_buffercache' module_pathname = '$libdir/pg_buffercache'
relocatable = true relocatable = true

View File

@ -59,9 +59,10 @@ typedef struct
* relation node/tablespace/database/blocknum and dirty indicator. * relation node/tablespace/database/blocknum and dirty indicator.
*/ */
PG_FUNCTION_INFO_V1(pg_buffercache_pages); PG_FUNCTION_INFO_V1(pg_buffercache_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_pages_v1_4);
Datum static Datum
pg_buffercache_pages(PG_FUNCTION_ARGS) pg_buffercache_pages_internal(PG_FUNCTION_ARGS, Oid rfn_typid)
{ {
FuncCallContext *funcctx; FuncCallContext *funcctx;
Datum result; Datum result;
@ -103,7 +104,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid", TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
INT4OID, -1, 0); INT4OID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode", TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
OIDOID, -1, 0); rfn_typid, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace", TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
OIDOID, -1, 0); OIDOID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase", TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
@ -209,7 +210,24 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
} }
else else
{ {
values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber); if (rfn_typid == INT8OID)
values[1] =
Int64GetDatum((int64) fctx->record[i].relfilenumber);
else
{
Assert(rfn_typid == OIDOID);
if (fctx->record[i].relfilenumber > OID_MAX)
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("relfilenode %llu is too large to be represented as an OID",
(unsigned long long) fctx->record[i].relfilenumber),
errhint("Upgrade the extension using ALTER EXTENSION pg_buffercache UPDATE"));
values[1] =
ObjectIdGetDatum((Oid) fctx->record[i].relfilenumber);
}
nulls[1] = false; nulls[1] = false;
values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace); values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
nulls[2] = false; nulls[2] = false;
@ -237,3 +255,16 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
else else
SRF_RETURN_DONE(funcctx); SRF_RETURN_DONE(funcctx);
} }
/*
 * Entry point for old extension versions, i.e. SQL definitions that still
 * reference the C symbol "pg_buffercache_pages".
 *
 * Passes OIDOID as the relfilenode column type, so the shared implementation
 * reports relfilenode as an OID and raises an error for any value that
 * exceeds OID_MAX.
 */
Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
return pg_buffercache_pages_internal(fcinfo, OIDOID);
}
/*
 * Entry point used by the 1.4 SQL definition of pg_buffercache_pages().
 *
 * Passes INT8OID as the relfilenode column type, so the shared implementation
 * reports relfilenode as a 64-bit integer.
 */
Datum
pg_buffercache_pages_v1_4(PG_FUNCTION_ARGS)
{
return pg_buffercache_pages_internal(fcinfo, INT8OID);
}

View File

@ -345,7 +345,7 @@ apw_load_buffers(void)
{ {
unsigned forknum; unsigned forknum;
if (fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database, if (fscanf(file, "%u,%u," UINT64_FORMAT ",%u,%u\n", &blkinfo[i].database,
&blkinfo[i].tablespace, &blkinfo[i].filenumber, &blkinfo[i].tablespace, &blkinfo[i].filenumber,
&forknum, &blkinfo[i].blocknum) != 5) &forknum, &blkinfo[i].blocknum) != 5)
ereport(ERROR, ereport(ERROR,
@ -669,7 +669,7 @@ apw_dump_now(bool is_bgworker, bool dump_unlogged)
{ {
CHECK_FOR_INTERRUPTS(); CHECK_FOR_INTERRUPTS();
ret = fprintf(file, "%u,%u,%u,%u,%u\n", ret = fprintf(file, "%u,%u," UINT64_FORMAT ",%u,%u\n",
block_info_array[i].database, block_info_array[i].database,
block_info_array[i].tablespace, block_info_array[i].tablespace,
block_info_array[i].filenumber, block_info_array[i].filenumber,

View File

@ -54,9 +54,9 @@ SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats_till_end_of_wal(:'wal_lsn1');
-- =================================================================== -- ===================================================================
-- Test for filtering out WAL records of a particular table -- Test for filtering out WAL records of a particular table
-- =================================================================== -- ===================================================================
SELECT oid AS sample_tbl_oid FROM pg_class WHERE relname = 'sample_tbl' \gset SELECT relfilenode AS sample_tbl_relfilenode FROM pg_class WHERE relname = 'sample_tbl' \gset
SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2') SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2')
WHERE block_ref LIKE concat('%', :'sample_tbl_oid', '%') AND resource_manager = 'Heap'; WHERE block_ref LIKE concat('%', :'sample_tbl_relfilenode', '%') AND resource_manager = 'Heap';
ok ok
---- ----
t t

View File

@ -39,10 +39,10 @@ SELECT COUNT(*) >= 0 AS ok FROM pg_get_wal_stats_till_end_of_wal(:'wal_lsn1');
-- Test for filtering out WAL records of a particular table -- Test for filtering out WAL records of a particular table
-- =================================================================== -- ===================================================================
SELECT oid AS sample_tbl_oid FROM pg_class WHERE relname = 'sample_tbl' \gset SELECT relfilenode AS sample_tbl_relfilenode FROM pg_class WHERE relname = 'sample_tbl' \gset
SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2') SELECT COUNT(*) >= 1 AS ok FROM pg_get_wal_records_info(:'wal_lsn1', :'wal_lsn2')
WHERE block_ref LIKE concat('%', :'sample_tbl_oid', '%') AND resource_manager = 'Heap'; WHERE block_ref LIKE concat('%', :'sample_tbl_relfilenode', '%') AND resource_manager = 'Heap';
-- =================================================================== -- ===================================================================
-- Test for filtering out WAL records based on resource_manager and -- Test for filtering out WAL records based on resource_manager and

View File

@ -1984,7 +1984,7 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
<row> <row>
<entry role="catalog_table_entry"><para role="column_definition"> <entry role="catalog_table_entry"><para role="column_definition">
<structfield>relfilenode</structfield> <type>oid</type> <structfield>relfilenode</structfield> <type>int8</type>
</para> </para>
<para> <para>
Name of the on-disk file of this relation; zero means this Name of the on-disk file of this relation; zero means this

View File

@ -25210,6 +25210,11 @@ SELECT collation for ('foo' COLLATE "de_DE");
<entry><type>timestamp with time zone</type></entry> <entry><type>timestamp with time zone</type></entry>
</row> </row>
<row>
<entry><structfield>next_relfilenumber</structfield></entry>
<entry><type>bigint</type></entry>
</row>
</tbody> </tbody>
</tgroup> </tgroup>
</table> </table>

View File

@ -62,7 +62,7 @@
<row> <row>
<entry role="catalog_table_entry"><para role="column_definition"> <entry role="catalog_table_entry"><para role="column_definition">
<structfield>relfilenode</structfield> <type>oid</type> <structfield>relfilenode</structfield> <type>int8</type>
(references <link linkend="catalog-pg-class"><structname>pg_class</structname></link>.<structfield>relfilenode</structfield>) (references <link linkend="catalog-pg-class"><structname>pg_class</structname></link>.<structfield>relfilenode</structfield>)
</para> </para>
<para> <para>

View File

@ -217,11 +217,12 @@ with the suffix <literal>_init</literal> (see <xref linkend="storage-init"/>).
<caution> <caution>
<para> <para>
Note that while a table's filenode often matches its OID, this is Note that a table's filenode will normally be different than the OID. For
<emphasis>not</emphasis> necessarily the case; some operations, like system tables, the initial filenode will be equal to the table OID, but it will
<command>TRUNCATE</command>, <command>REINDEX</command>, <command>CLUSTER</command> and some forms be different if the table has ever been subjected to a rewriting operation,
of <command>ALTER TABLE</command>, can change the filenode while preserving the OID. such as <command>TRUNCATE</command>, <command>REINDEX</command>,
Avoid assuming that filenode and table OID are the same. <command>CLUSTER</command> or some forms of <command>ALTER TABLE</command>.
For user tables, even the initial filenode will be different than the table OID.
Also, for certain system catalogs including <structname>pg_class</structname> itself, Also, for certain system catalogs including <structname>pg_class</structname> itself,
<structname>pg_class</structname>.<structfield>relfilenode</structfield> contains zero. The <structname>pg_class</structname>.<structfield>relfilenode</structfield> contains zero. The
actual filenode number of these catalogs is stored in a lower-level data actual filenode number of these catalogs is stored in a lower-level data

View File

@ -100,7 +100,7 @@ ginRedoInsertEntry(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rda
BlockNumber blknum; BlockNumber blknum;
BufferGetTag(buffer, &locator, &forknum, &blknum); BufferGetTag(buffer, &locator, &forknum, &blknum);
elog(ERROR, "failed to add item to index page in %u/%u/%u", elog(ERROR, "failed to add item to index page in %u/%u/" UINT64_FORMAT,
locator.spcOid, locator.dbOid, locator.relNumber); locator.spcOid, locator.dbOid, locator.relNumber);
} }
} }

View File

@ -26,7 +26,7 @@ out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec)
static void static void
out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec) out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec)
{ {
appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %u:%u", appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT "; blk %u; latestRemovedXid %u:%u",
xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.spcOid, xlrec->locator.dbOid,
xlrec->locator.relNumber, xlrec->block, xlrec->locator.relNumber, xlrec->block,
EpochFromFullTransactionId(xlrec->latestRemovedFullXid), EpochFromFullTransactionId(xlrec->latestRemovedFullXid),

View File

@ -169,7 +169,7 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
{ {
xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec; xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec;
appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u", appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT "; tid %u/%u",
xlrec->target_locator.spcOid, xlrec->target_locator.spcOid,
xlrec->target_locator.dbOid, xlrec->target_locator.dbOid,
xlrec->target_locator.relNumber, xlrec->target_locator.relNumber,

View File

@ -100,7 +100,7 @@ btree_desc(StringInfo buf, XLogReaderState *record)
{ {
xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) rec; xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) rec;
appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u:%u", appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT "; latestRemovedXid %u:%u",
xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.spcOid, xlrec->locator.dbOid,
xlrec->locator.relNumber, xlrec->locator.relNumber,
EpochFromFullTransactionId(xlrec->latestRemovedFullXid), EpochFromFullTransactionId(xlrec->latestRemovedFullXid),

View File

@ -25,7 +25,7 @@ seq_desc(StringInfo buf, XLogReaderState *record)
xl_seq_rec *xlrec = (xl_seq_rec *) rec; xl_seq_rec *xlrec = (xl_seq_rec *) rec;
if (info == XLOG_SEQ_LOG) if (info == XLOG_SEQ_LOG)
appendStringInfo(buf, "rel %u/%u/%u", appendStringInfo(buf, "rel %u/%u/" UINT64_FORMAT,
xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.spcOid, xlrec->locator.dbOid,
xlrec->locator.relNumber); xlrec->locator.relNumber);
} }

View File

@ -45,8 +45,8 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
CheckPoint *checkpoint = (CheckPoint *) rec; CheckPoint *checkpoint = (CheckPoint *) rec;
appendStringInfo(buf, "redo %X/%X; " appendStringInfo(buf, "redo %X/%X; "
"tli %u; prev tli %u; fpw %s; xid %u:%u; oid %u; multi %u; offset %u; " "tli %u; prev tli %u; fpw %s; xid %u:%u; relfilenumber " UINT64_FORMAT ";oid %u; "
"oldest xid %u in DB %u; oldest multi %u in DB %u; " "multi %u; offset %u; oldest xid %u in DB %u; oldest multi %u in DB %u; "
"oldest/newest commit timestamp xid: %u/%u; " "oldest/newest commit timestamp xid: %u/%u; "
"oldest running xid %u; %s", "oldest running xid %u; %s",
LSN_FORMAT_ARGS(checkpoint->redo), LSN_FORMAT_ARGS(checkpoint->redo),
@ -55,6 +55,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
checkpoint->fullPageWrites ? "true" : "false", checkpoint->fullPageWrites ? "true" : "false",
EpochFromFullTransactionId(checkpoint->nextXid), EpochFromFullTransactionId(checkpoint->nextXid),
XidFromFullTransactionId(checkpoint->nextXid), XidFromFullTransactionId(checkpoint->nextXid),
checkpoint->nextRelFileNumber,
checkpoint->nextOid, checkpoint->nextOid,
checkpoint->nextMulti, checkpoint->nextMulti,
checkpoint->nextMultiOffset, checkpoint->nextMultiOffset,
@ -74,6 +75,13 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
memcpy(&nextOid, rec, sizeof(Oid)); memcpy(&nextOid, rec, sizeof(Oid));
appendStringInfo(buf, "%u", nextOid); appendStringInfo(buf, "%u", nextOid);
} }
else if (info == XLOG_NEXT_RELFILENUMBER)
{
RelFileNumber nextRelFileNumber;
memcpy(&nextRelFileNumber, rec, sizeof(RelFileNumber));
appendStringInfo(buf, UINT64_FORMAT, nextRelFileNumber);
}
else if (info == XLOG_RESTORE_POINT) else if (info == XLOG_RESTORE_POINT)
{ {
xl_restore_point *xlrec = (xl_restore_point *) rec; xl_restore_point *xlrec = (xl_restore_point *) rec;
@ -169,6 +177,9 @@ xlog_identify(uint8 info)
case XLOG_NEXTOID: case XLOG_NEXTOID:
id = "NEXTOID"; id = "NEXTOID";
break; break;
case XLOG_NEXT_RELFILENUMBER:
id = "NEXT_RELFILENUMBER";
break;
case XLOG_SWITCH: case XLOG_SWITCH:
id = "SWITCH"; id = "SWITCH";
break; break;
@ -237,7 +248,7 @@ XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty,
appendStringInfoChar(buf, ' '); appendStringInfoChar(buf, ' ');
appendStringInfo(buf, appendStringInfo(buf,
"blkref #%d: rel %u/%u/%u fork %s blk %u", "blkref #%d: rel %u/%u/" UINT64_FORMAT " fork %s blk %u",
block_id, block_id,
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber,
forkNames[forknum], forkNames[forknum],
@ -297,7 +308,7 @@ XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty,
if (forknum != MAIN_FORKNUM) if (forknum != MAIN_FORKNUM)
{ {
appendStringInfo(buf, appendStringInfo(buf,
", blkref #%d: rel %u/%u/%u fork %s blk %u", ", blkref #%d: rel %u/%u/" UINT64_FORMAT " fork %s blk %u",
block_id, block_id,
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber,
forkNames[forknum], forkNames[forknum],
@ -306,7 +317,7 @@ XLogRecGetBlockRefInfo(XLogReaderState *record, bool pretty,
else else
{ {
appendStringInfo(buf, appendStringInfo(buf,
", blkref #%d: rel %u/%u/%u blk %u", ", blkref #%d: rel %u/%u/" UINT64_FORMAT " blk %u",
block_id, block_id,
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber,
blk); blk);

View File

@ -692,8 +692,9 @@ by having database restart search for files that don't have any committed
entry in pg_class, but that currently isn't done because of the possibility entry in pg_class, but that currently isn't done because of the possibility
of deleting data that is useful for forensic analysis of the crash. of deleting data that is useful for forensic analysis of the crash.
Orphan files are harmless --- at worst they waste a bit of disk space --- Orphan files are harmless --- at worst they waste a bit of disk space ---
because we check for on-disk collisions when allocating new relfilenumber because the relfilenumber counter is monotonically increasing. The maximum
OIDs. So cleaning up isn't really necessary. value is 2^56-1, and there is no provision for wraparound. Thus, on-disk
collisions aren't possible.
3. Deleting a table, which requires an unlink() that could fail. 3. Deleting a table, which requires an unlink() that could fail.

View File

@ -13,12 +13,16 @@
#include "postgres.h" #include "postgres.h"
#include <unistd.h>
#include "access/clog.h" #include "access/clog.h"
#include "access/commit_ts.h" #include "access/commit_ts.h"
#include "access/subtrans.h" #include "access/subtrans.h"
#include "access/transam.h" #include "access/transam.h"
#include "access/xact.h" #include "access/xact.h"
#include "access/xlogutils.h" #include "access/xlogutils.h"
#include "catalog/pg_class.h"
#include "catalog/pg_tablespace.h"
#include "commands/dbcommands.h" #include "commands/dbcommands.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "postmaster/autovacuum.h" #include "postmaster/autovacuum.h"
@ -30,6 +34,15 @@
/* Number of OIDs to prefetch (preallocate) per XLOG write */ /* Number of OIDs to prefetch (preallocate) per XLOG write */
#define VAR_OID_PREFETCH 8192 #define VAR_OID_PREFETCH 8192
/* Number of RelFileNumbers to be logged per XLOG write */
#define VAR_RELNUMBER_PER_XLOG 512
/*
* Log a new batch of RelFileNumbers once the number of logged-but-unused
* values drops to this threshold or below. The threshold must be in the
* range 0 to VAR_RELNUMBER_PER_XLOG - 1.
*/
#define VAR_RELNUMBER_NEW_XLOG_THRESHOLD 256
/* pointer to "variable cache" in shared memory (set up by shmem.c) */ /* pointer to "variable cache" in shared memory (set up by shmem.c) */
VariableCache ShmemVariableCache = NULL; VariableCache ShmemVariableCache = NULL;
@ -521,8 +534,7 @@ ForceTransactionIdLimitUpdate(void)
* wide, counter wraparound will occur eventually, and therefore it is unwise * wide, counter wraparound will occur eventually, and therefore it is unwise
* to assume they are unique unless precautions are taken to make them so. * to assume they are unique unless precautions are taken to make them so.
* Hence, this routine should generally not be used directly. The only direct * Hence, this routine should generally not be used directly. The only direct
* callers should be GetNewOidWithIndex() and GetNewRelFileNumber() in * caller should be GetNewOidWithIndex() in catalog/catalog.c.
* catalog/catalog.c.
*/ */
Oid Oid
GetNewObjectId(void) GetNewObjectId(void)
@ -612,6 +624,199 @@ SetNextObjectId(Oid nextOid)
LWLockRelease(OidGenLock); LWLockRelease(OidGenLock);
} }
/*
 * GetNewRelFileNumber
 *
 * Similar to GetNewObjectId, but instead of a new Oid it hands out a new
 * relfilenumber from the shared counter in ShmemVariableCache.
 *
 * reltablespace and relpersistence do not influence the value returned; they
 * are used only by the assert-enabled check below, which builds the path the
 * new relation file would have and verifies that no such file exists yet.
 *
 * Returns the newly allocated RelFileNumber.
 *
 * Raises an error if called during recovery or during binary upgrade, or if
 * the counter has advanced past MAX_RELFILENUMBER.
 */
RelFileNumber
GetNewRelFileNumber(Oid reltablespace, char relpersistence)
{
	RelFileNumber result;
	RelFileNumber nextRelFileNumber,
				loggedRelFileNumber,
				flushedRelFileNumber;

	StaticAssertStmt(VAR_RELNUMBER_NEW_XLOG_THRESHOLD < VAR_RELNUMBER_PER_XLOG,
					 "VAR_RELNUMBER_NEW_XLOG_THRESHOLD must be smaller than VAR_RELNUMBER_PER_XLOG");

	/* safety check, we should never get this far in a HS standby */
	if (RecoveryInProgress())
		elog(ERROR, "cannot assign RelFileNumber during recovery");

	if (IsBinaryUpgrade)
		elog(ERROR, "cannot assign RelFileNumber during binary upgrade");

	LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE);

	nextRelFileNumber = ShmemVariableCache->nextRelFileNumber;
	loggedRelFileNumber = ShmemVariableCache->loggedRelFileNumber;
	flushedRelFileNumber = ShmemVariableCache->flushedRelFileNumber;

	/* invariant: next <= flushed <= logged */
	Assert(nextRelFileNumber <= flushedRelFileNumber);
	Assert(flushedRelFileNumber <= loggedRelFileNumber);

	/* check for the wraparound for the relfilenumber counter */
	if (unlikely(nextRelFileNumber > MAX_RELFILENUMBER))
		elog(ERROR, "relfilenumber is too large");

	/*
	 * If the remaining logged relfilenumbers values are less than the
	 * threshold value then log more.  Ideally, we can wait until all
	 * relfilenumbers have been consumed before logging more.  Nevertheless,
	 * if we do that, we must immediately flush the logged wal record because
	 * we want to ensure that the nextRelFileNumber is always larger than any
	 * relfilenumber already in use on disk.  And, to maintain that invariant,
	 * we must make sure that the record we log reaches the disk before any
	 * new files are created with the newly logged range.
	 *
	 * So in order to avoid flushing the wal immediately, we always log before
	 * consuming all the relfilenumber, and now we only have to flush the
	 * newly logged relfilenumber wal before consuming the relfilenumber from
	 * this new range.  By the time we need to flush this wal, hopefully,
	 * those have already been flushed with some other XLogFlush operation.
	 */
	if (loggedRelFileNumber - nextRelFileNumber <=
		VAR_RELNUMBER_NEW_XLOG_THRESHOLD)
	{
		XLogRecPtr	recptr;

		loggedRelFileNumber = loggedRelFileNumber + VAR_RELNUMBER_PER_XLOG;
		recptr = LogNextRelFileNumber(loggedRelFileNumber);
		ShmemVariableCache->loggedRelFileNumber = loggedRelFileNumber;

		/* remember for the future flush */
		ShmemVariableCache->loggedRelFileNumberRecPtr = recptr;
	}

	/*
	 * If nextRelFileNumber has caught up with the already-flushed
	 * relfilenumber, flush the WAL for the most recently logged batch before
	 * handing out any value from it.
	 */
	if (nextRelFileNumber >= flushedRelFileNumber)
	{
		XLogFlush(ShmemVariableCache->loggedRelFileNumberRecPtr);
		ShmemVariableCache->flushedRelFileNumber = loggedRelFileNumber;
	}

	result = ShmemVariableCache->nextRelFileNumber;

	/* we should never be using any relfilenumber outside the flushed range */
	Assert(result <= ShmemVariableCache->flushedRelFileNumber);

	(ShmemVariableCache->nextRelFileNumber)++;

	LWLockRelease(RelFileNumberGenLock);

	/*
	 * Because the RelFileNumber counter only ever increases and never wraps
	 * around, it should be impossible for the newly-allocated RelFileNumber
	 * to already be in use.  But, if Asserts are enabled, double check that
	 * there's no main-fork relation file with the new RelFileNumber already
	 * on disk.
	 */
#ifdef USE_ASSERT_CHECKING
	{
		RelFileLocatorBackend rlocator;
		char	   *rpath;
		BackendId	backend;

		switch (relpersistence)
		{
			case RELPERSISTENCE_TEMP:
				backend = BackendIdForTempRelations();
				break;
			case RELPERSISTENCE_UNLOGGED:
			case RELPERSISTENCE_PERMANENT:
				backend = InvalidBackendId;
				break;
			default:
				elog(ERROR, "invalid relpersistence: %c", relpersistence);
		}

		/* this logic should match RelationInitPhysicalAddr */
		rlocator.locator.spcOid =
			reltablespace ? reltablespace : MyDatabaseTableSpace;
		rlocator.locator.dbOid = (reltablespace == GLOBALTABLESPACE_OID) ?
			InvalidOid : MyDatabaseId;
		rlocator.locator.relNumber = result;

		/*
		 * The relpath will vary based on the backend ID, so we must
		 * initialize that properly here to make sure that any collisions
		 * based on filename are properly detected.
		 */
		rlocator.backend = backend;

		/* check for existing file of same name. */
		rpath = relpath(rlocator, MAIN_FORKNUM);
		Assert(access(rpath, F_OK) != 0);

		/* the path string is allocated by relpath(); don't leak it */
		pfree(rpath);
	}
#endif

	return result;
}
/*
 * SetNextRelFileNumber
 *
 * This may only be called during pg_upgrade; it advances the RelFileNumber
 * counter to the specified value if the current value is smaller than the
 * input value.  If the counter is already at or beyond 'relnumber', nothing
 * is changed.
 *
 * Raises an error if called during recovery or outside binary upgrade mode.
 */
void
SetNextRelFileNumber(RelFileNumber relnumber)
{
/* safety check, we should never get this far in a HS standby */
if (RecoveryInProgress())
elog(ERROR, "cannot set RelFileNumber during recovery");
if (!IsBinaryUpgrade)
elog(ERROR, "RelFileNumber can be set only during binary upgrade");
LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE);
/*
 * If nextRelFileNumber is already at or above the requested value then
 * there is nothing to be done.  This is possible because during upgrade
 * the objects are not created in relfilenumber order.
 */
if (relnumber <= ShmemVariableCache->nextRelFileNumber)
{
LWLockRelease(RelFileNumberGenLock);
return;
}
/*
 * If the new relfilenumber to be set is greater than or equal to already
 * flushed relfilenumber then log more and flush immediately.
 *
 * (This is less efficient than GetNewRelFileNumber, which arranges to
 * log some new relfilenumbers before the old batch is exhausted in the
 * hope that a flush will happen in the background before any values are
 * needed from the new batch. However, since this is only used during
 * binary upgrade, it shouldn't really matter.)
 */
if (relnumber >= ShmemVariableCache->flushedRelFileNumber)
{
RelFileNumber newlogrelnum;
/* log a full batch starting from the requested value */
newlogrelnum = relnumber + VAR_RELNUMBER_PER_XLOG;
XLogFlush(LogNextRelFileNumber(newlogrelnum));
/* we have flushed whatever we have logged so no pending flush */
ShmemVariableCache->loggedRelFileNumber = newlogrelnum;
ShmemVariableCache->flushedRelFileNumber = newlogrelnum;
ShmemVariableCache->loggedRelFileNumberRecPtr = InvalidXLogRecPtr;
}
/* publish the new counter value only after the WAL above is flushed */
ShmemVariableCache->nextRelFileNumber = relnumber;
LWLockRelease(RelFileNumberGenLock);
}
/* /*
* StopGeneratingPinnedObjectIds * StopGeneratingPinnedObjectIds
* *

View File

@ -4712,6 +4712,7 @@ BootStrapXLOG(void)
checkPoint.nextXid = checkPoint.nextXid =
FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
checkPoint.nextOid = FirstGenbkiObjectId; checkPoint.nextOid = FirstGenbkiObjectId;
checkPoint.nextRelFileNumber = FirstNormalRelFileNumber;
checkPoint.nextMulti = FirstMultiXactId; checkPoint.nextMulti = FirstMultiXactId;
checkPoint.nextMultiOffset = 0; checkPoint.nextMultiOffset = 0;
checkPoint.oldestXid = FirstNormalTransactionId; checkPoint.oldestXid = FirstNormalTransactionId;
@ -4725,7 +4726,11 @@ BootStrapXLOG(void)
ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber;
ShmemVariableCache->oidCount = 0; ShmemVariableCache->oidCount = 0;
ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber;
ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber;
ShmemVariableCache->loggedRelFileNumberRecPtr = InvalidXLogRecPtr;
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
AdvanceOldestClogXid(checkPoint.oldestXid); AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
@ -5191,7 +5196,10 @@ StartupXLOG(void)
/* initialize shared memory variables from the checkpoint record */ /* initialize shared memory variables from the checkpoint record */
ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber;
ShmemVariableCache->oidCount = 0; ShmemVariableCache->oidCount = 0;
ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber;
ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber;
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
AdvanceOldestClogXid(checkPoint.oldestXid); AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
@ -6663,6 +6671,24 @@ CreateCheckPoint(int flags)
checkPoint.nextOid += ShmemVariableCache->oidCount; checkPoint.nextOid += ShmemVariableCache->oidCount;
LWLockRelease(OidGenLock); LWLockRelease(OidGenLock);
/*
* If this is a shutdown checkpoint then we can safely start allocating
* relfilenumbers from the nextRelFileNumber value after the restart, because
* no one else can use a relfilenumber beyond that number before the
* shutdown. OTOH, if it is a normal checkpoint and there is a crash after
* this point, we might end up reusing the same relfilenumbers after the
* restart; so we need to set the nextRelFileNumber to the already logged
* relfilenumber, as no one will use a number beyond this limit without
* logging again.
*/
LWLockAcquire(RelFileNumberGenLock, LW_SHARED);
if (shutdown)
checkPoint.nextRelFileNumber = ShmemVariableCache->nextRelFileNumber;
else
checkPoint.nextRelFileNumber = ShmemVariableCache->loggedRelFileNumber;
LWLockRelease(RelFileNumberGenLock);
MultiXactGetCheckptMulti(shutdown, MultiXactGetCheckptMulti(shutdown,
&checkPoint.nextMulti, &checkPoint.nextMulti,
&checkPoint.nextMultiOffset, &checkPoint.nextMultiOffset,
@ -7540,6 +7566,24 @@ XLogPutNextOid(Oid nextOid)
*/ */
} }
/*
 * LogNextRelFileNumber
 *
 * Counterpart of XLogPutNextOid for the relfilenumber counter: inserts a
 * NEXT_RELFILENUMBER WAL record whose payload is 'nextrelnumber'.
 *
 * The record is only inserted here, not flushed.  The XLogRecPtr obtained
 * from XLogInsert is returned so that the caller can flush it at the
 * appropriate time.
 */
XLogRecPtr
LogNextRelFileNumber(RelFileNumber nextrelnumber)
{
	XLogBeginInsert();
	XLogRegisterData((char *) &nextrelnumber, sizeof(nextrelnumber));

	return XLogInsert(RM_XLOG_ID, XLOG_NEXT_RELFILENUMBER);
}
/* /*
* Write an XLOG SWITCH record. * Write an XLOG SWITCH record.
* *
@ -7755,6 +7799,17 @@ xlog_redo(XLogReaderState *record)
ShmemVariableCache->oidCount = 0; ShmemVariableCache->oidCount = 0;
LWLockRelease(OidGenLock); LWLockRelease(OidGenLock);
} }
if (info == XLOG_NEXT_RELFILENUMBER)
{
RelFileNumber nextRelFileNumber;
memcpy(&nextRelFileNumber, XLogRecGetData(record), sizeof(RelFileNumber));
LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE);
ShmemVariableCache->nextRelFileNumber = nextRelFileNumber;
ShmemVariableCache->loggedRelFileNumber = nextRelFileNumber;
ShmemVariableCache->flushedRelFileNumber = nextRelFileNumber;
LWLockRelease(RelFileNumberGenLock);
}
else if (info == XLOG_CHECKPOINT_SHUTDOWN) else if (info == XLOG_CHECKPOINT_SHUTDOWN)
{ {
CheckPoint checkPoint; CheckPoint checkPoint;
@ -7769,6 +7824,11 @@ xlog_redo(XLogReaderState *record)
ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0; ShmemVariableCache->oidCount = 0;
LWLockRelease(OidGenLock); LWLockRelease(OidGenLock);
LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE);
ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber;
ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber;
ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber;
LWLockRelease(RelFileNumberGenLock);
MultiXactSetNextMXact(checkPoint.nextMulti, MultiXactSetNextMXact(checkPoint.nextMulti,
checkPoint.nextMultiOffset); checkPoint.nextMultiOffset);

View File

@ -613,7 +613,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
#ifdef XLOGPREFETCHER_DEBUG_LEVEL #ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL, elog(XLOGPREFETCHER_DEBUG_LEVEL,
"suppressing prefetch in relation %u/%u/%u until %X/%X is replayed, which creates the relation", "suppressing prefetch in relation %u/%u/" UINT64_FORMAT " until %X/%X is replayed, which creates the relation",
xlrec->rlocator.spcOid, xlrec->rlocator.spcOid,
xlrec->rlocator.dbOid, xlrec->rlocator.dbOid,
xlrec->rlocator.relNumber, xlrec->rlocator.relNumber,
@ -636,7 +636,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
#ifdef XLOGPREFETCHER_DEBUG_LEVEL #ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL, elog(XLOGPREFETCHER_DEBUG_LEVEL,
"suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, which truncates the relation", "suppressing prefetch in relation %u/%u/" UINT64_FORMAT " from block %u until %X/%X is replayed, which truncates the relation",
xlrec->rlocator.spcOid, xlrec->rlocator.spcOid,
xlrec->rlocator.dbOid, xlrec->rlocator.dbOid,
xlrec->rlocator.relNumber, xlrec->rlocator.relNumber,
@ -735,7 +735,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
{ {
#ifdef XLOGPREFETCHER_DEBUG_LEVEL #ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL, elog(XLOGPREFETCHER_DEBUG_LEVEL,
"suppressing all prefetch in relation %u/%u/%u until %X/%X is replayed, because the relation does not exist on disk", "suppressing all prefetch in relation %u/%u/" UINT64_FORMAT " until %X/%X is replayed, because the relation does not exist on disk",
reln->smgr_rlocator.locator.spcOid, reln->smgr_rlocator.locator.spcOid,
reln->smgr_rlocator.locator.dbOid, reln->smgr_rlocator.locator.dbOid,
reln->smgr_rlocator.locator.relNumber, reln->smgr_rlocator.locator.relNumber,
@ -756,7 +756,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
{ {
#ifdef XLOGPREFETCHER_DEBUG_LEVEL #ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL, elog(XLOGPREFETCHER_DEBUG_LEVEL,
"suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, because the relation is too small", "suppressing prefetch in relation %u/%u/" UINT64_FORMAT " from block %u until %X/%X is replayed, because the relation is too small",
reln->smgr_rlocator.locator.spcOid, reln->smgr_rlocator.locator.spcOid,
reln->smgr_rlocator.locator.dbOid, reln->smgr_rlocator.locator.dbOid,
reln->smgr_rlocator.locator.relNumber, reln->smgr_rlocator.locator.relNumber,
@ -795,7 +795,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
* truncated beneath our feet? * truncated beneath our feet?
*/ */
elog(ERROR, elog(ERROR,
"could not prefetch relation %u/%u/%u block %u", "could not prefetch relation %u/%u/" UINT64_FORMAT " block %u",
reln->smgr_rlocator.locator.spcOid, reln->smgr_rlocator.locator.spcOid,
reln->smgr_rlocator.locator.dbOid, reln->smgr_rlocator.locator.dbOid,
reln->smgr_rlocator.locator.relNumber, reln->smgr_rlocator.locator.relNumber,
@ -934,7 +934,7 @@ XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator,
{ {
#ifdef XLOGPREFETCHER_DEBUG_LEVEL #ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL, elog(XLOGPREFETCHER_DEBUG_LEVEL,
"prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (blocks >= %u filtered)", "prefetch of %u/%u/" UINT64_FORMAT " block %u suppressed; filtering until LSN %X/%X is replayed (blocks >= %u filtered)",
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno,
LSN_FORMAT_ARGS(filter->filter_until_replayed), LSN_FORMAT_ARGS(filter->filter_until_replayed),
filter->filter_from_block); filter->filter_from_block);
@ -950,7 +950,7 @@ XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator,
{ {
#ifdef XLOGPREFETCHER_DEBUG_LEVEL #ifdef XLOGPREFETCHER_DEBUG_LEVEL
elog(XLOGPREFETCHER_DEBUG_LEVEL, elog(XLOGPREFETCHER_DEBUG_LEVEL,
"prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (whole database)", "prefetch of %u/%u/" UINT64_FORMAT " block %u suppressed; filtering until LSN %X/%X is replayed (whole database)",
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno,
LSN_FORMAT_ARGS(filter->filter_until_replayed)); LSN_FORMAT_ARGS(filter->filter_until_replayed));
#endif #endif

View File

@ -2228,14 +2228,14 @@ xlog_block_info(StringInfo buf, XLogReaderState *record)
continue; continue;
if (forknum != MAIN_FORKNUM) if (forknum != MAIN_FORKNUM)
appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, fork %u, blk %u", appendStringInfo(buf, "; blkref #%d: rel %u/%u/" UINT64_FORMAT ", fork %u, blk %u",
block_id, block_id,
rlocator.spcOid, rlocator.dbOid, rlocator.spcOid, rlocator.dbOid,
rlocator.relNumber, rlocator.relNumber,
forknum, forknum,
blk); blk);
else else
appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, blk %u", appendStringInfo(buf, "; blkref #%d: rel %u/%u/" UINT64_FORMAT ", blk %u",
block_id, block_id,
rlocator.spcOid, rlocator.dbOid, rlocator.spcOid, rlocator.dbOid,
rlocator.relNumber, rlocator.relNumber,
@ -2433,7 +2433,7 @@ verifyBackupPageConsistency(XLogReaderState *record)
if (memcmp(replay_image_masked, primary_image_masked, BLCKSZ) != 0) if (memcmp(replay_image_masked, primary_image_masked, BLCKSZ) != 0)
{ {
elog(FATAL, elog(FATAL,
"inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u", "inconsistent page found, rel %u/%u/" UINT64_FORMAT ", forknum %u, blkno %u",
rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber,
forknum, blkno); forknum, blkno);
} }

View File

@ -619,17 +619,17 @@ CreateFakeRelcacheEntry(RelFileLocator rlocator)
rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT; rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
/* We don't know the name of the relation; use relfilenumber instead */ /* We don't know the name of the relation; use relfilenumber instead */
sprintf(RelationGetRelationName(rel), "%u", rlocator.relNumber); sprintf(RelationGetRelationName(rel), UINT64_FORMAT, rlocator.relNumber);
/* /*
* We set up the lockRelId in case anything tries to lock the dummy * We set up the lockRelId in case anything tries to lock the dummy
* relation. Note that this is fairly bogus since relNumber may be * relation. Note that this is fairly bogus since relNumber is completely
* different from the relation's OID. It shouldn't really matter though. * different from the relation's OID. It shouldn't really matter though.
* In recovery, we are running by ourselves and can't have any lock * In recovery, we are running by ourselves and can't have any lock
* conflicts. While syncing, we already hold AccessExclusiveLock. * conflicts. While syncing, we already hold AccessExclusiveLock.
*/ */
rel->rd_lockInfo.lockRelId.dbId = rlocator.dbOid; rel->rd_lockInfo.lockRelId.dbId = rlocator.dbOid;
rel->rd_lockInfo.lockRelId.relId = rlocator.relNumber; rel->rd_lockInfo.lockRelId.relId = (Oid) rlocator.relNumber;
rel->rd_smgr = NULL; rel->rd_smgr = NULL;

View File

@ -1246,7 +1246,7 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
if (relForkNum != INIT_FORKNUM) if (relForkNum != INIT_FORKNUM)
{ {
char initForkFile[MAXPGPATH]; char initForkFile[MAXPGPATH];
char relNumber[OIDCHARS + 1]; char relNumber[RELNUMBERCHARS + 1];
/* /*
* If any other type of fork, check if there is an init fork * If any other type of fork, check if there is an init fork

View File

@ -482,101 +482,6 @@ GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn)
return newOid; return newOid;
} }
/*
 * GetNewRelFileNumber
 *		Pick a relfilenumber whose main-fork file does not already exist
 *		in the target tablespace/database directory.
 *
 * When pg_class is non-NULL, candidates are drawn with
 * GetNewOidWithIndex() against pg_class's OID index, so the result can
 * double as the relation's OID.  When pg_class is NULL, candidates come
 * straight from GetNewObjectId(); use that form when only a new
 * relfilenumber for an existing relation is wanted.
 *
 * As with GetNewOidWithIndex(), a race is theoretically possible, but it
 * is not considered worth guarding against.
 *
 * Not usable in bootstrap mode: bootstrap relations have preassigned OIDs.
 */
RelFileNumber
GetNewRelFileNumber(Oid reltablespace, Relation pg_class, char relpersistence)
{
	RelFileLocatorBackend rlocator;
	BackendId	backend;

	/*
	 * During a binary-upgrade run every relfilenumber is dictated by the
	 * dump script, so reaching this point would indicate a bug.
	 */
	Assert(!IsBinaryUpgrade);

	/* Temp relations are keyed by our backend ID; everything else is not. */
	if (relpersistence == RELPERSISTENCE_TEMP)
		backend = BackendIdForTempRelations();
	else if (relpersistence == RELPERSISTENCE_UNLOGGED ||
			 relpersistence == RELPERSISTENCE_PERMANENT)
		backend = InvalidBackendId;
	else
	{
		elog(ERROR, "invalid relpersistence: %c", relpersistence);
		return InvalidRelFileNumber;	/* placate compiler */
	}

	/* This logic should match RelationInitPhysicalAddr */
	if (reltablespace)
		rlocator.locator.spcOid = reltablespace;
	else
		rlocator.locator.spcOid = MyDatabaseTableSpace;

	if (rlocator.locator.spcOid == GLOBALTABLESPACE_OID)
		rlocator.locator.dbOid = InvalidOid;
	else
		rlocator.locator.dbOid = MyDatabaseId;

	/*
	 * The generated path depends on the backend ID, so it has to be filled
	 * in before probing for filename collisions.
	 */
	rlocator.backend = backend;

	for (;;)
	{
		char	   *rpath;
		bool		collides;

		CHECK_FOR_INTERRUPTS();

		/* Generate the OID */
		if (pg_class)
			rlocator.locator.relNumber = GetNewOidWithIndex(pg_class, ClassOidIndexId,
															Anum_pg_class_oid);
		else
			rlocator.locator.relNumber = GetNewObjectId();

		/*
		 * Probe for an existing file of the same name.  Only a successful
		 * access() counts as a collision; any failure, whatever the errno,
		 * is treated as "no file".  If a conflicting file does exist after
		 * all, creating the new relation file will fail in smgr.
		 */
		rpath = relpath(rlocator, MAIN_FORKNUM);
		collides = (access(rpath, F_OK) == 0);
		pfree(rpath);

		if (!collides)
			break;
	}

	return rlocator.locator.relNumber;
}
/* /*
* SQL callable interface for GetNewOidWithIndex(). Outside of initdb's * SQL callable interface for GetNewOidWithIndex(). Outside of initdb's
* direct insertions into catalog tables, and recovering from corruption, this * direct insertions into catalog tables, and recovering from corruption, this

View File

@ -341,11 +341,19 @@ heap_create(const char *relname,
else else
{ {
/* /*
* If relfilenumber is unspecified by the caller then create storage * If relfilenumber is unspecified by the caller then allocate a new
* with oid same as relid. * one, except for system tables, for which we make the initial
* relfilenumber the same as the table OID. See the comments for
* FirstNormalRelFileNumber for an explanation of why we do this.
*/ */
if (!RelFileNumberIsValid(relfilenumber)) if (!RelFileNumberIsValid(relfilenumber))
relfilenumber = relid; {
if (relid < FirstNormalObjectId)
relfilenumber = relid;
else
relfilenumber = GetNewRelFileNumber(reltablespace,
relpersistence);
}
} }
/* /*
@ -901,7 +909,7 @@ InsertPgClassTuple(Relation pg_class_desc,
values[Anum_pg_class_reloftype - 1] = ObjectIdGetDatum(rd_rel->reloftype); values[Anum_pg_class_reloftype - 1] = ObjectIdGetDatum(rd_rel->reloftype);
values[Anum_pg_class_relowner - 1] = ObjectIdGetDatum(rd_rel->relowner); values[Anum_pg_class_relowner - 1] = ObjectIdGetDatum(rd_rel->relowner);
values[Anum_pg_class_relam - 1] = ObjectIdGetDatum(rd_rel->relam); values[Anum_pg_class_relam - 1] = ObjectIdGetDatum(rd_rel->relam);
values[Anum_pg_class_relfilenode - 1] = ObjectIdGetDatum(rd_rel->relfilenode); values[Anum_pg_class_relfilenode - 1] = Int64GetDatum(rd_rel->relfilenode);
values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace); values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace);
values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages); values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages);
values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples); values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples);
@ -1173,12 +1181,7 @@ heap_create_with_catalog(const char *relname,
if (shared_relation && reltablespace != GLOBALTABLESPACE_OID) if (shared_relation && reltablespace != GLOBALTABLESPACE_OID)
elog(ERROR, "shared relations must be placed in pg_global tablespace"); elog(ERROR, "shared relations must be placed in pg_global tablespace");
/* /* Allocate an OID for the relation, unless we were told what to use. */
* Allocate an OID for the relation, unless we were told what to use.
*
* The OID will be the relfilenumber as well, so make sure it doesn't
* collide with either pg_class OIDs or existing physical files.
*/
if (!OidIsValid(relid)) if (!OidIsValid(relid))
{ {
/* Use binary-upgrade override for pg_class.oid and relfilenumber */ /* Use binary-upgrade override for pg_class.oid and relfilenumber */
@ -1232,8 +1235,8 @@ heap_create_with_catalog(const char *relname,
} }
if (!OidIsValid(relid)) if (!OidIsValid(relid))
relid = GetNewRelFileNumber(reltablespace, pg_class_desc, relid = GetNewOidWithIndex(pg_class_desc, ClassOidIndexId,
relpersistence); Anum_pg_class_oid);
} }
/* /*

View File

@ -898,12 +898,7 @@ index_create(Relation heapRelation,
collationObjectId, collationObjectId,
classObjectId); classObjectId);
/* /* Allocate an OID for the index, unless we were told what to use. */
* Allocate an OID for the index, unless we were told what to use.
*
* The OID will be the relfilenumber as well, so make sure it doesn't
* collide with either pg_class OIDs or existing physical files.
*/
if (!OidIsValid(indexRelationId)) if (!OidIsValid(indexRelationId))
{ {
/* Use binary-upgrade override for pg_class.oid and relfilenumber */ /* Use binary-upgrade override for pg_class.oid and relfilenumber */
@ -935,8 +930,8 @@ index_create(Relation heapRelation,
} }
else else
{ {
indexRelationId = indexRelationId = GetNewOidWithIndex(pg_class, ClassOidIndexId,
GetNewRelFileNumber(tableSpaceId, pg_class, relpersistence); Anum_pg_class_oid);
} }
} }

View File

@ -968,6 +968,10 @@ smgr_redo(XLogReaderState *record)
xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record); xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
SMgrRelation reln; SMgrRelation reln;
if (xlrec->rlocator.relNumber > ShmemVariableCache->nextRelFileNumber)
elog(ERROR, "unexpected relnumber " UINT64_FORMAT " that is bigger than nextRelFileNumber " UINT64_FORMAT,
xlrec->rlocator.relNumber, ShmemVariableCache->nextRelFileNumber);
reln = smgropen(xlrec->rlocator, InvalidBackendId); reln = smgropen(xlrec->rlocator, InvalidBackendId);
smgrcreate(reln, xlrec->forkNum, true); smgrcreate(reln, xlrec->forkNum, true);
} }
@ -981,6 +985,10 @@ smgr_redo(XLogReaderState *record)
int nforks = 0; int nforks = 0;
bool need_fsm_vacuum = false; bool need_fsm_vacuum = false;
if (xlrec->rlocator.relNumber > ShmemVariableCache->nextRelFileNumber)
elog(ERROR, "unexpected relnumber " UINT64_FORMAT "that is bigger than nextRelFileNumber " UINT64_FORMAT,
xlrec->rlocator.relNumber, ShmemVariableCache->nextRelFileNumber);
reln = smgropen(xlrec->rlocator, InvalidBackendId); reln = smgropen(xlrec->rlocator, InvalidBackendId);
/* /*

View File

@ -14375,10 +14375,14 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
} }
/* /*
* Relfilenumbers are not unique in databases across tablespaces, so we * Generate a new relfilenumber. We cannot reuse the old relfilenumber
* need to allocate a new one in the new tablespace. * because of the possibility that that relation will be moved back to the
* original tablespace before the next checkpoint. At that point, the
* first segment of the main fork won't have been unlinked yet, and an
* attempt to create new relation storage with that same relfilenumber
* will fail.
*/ */
newrelfilenumber = GetNewRelFileNumber(newTableSpace, NULL, newrelfilenumber = GetNewRelFileNumber(newTableSpace,
rel->rd_rel->relpersistence); rel->rd_rel->relpersistence);
/* Open old and new relation */ /* Open old and new relation */

View File

@ -267,7 +267,7 @@ CreateTableSpace(CreateTableSpaceStmt *stmt)
* parts. * parts.
*/ */
if (strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + if (strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 +
OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS > MAXPGPATH) OIDCHARS + 1 + RELNUMBERCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS > MAXPGPATH)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("tablespace location \"%s\" is too long", errmsg("tablespace location \"%s\" is too long",

View File

@ -961,12 +961,12 @@ _read${n}(void)
print $off "\tWRITE_UINT_FIELD($f);\n"; print $off "\tWRITE_UINT_FIELD($f);\n";
print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read; print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read;
} }
elsif ($t eq 'uint64') elsif ($t eq 'uint64' || $t eq 'RelFileNumber')
{ {
print $off "\tWRITE_UINT64_FIELD($f);\n"; print $off "\tWRITE_UINT64_FIELD($f);\n";
print $rff "\tREAD_UINT64_FIELD($f);\n" unless $no_read; print $rff "\tREAD_UINT64_FIELD($f);\n" unless $no_read;
} }
elsif ($t eq 'Oid' || $t eq 'RelFileNumber') elsif ($t eq 'Oid')
{ {
print $off "\tWRITE_OID_FIELD($f);\n"; print $off "\tWRITE_OID_FIELD($f);\n";
print $rff "\tREAD_OID_FIELD($f);\n" unless $no_read; print $rff "\tREAD_OID_FIELD($f);\n" unless $no_read;

View File

@ -154,6 +154,7 @@ xlog_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
break; break;
case XLOG_NOOP: case XLOG_NOOP:
case XLOG_NEXTOID: case XLOG_NEXTOID:
case XLOG_NEXT_RELFILENUMBER:
case XLOG_SWITCH: case XLOG_SWITCH:
case XLOG_BACKUP_END: case XLOG_BACKUP_END:
case XLOG_PARAMETER_CHANGE: case XLOG_PARAMETER_CHANGE:

View File

@ -4932,7 +4932,7 @@ DisplayMapping(HTAB *tuplecid_data)
hash_seq_init(&hstat, tuplecid_data); hash_seq_init(&hstat, tuplecid_data);
while ((ent = (ReorderBufferTupleCidEnt *) hash_seq_search(&hstat)) != NULL) while ((ent = (ReorderBufferTupleCidEnt *) hash_seq_search(&hstat)) != NULL)
{ {
elog(DEBUG3, "mapping: node: %u/%u/%u tid: %u/%u cmin: %u, cmax: %u", elog(DEBUG3, "mapping: node: %u/%u/" UINT64_FORMAT " tid: %u/%u cmin: %u, cmax: %u",
ent->key.rlocator.dbOid, ent->key.rlocator.dbOid,
ent->key.rlocator.spcOid, ent->key.rlocator.spcOid,
ent->key.rlocator.relNumber, ent->key.rlocator.relNumber,

View File

@ -31,7 +31,7 @@ static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
typedef struct typedef struct
{ {
Oid reloid; /* hash key */ RelFileNumber relnumber; /* hash key */
} unlogged_relation_entry; } unlogged_relation_entry;
/* /*
@ -184,10 +184,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
* need to be reset. Otherwise, this cleanup operation would be * need to be reset. Otherwise, this cleanup operation would be
* O(n^2). * O(n^2).
*/ */
ctl.keysize = sizeof(Oid); ctl.keysize = sizeof(RelFileNumber);
ctl.entrysize = sizeof(unlogged_relation_entry); ctl.entrysize = sizeof(unlogged_relation_entry);
ctl.hcxt = CurrentMemoryContext; ctl.hcxt = CurrentMemoryContext;
hash = hash_create("unlogged relation OIDs", 32, &ctl, hash = hash_create("unlogged relation RelFileNumbers", 32, &ctl,
HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
/* Scan the directory. */ /* Scan the directory. */
@ -208,10 +208,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
continue; continue;
/* /*
* Put the OID portion of the name into the hash table, if it * Put the RELFILENUMBER portion of the name into the hash table,
* isn't already. * if it isn't already.
*/ */
ent.reloid = atooid(de->d_name); ent.relnumber = atorelnumber(de->d_name);
(void) hash_search(hash, &ent, HASH_ENTER, NULL); (void) hash_search(hash, &ent, HASH_ENTER, NULL);
} }
@ -248,10 +248,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
continue; continue;
/* /*
* See whether the OID portion of the name shows up in the hash * See whether the RELFILENUMBER portion of the name shows up in
* table. If so, nuke it! * the hash table. If so, nuke it!
*/ */
ent.reloid = atooid(de->d_name); ent.relnumber = atorelnumber(de->d_name);
if (hash_search(hash, &ent, HASH_FIND, NULL)) if (hash_search(hash, &ent, HASH_FIND, NULL))
{ {
snprintf(rm_path, sizeof(rm_path), "%s/%s", snprintf(rm_path, sizeof(rm_path), "%s/%s",
@ -286,7 +286,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
{ {
ForkNumber forkNum; ForkNumber forkNum;
int relnumchars; int relnumchars;
char relnumbuf[OIDCHARS + 1]; char relnumbuf[RELNUMBERCHARS + 1];
char srcpath[MAXPGPATH * 2]; char srcpath[MAXPGPATH * 2];
char dstpath[MAXPGPATH]; char dstpath[MAXPGPATH];
@ -329,7 +329,7 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
{ {
ForkNumber forkNum; ForkNumber forkNum;
int relnumchars; int relnumchars;
char relnumbuf[OIDCHARS + 1]; char relnumbuf[RELNUMBERCHARS + 1];
char mainpath[MAXPGPATH]; char mainpath[MAXPGPATH];
/* Skip anything that doesn't look like a relation data file. */ /* Skip anything that doesn't look like a relation data file. */
@ -372,8 +372,8 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
* for a non-temporary relation and false otherwise. * for a non-temporary relation and false otherwise.
* *
* NB: If this function returns true, the caller is entitled to assume that * NB: If this function returns true, the caller is entitled to assume that
* *relnumchars has been set to a value no more than OIDCHARS, and thus * *relnumchars has been set to a value no more than RELNUMBERCHARS, and thus
* that a buffer of OIDCHARS+1 characters is sufficient to hold the * that a buffer of RELNUMBERCHARS+1 characters is sufficient to hold the
* RelFileNumber portion of the filename. This is critical to protect against * RelFileNumber portion of the filename. This is critical to protect against
* a possible buffer overrun. * a possible buffer overrun.
*/ */
@ -386,7 +386,7 @@ parse_filename_for_nontemp_relation(const char *name, int *relnumchars,
/* Look for a non-empty string of digits (that isn't too long). */ /* Look for a non-empty string of digits (that isn't too long). */
for (pos = 0; isdigit((unsigned char) name[pos]); ++pos) for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
; ;
if (pos == 0 || pos > OIDCHARS) if (pos == 0 || pos > RELNUMBERCHARS)
return false; return false;
*relnumchars = pos; *relnumchars = pos;

View File

@ -273,7 +273,7 @@ restart:
BlockNumber blknum; BlockNumber blknum;
BufferGetTag(buf, &rlocator, &forknum, &blknum); BufferGetTag(buf, &rlocator, &forknum, &blknum);
elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u", elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/" UINT64_FORMAT,
blknum, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber); blknum, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber);
/* make sure we hold an exclusive lock */ /* make sure we hold an exclusive lock */

View File

@ -53,3 +53,4 @@ XactTruncationLock 44
# 45 was XactTruncationLock until removal of BackendRandomLock # 45 was XactTruncationLock until removal of BackendRandomLock
WrapLimitsVacuumLock 46 WrapLimitsVacuumLock 46
NotifyQueueTailLock 47 NotifyQueueTailLock 47
RelFileNumberGenLock 48

View File

@ -257,6 +257,13 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
* next checkpoint, we prevent reassignment of the relfilenumber until it's * next checkpoint, we prevent reassignment of the relfilenumber until it's
* safe, because relfilenumber assignment skips over any existing file. * safe, because relfilenumber assignment skips over any existing file.
* *
* XXX. Although all of this was true when relfilenumbers were 32 bits wide,
* they are now 56 bits wide and do not wrap around, so in the future we can
* change the code to immediately unlink the first segment of the relation
* along with all the others. We still do reuse relfilenumbers when createdb()
* is performed using the file-copy method or during movedb(), but the scenario
* described above can only happen when creating a new relation.
*
* We do not need to go through this dance for temp relations, though, because * We do not need to go through this dance for temp relations, though, because
* we never make WAL entries for temp rels, and so a temp rel poses no threat * we never make WAL entries for temp rels, and so a temp rel poses no threat
* to the health of a regular rel that has taken over its relfilenumber. * to the health of a regular rel that has taken over its relfilenumber.

View File

@ -154,7 +154,7 @@ smgropen(RelFileLocator rlocator, BackendId backend)
/* First time through: initialize the hash table */ /* First time through: initialize the hash table */
HASHCTL ctl; HASHCTL ctl;
ctl.keysize = sizeof(RelFileLocatorBackend); ctl.keysize = SizeOfRelFileLocatorBackend;
ctl.entrysize = sizeof(SMgrRelationData); ctl.entrysize = sizeof(SMgrRelationData);
SMgrRelationHash = hash_create("smgr relation table", 400, SMgrRelationHash = hash_create("smgr relation table", 400,
&ctl, HASH_ELEM | HASH_BLOBS); &ctl, HASH_ELEM | HASH_BLOBS);

View File

@ -878,7 +878,7 @@ pg_relation_filenode(PG_FUNCTION_ARGS)
if (!RelFileNumberIsValid(result)) if (!RelFileNumberIsValid(result))
PG_RETURN_NULL(); PG_RETURN_NULL();
PG_RETURN_OID(result); PG_RETURN_INT64(result);
} }
/* /*
@ -898,9 +898,12 @@ Datum
pg_filenode_relation(PG_FUNCTION_ARGS) pg_filenode_relation(PG_FUNCTION_ARGS)
{ {
Oid reltablespace = PG_GETARG_OID(0); Oid reltablespace = PG_GETARG_OID(0);
RelFileNumber relfilenumber = PG_GETARG_OID(1); RelFileNumber relfilenumber = PG_GETARG_INT64(1);
Oid heaprel; Oid heaprel;
/* check whether the relfilenumber is within a valid range */
CHECK_RELFILENUMBER_RANGE(relfilenumber);
/* test needed so RelidByRelfilenumber doesn't misbehave */ /* test needed so RelidByRelfilenumber doesn't misbehave */
if (!RelFileNumberIsValid(relfilenumber)) if (!RelFileNumberIsValid(relfilenumber))
PG_RETURN_NULL(); PG_RETURN_NULL();

View File

@ -17,6 +17,7 @@
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "commands/extension.h" #include "commands/extension.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "storage/relfilelocator.h"
#include "utils/array.h" #include "utils/array.h"
#include "utils/builtins.h" #include "utils/builtins.h"
@ -98,10 +99,12 @@ binary_upgrade_set_next_heap_pg_class_oid(PG_FUNCTION_ARGS)
Datum Datum
binary_upgrade_set_next_heap_relfilenode(PG_FUNCTION_ARGS) binary_upgrade_set_next_heap_relfilenode(PG_FUNCTION_ARGS)
{ {
RelFileNumber relfilenumber = PG_GETARG_OID(0); RelFileNumber relfilenumber = PG_GETARG_INT64(0);
CHECK_IS_BINARY_UPGRADE; CHECK_IS_BINARY_UPGRADE;
CHECK_RELFILENUMBER_RANGE(relfilenumber);
binary_upgrade_next_heap_pg_class_relfilenumber = relfilenumber; binary_upgrade_next_heap_pg_class_relfilenumber = relfilenumber;
SetNextRelFileNumber(relfilenumber + 1);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
@ -120,10 +123,12 @@ binary_upgrade_set_next_index_pg_class_oid(PG_FUNCTION_ARGS)
Datum Datum
binary_upgrade_set_next_index_relfilenode(PG_FUNCTION_ARGS) binary_upgrade_set_next_index_relfilenode(PG_FUNCTION_ARGS)
{ {
RelFileNumber relfilenumber = PG_GETARG_OID(0); RelFileNumber relfilenumber = PG_GETARG_INT64(0);
CHECK_IS_BINARY_UPGRADE; CHECK_IS_BINARY_UPGRADE;
CHECK_RELFILENUMBER_RANGE(relfilenumber);
binary_upgrade_next_index_pg_class_relfilenumber = relfilenumber; binary_upgrade_next_index_pg_class_relfilenumber = relfilenumber;
SetNextRelFileNumber(relfilenumber + 1);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }
@ -142,10 +147,12 @@ binary_upgrade_set_next_toast_pg_class_oid(PG_FUNCTION_ARGS)
Datum Datum
binary_upgrade_set_next_toast_relfilenode(PG_FUNCTION_ARGS) binary_upgrade_set_next_toast_relfilenode(PG_FUNCTION_ARGS)
{ {
RelFileNumber relfilenumber = PG_GETARG_OID(0); RelFileNumber relfilenumber = PG_GETARG_INT64(0);
CHECK_IS_BINARY_UPGRADE; CHECK_IS_BINARY_UPGRADE;
CHECK_RELFILENUMBER_RANGE(relfilenumber);
binary_upgrade_next_toast_pg_class_relfilenumber = relfilenumber; binary_upgrade_next_toast_pg_class_relfilenumber = relfilenumber;
SetNextRelFileNumber(relfilenumber + 1);
PG_RETURN_VOID(); PG_RETURN_VOID();
} }

View File

@ -3712,7 +3712,7 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
{ {
/* Allocate a new relfilenumber */ /* Allocate a new relfilenumber */
newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace, newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace,
NULL, persistence); persistence);
} }
else if (relation->rd_rel->relkind == RELKIND_INDEX) else if (relation->rd_rel->relkind == RELKIND_INDEX)
{ {

View File

@ -196,7 +196,7 @@ RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber)
/* set scan arguments */ /* set scan arguments */
skey[0].sk_argument = ObjectIdGetDatum(reltablespace); skey[0].sk_argument = ObjectIdGetDatum(reltablespace);
skey[1].sk_argument = ObjectIdGetDatum(relfilenumber); skey[1].sk_argument = Int64GetDatum((int64) relfilenumber);
scandesc = systable_beginscan(relation, scandesc = systable_beginscan(relation,
ClassTblspcRelfilenodeIndexId, ClassTblspcRelfilenodeIndexId,
@ -213,7 +213,7 @@ RelidByRelfilenumber(Oid reltablespace, RelFileNumber relfilenumber)
if (found) if (found)
elog(ERROR, elog(ERROR,
"unexpected duplicate for tablespace %u, relfilenumber %u", "unexpected duplicate for tablespace %u, relfilenumber " UINT64_FORMAT,
reltablespace, relfilenumber); reltablespace, relfilenumber);
found = true; found = true;

View File

@ -79,8 +79,8 @@ pg_control_system(PG_FUNCTION_ARGS)
Datum Datum
pg_control_checkpoint(PG_FUNCTION_ARGS) pg_control_checkpoint(PG_FUNCTION_ARGS)
{ {
Datum values[18]; Datum values[19];
bool nulls[18]; bool nulls[19];
TupleDesc tupdesc; TupleDesc tupdesc;
HeapTuple htup; HeapTuple htup;
ControlFileData *ControlFile; ControlFileData *ControlFile;
@ -129,6 +129,8 @@ pg_control_checkpoint(PG_FUNCTION_ARGS)
XIDOID, -1, 0); XIDOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 18, "checkpoint_time", TupleDescInitEntry(tupdesc, (AttrNumber) 18, "checkpoint_time",
TIMESTAMPTZOID, -1, 0); TIMESTAMPTZOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 19, "next_relfilenumber",
INT8OID, -1, 0);
tupdesc = BlessTupleDesc(tupdesc); tupdesc = BlessTupleDesc(tupdesc);
/* Read the control file. */ /* Read the control file. */
@ -202,6 +204,9 @@ pg_control_checkpoint(PG_FUNCTION_ARGS)
values[17] = TimestampTzGetDatum(time_t_to_timestamptz(ControlFile->checkPointCopy.time)); values[17] = TimestampTzGetDatum(time_t_to_timestamptz(ControlFile->checkPointCopy.time));
nulls[17] = false; nulls[17] = false;
values[18] = Int64GetDatum((int64) ControlFile->checkPointCopy.nextRelFileNumber);
nulls[18] = false;
htup = heap_form_tuple(tupdesc, values, nulls); htup = heap_form_tuple(tupdesc, values, nulls);
PG_RETURN_DATUM(HeapTupleGetDatum(htup)); PG_RETURN_DATUM(HeapTupleGetDatum(htup));

View File

@ -485,9 +485,7 @@ main(int argc, char *argv[])
mode = PG_MODE_ENABLE; mode = PG_MODE_ENABLE;
break; break;
case 'f': case 'f':
if (!option_parse_int(optarg, "-f/--filenode", 0, if (!option_parse_relfilenumber(optarg, "-f/--filenode"))
INT_MAX,
NULL))
exit(1); exit(1);
only_filenode = pstrdup(optarg); only_filenode = pstrdup(optarg);
break; break;

View File

@ -250,6 +250,8 @@ main(int argc, char *argv[])
printf(_("Latest checkpoint's NextXID: %u:%u\n"), printf(_("Latest checkpoint's NextXID: %u:%u\n"),
EpochFromFullTransactionId(ControlFile->checkPointCopy.nextXid), EpochFromFullTransactionId(ControlFile->checkPointCopy.nextXid),
XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid)); XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid));
printf(_("Latest checkpoint's NextRelFileNumber:%llu\n"),
(unsigned long long) ControlFile->checkPointCopy.nextRelFileNumber);
printf(_("Latest checkpoint's NextOID: %u\n"), printf(_("Latest checkpoint's NextOID: %u\n"),
ControlFile->checkPointCopy.nextOid); ControlFile->checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %u\n"), printf(_("Latest checkpoint's NextMultiXactId: %u\n"),

View File

@ -3184,15 +3184,15 @@ dumpDatabase(Archive *fout)
atooid(PQgetvalue(lo_res, i, ii_oid))); atooid(PQgetvalue(lo_res, i, ii_oid)));
oid = atooid(PQgetvalue(lo_res, i, ii_oid)); oid = atooid(PQgetvalue(lo_res, i, ii_oid));
relfilenumber = atooid(PQgetvalue(lo_res, i, ii_relfilenode)); relfilenumber = atorelnumber(PQgetvalue(lo_res, i, ii_relfilenode));
if (oid == LargeObjectRelationId) if (oid == LargeObjectRelationId)
appendPQExpBuffer(loOutQry, appendPQExpBuffer(loOutQry,
"SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n", "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
relfilenumber); relfilenumber);
else if (oid == LargeObjectLOidPNIndexId) else if (oid == LargeObjectLOidPNIndexId)
appendPQExpBuffer(loOutQry, appendPQExpBuffer(loOutQry,
"SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
relfilenumber); relfilenumber);
} }
@ -4877,16 +4877,16 @@ binary_upgrade_set_pg_class_oids(Archive *fout,
relkind = *PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "relkind")); relkind = *PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "relkind"));
relfilenumber = atooid(PQgetvalue(upgrade_res, 0, relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0,
PQfnumber(upgrade_res, "relfilenode"))); PQfnumber(upgrade_res, "relfilenode")));
toast_oid = atooid(PQgetvalue(upgrade_res, 0, toast_oid = atooid(PQgetvalue(upgrade_res, 0,
PQfnumber(upgrade_res, "reltoastrelid"))); PQfnumber(upgrade_res, "reltoastrelid")));
toast_relfilenumber = atooid(PQgetvalue(upgrade_res, 0, toast_relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0,
PQfnumber(upgrade_res, "toast_relfilenode"))); PQfnumber(upgrade_res, "toast_relfilenode")));
toast_index_oid = atooid(PQgetvalue(upgrade_res, 0, toast_index_oid = atooid(PQgetvalue(upgrade_res, 0,
PQfnumber(upgrade_res, "indexrelid"))); PQfnumber(upgrade_res, "indexrelid")));
toast_index_relfilenumber = atooid(PQgetvalue(upgrade_res, 0, toast_index_relfilenumber = atorelnumber(PQgetvalue(upgrade_res, 0,
PQfnumber(upgrade_res, "toast_index_relfilenode"))); PQfnumber(upgrade_res, "toast_index_relfilenode")));
appendPQExpBufferStr(upgrade_buffer, appendPQExpBufferStr(upgrade_buffer,
"\n-- For binary upgrade, must preserve pg_class oids and relfilenodes\n"); "\n-- For binary upgrade, must preserve pg_class oids and relfilenodes\n");
@ -4904,7 +4904,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout,
*/ */
if (RelFileNumberIsValid(relfilenumber) && relkind != RELKIND_PARTITIONED_TABLE) if (RelFileNumberIsValid(relfilenumber) && relkind != RELKIND_PARTITIONED_TABLE)
appendPQExpBuffer(upgrade_buffer, appendPQExpBuffer(upgrade_buffer,
"SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n", "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
relfilenumber); relfilenumber);
/* /*
@ -4918,7 +4918,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout,
"SELECT pg_catalog.binary_upgrade_set_next_toast_pg_class_oid('%u'::pg_catalog.oid);\n", "SELECT pg_catalog.binary_upgrade_set_next_toast_pg_class_oid('%u'::pg_catalog.oid);\n",
toast_oid); toast_oid);
appendPQExpBuffer(upgrade_buffer, appendPQExpBuffer(upgrade_buffer,
"SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('%u'::pg_catalog.oid);\n", "SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
toast_relfilenumber); toast_relfilenumber);
/* every toast table has an index */ /* every toast table has an index */
@ -4926,7 +4926,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout,
"SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n", "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n",
toast_index_oid); toast_index_oid);
appendPQExpBuffer(upgrade_buffer, appendPQExpBuffer(upgrade_buffer,
"SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
toast_index_relfilenumber); toast_index_relfilenumber);
} }
@ -4939,7 +4939,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout,
"SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n", "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n",
pg_class_oid); pg_class_oid);
appendPQExpBuffer(upgrade_buffer, appendPQExpBuffer(upgrade_buffer,
"SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" UINT64_FORMAT "'::pg_catalog.int8);\n",
relfilenumber); relfilenumber);
} }

View File

@ -538,7 +538,7 @@ isRelDataFile(const char *path)
segNo = 0; segNo = 0;
matched = false; matched = false;
nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo); nmatch = sscanf(path, "global/" UINT64_FORMAT ".%u", &rlocator.relNumber, &segNo);
if (nmatch == 1 || nmatch == 2) if (nmatch == 1 || nmatch == 2)
{ {
rlocator.spcOid = GLOBALTABLESPACE_OID; rlocator.spcOid = GLOBALTABLESPACE_OID;
@ -547,7 +547,7 @@ isRelDataFile(const char *path)
} }
else else
{ {
nmatch = sscanf(path, "base/%u/%u.%u", nmatch = sscanf(path, "base/%u/" UINT64_FORMAT ".%u",
&rlocator.dbOid, &rlocator.relNumber, &segNo); &rlocator.dbOid, &rlocator.relNumber, &segNo);
if (nmatch == 2 || nmatch == 3) if (nmatch == 2 || nmatch == 3)
{ {
@ -556,7 +556,7 @@ isRelDataFile(const char *path)
} }
else else
{ {
nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u", nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/" UINT64_FORMAT ".%u",
&rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber, &rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber,
&segNo); &segNo);
if (nmatch == 3 || nmatch == 4) if (nmatch == 3 || nmatch == 4)

View File

@ -527,7 +527,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
relname = PQgetvalue(res, relnum, i_relname); relname = PQgetvalue(res, relnum, i_relname);
curr->relname = pg_strdup(relname); curr->relname = pg_strdup(relname);
curr->relfilenumber = atooid(PQgetvalue(res, relnum, i_relfilenumber)); curr->relfilenumber =
atorelnumber(PQgetvalue(res, relnum, i_relfilenumber));
curr->tblsp_alloc = false; curr->tblsp_alloc = false;
/* Is the tablespace oid non-default? */ /* Is the tablespace oid non-default? */

View File

@ -15,10 +15,8 @@
* oids are the same between old and new clusters. This is important * oids are the same between old and new clusters. This is important
* because toast oids are stored as toast pointers in user tables. * because toast oids are stored as toast pointers in user tables.
* *
* While pg_class.oid and pg_class.relfilenode are initially the same in a * We control assignments of pg_class.relfilenode because we want the
* cluster, they can diverge due to CLUSTER, REINDEX, or VACUUM FULL. We * filenames to match between the old and new cluster.
* control assignments of pg_class.relfilenode because we want the filenames
* to match between the old and new cluster.
* *
* We control assignment of pg_tablespace.oid because we want the oid to match * We control assignment of pg_tablespace.oid because we want the oid to match
* between the old and new cluster. * between the old and new cluster.

View File

@ -190,14 +190,14 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
else else
snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno); snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno);
snprintf(old_file, sizeof(old_file), "%s%s/%u/%u%s%s", snprintf(old_file, sizeof(old_file), "%s%s/%u/" UINT64_FORMAT "%s%s",
map->old_tablespace, map->old_tablespace,
map->old_tablespace_suffix, map->old_tablespace_suffix,
map->db_oid, map->db_oid,
map->relfilenumber, map->relfilenumber,
type_suffix, type_suffix,
extent_suffix); extent_suffix);
snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s", snprintf(new_file, sizeof(new_file), "%s%s/%u/" UINT64_FORMAT "%s%s",
map->new_tablespace, map->new_tablespace,
map->new_tablespace_suffix, map->new_tablespace_suffix,
map->db_oid, map->db_oid,

View File

@ -884,7 +884,7 @@ main(int argc, char **argv)
} }
break; break;
case 'R': case 'R':
if (sscanf(optarg, "%u/%u/%u", if (sscanf(optarg, "%u/%u/" UINT64_FORMAT,
&config.filter_by_relation.spcOid, &config.filter_by_relation.spcOid,
&config.filter_by_relation.dbOid, &config.filter_by_relation.dbOid,
&config.filter_by_relation.relNumber) != 3 || &config.filter_by_relation.relNumber) != 3 ||

View File

@ -40,7 +40,7 @@ my $toast_index = $node->safe_psql('postgres',
# REINDEX operations. A set of relfilenodes is saved from the catalogs # REINDEX operations. A set of relfilenodes is saved from the catalogs
# and then compared with pg_class. # and then compared with pg_class.
$node->safe_psql('postgres', $node->safe_psql('postgres',
'CREATE TABLE index_relfilenodes (parent regclass, indname text, indoid oid, relfilenode oid);' 'CREATE TABLE index_relfilenodes (parent regclass, indname text, indoid oid, relfilenode int8);'
); );
# Save the relfilenode of a set of toast indexes, one from the catalog # Save the relfilenode of a set of toast indexes, one from the catalog
# pg_constraint and one from the test table. # pg_constraint and one from the test table.

View File

@ -149,10 +149,10 @@ GetRelationPath(Oid dbOid, Oid spcOid, RelFileNumber relNumber,
Assert(dbOid == 0); Assert(dbOid == 0);
Assert(backendId == InvalidBackendId); Assert(backendId == InvalidBackendId);
if (forkNumber != MAIN_FORKNUM) if (forkNumber != MAIN_FORKNUM)
path = psprintf("global/%u_%s", path = psprintf("global/" UINT64_FORMAT "_%s",
relNumber, forkNames[forkNumber]); relNumber, forkNames[forkNumber]);
else else
path = psprintf("global/%u", relNumber); path = psprintf("global/" UINT64_FORMAT, relNumber);
} }
else if (spcOid == DEFAULTTABLESPACE_OID) else if (spcOid == DEFAULTTABLESPACE_OID)
{ {
@ -160,21 +160,21 @@ GetRelationPath(Oid dbOid, Oid spcOid, RelFileNumber relNumber,
if (backendId == InvalidBackendId) if (backendId == InvalidBackendId)
{ {
if (forkNumber != MAIN_FORKNUM) if (forkNumber != MAIN_FORKNUM)
path = psprintf("base/%u/%u_%s", path = psprintf("base/%u/" UINT64_FORMAT "_%s",
dbOid, relNumber, dbOid, relNumber,
forkNames[forkNumber]); forkNames[forkNumber]);
else else
path = psprintf("base/%u/%u", path = psprintf("base/%u/" UINT64_FORMAT,
dbOid, relNumber); dbOid, relNumber);
} }
else else
{ {
if (forkNumber != MAIN_FORKNUM) if (forkNumber != MAIN_FORKNUM)
path = psprintf("base/%u/t%d_%u_%s", path = psprintf("base/%u/t%d_" UINT64_FORMAT "_%s",
dbOid, backendId, relNumber, dbOid, backendId, relNumber,
forkNames[forkNumber]); forkNames[forkNumber]);
else else
path = psprintf("base/%u/t%d_%u", path = psprintf("base/%u/t%d_" UINT64_FORMAT,
dbOid, backendId, relNumber); dbOid, backendId, relNumber);
} }
} }
@ -184,24 +184,24 @@ GetRelationPath(Oid dbOid, Oid spcOid, RelFileNumber relNumber,
if (backendId == InvalidBackendId) if (backendId == InvalidBackendId)
{ {
if (forkNumber != MAIN_FORKNUM) if (forkNumber != MAIN_FORKNUM)
path = psprintf("pg_tblspc/%u/%s/%u/%u_%s", path = psprintf("pg_tblspc/%u/%s/%u/" UINT64_FORMAT "_%s",
spcOid, TABLESPACE_VERSION_DIRECTORY, spcOid, TABLESPACE_VERSION_DIRECTORY,
dbOid, relNumber, dbOid, relNumber,
forkNames[forkNumber]); forkNames[forkNumber]);
else else
path = psprintf("pg_tblspc/%u/%s/%u/%u", path = psprintf("pg_tblspc/%u/%s/%u/" UINT64_FORMAT,
spcOid, TABLESPACE_VERSION_DIRECTORY, spcOid, TABLESPACE_VERSION_DIRECTORY,
dbOid, relNumber); dbOid, relNumber);
} }
else else
{ {
if (forkNumber != MAIN_FORKNUM) if (forkNumber != MAIN_FORKNUM)
path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u_%s", path = psprintf("pg_tblspc/%u/%s/%u/t%d_" UINT64_FORMAT "_%s",
spcOid, TABLESPACE_VERSION_DIRECTORY, spcOid, TABLESPACE_VERSION_DIRECTORY,
dbOid, backendId, relNumber, dbOid, backendId, relNumber,
forkNames[forkNumber]); forkNames[forkNumber]);
else else
path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u", path = psprintf("pg_tblspc/%u/%s/%u/t%d_" UINT64_FORMAT,
spcOid, TABLESPACE_VERSION_DIRECTORY, spcOid, TABLESPACE_VERSION_DIRECTORY,
dbOid, backendId, relNumber); dbOid, backendId, relNumber);
} }

View File

@ -13,6 +13,7 @@
#include "postgres_fe.h" #include "postgres_fe.h"
#include "common/logging.h" #include "common/logging.h"
#include "common/relpath.h"
#include "common/string.h" #include "common/string.h"
#include "fe_utils/option_utils.h" #include "fe_utils/option_utils.h"
@ -82,3 +83,42 @@ option_parse_int(const char *optarg, const char *optname,
*result = val; *result = val;
return true; return true;
} }
/*
 * option_parse_relfilenumber
 *
 * Validate a relfilenumber value given for a command-line option.
 *
 * optarg is the option's argument text; optname is the option's name as it
 * should appear in error messages (e.g. "-f/--filenode").
 *
 * Returns true if optarg is an unsigned decimal number no greater than
 * MAX_RELFILENUMBER, optionally surrounded by whitespace.  Otherwise an
 * error is logged with pg_log_error() and false is returned.  The parsed
 * value itself is not handed back to the caller.
 */
bool
option_parse_relfilenumber(const char *optarg, const char *optname)
{
	const char *start = optarg;
	char	   *endptr;
	uint64		val;
	bool		no_digits;
	bool		negative;

	/*
	 * strtou64() accepts a leading minus sign and negates the result in
	 * unsigned arithmetic, so a large negative literal could wrap around
	 * into the valid range.  Detect the sign ourselves.
	 */
	while (isspace((unsigned char) *start))
		start++;
	negative = (*start == '-');

	errno = 0;
	val = strtou64(optarg, &endptr, 10);

	/*
	 * If no conversion was performed, endptr is left pointing at optarg.
	 * Remember that before we advance endptr past trailing whitespace, so
	 * that empty or all-blank input is not mistaken for zero.
	 */
	no_digits = (endptr == optarg);

	/*
	 * Skip any trailing whitespace; if anything but whitespace remains before
	 * the terminating character, fail.
	 */
	while (*endptr != '\0' && isspace((unsigned char) *endptr))
		endptr++;

	if (no_digits || *endptr != '\0')
	{
		pg_log_error("invalid value \"%s\" for option %s",
					 optarg, optname);
		return false;
	}

	/* ERANGE means the input did not fit even in an unsigned 64-bit value. */
	if (negative || errno == ERANGE || val > MAX_RELFILENUMBER)
	{
		pg_log_error("%s must be in range " UINT64_FORMAT ".." UINT64_FORMAT,
					 optname, UINT64CONST(0), MAX_RELFILENUMBER);
		return false;
	}

	return true;
}

View File

@ -15,6 +15,7 @@
#define TRANSAM_H #define TRANSAM_H
#include "access/xlogdefs.h" #include "access/xlogdefs.h"
#include "common/relpath.h"
/* ---------------- /* ----------------
@ -196,6 +197,33 @@ FullTransactionIdAdvance(FullTransactionId *dest)
#define FirstUnpinnedObjectId 12000 #define FirstUnpinnedObjectId 12000
#define FirstNormalObjectId 16384 #define FirstNormalObjectId 16384
/* ----------
* RelFileNumbers are normally assigned sequentially beginning with
* FirstNormalRelFileNumber, but for system tables the initial RelFileNumber
* is equal to the table OID. This scheme allows pg_upgrade to work: we expect
* that the new cluster will contain only system tables, and that none of those
* will have previously been rewritten, so any RelFileNumber which is in use
* in both the old and new clusters will be used for the same relation in both
* places.
*
* This is important because pg_upgrade can't reactively move conflicting
* relations out of the way. If it tries to set the RelFileNumber for a
* relation to some value that's already in use by a different relation, the
* upgrade will just fail. It's OK if the same RelFileNumber is used for the
* same relation, though, since then nothing needs to be changed.
* ----------
*/
#define FirstNormalRelFileNumber ((RelFileNumber) 100000)
/*
 * CHECK_RELFILENUMBER_RANGE
 *		Report an ERROR (ERRCODE_INVALID_PARAMETER_VALUE) unless the given
 *		value lies in the range 0 .. MAX_RELFILENUMBER.
 *
 * The argument is expanded more than once, so it must not have side effects.
 * The do { ... } while (0) wrapper lets the macro be used as a single
 * statement.
 *
 * NOTE(review): if callers pass an unsigned value (RelFileNumber), the "< 0"
 * test can never be true and only the upper bound is effective -- confirm
 * the argument types at the call sites.
 */
#define CHECK_RELFILENUMBER_RANGE(relfilenumber) \
do { \
if ((relfilenumber) < 0 || (relfilenumber) > MAX_RELFILENUMBER) \
ereport(ERROR, \
errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
errmsg("relfilenumber %llu is out of range", \
(unsigned long long) (relfilenumber))); \
} while (0)
/* /*
* VariableCache is a data structure in shared memory that is used to track * VariableCache is a data structure in shared memory that is used to track
* OID and XID assignment state. For largely historical reasons, there is * OID and XID assignment state. For largely historical reasons, there is
@ -214,6 +242,15 @@ typedef struct VariableCacheData
Oid nextOid; /* next OID to assign */ Oid nextOid; /* next OID to assign */
uint32 oidCount; /* OIDs available before must do XLOG work */ uint32 oidCount; /* OIDs available before must do XLOG work */
/*
* These fields are protected by RelFileNumberGenLock.
*/
RelFileNumber nextRelFileNumber; /* next relfilenumber to assign */
RelFileNumber loggedRelFileNumber; /* last logged relfilenumber */
RelFileNumber flushedRelFileNumber; /* last flushed relfilenumber */
XLogRecPtr loggedRelFileNumberRecPtr; /* xlog record pointer w.r.t.
* loggedRelFileNumber */
/* /*
* These fields are protected by XidGenLock. * These fields are protected by XidGenLock.
*/ */
@ -293,6 +330,9 @@ extern void SetTransactionIdLimit(TransactionId oldest_datfrozenxid,
extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid); extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid);
extern bool ForceTransactionIdLimitUpdate(void); extern bool ForceTransactionIdLimitUpdate(void);
extern Oid GetNewObjectId(void); extern Oid GetNewObjectId(void);
extern RelFileNumber GetNewRelFileNumber(Oid reltablespace,
char relpersistence);
extern void SetNextRelFileNumber(RelFileNumber relnumber);
extern void StopGeneratingPinnedObjectIds(void); extern void StopGeneratingPinnedObjectIds(void);
#ifdef USE_ASSERT_CHECKING #ifdef USE_ASSERT_CHECKING

View File

@ -236,6 +236,7 @@ extern void CreateCheckPoint(int flags);
extern bool CreateRestartPoint(int flags); extern bool CreateRestartPoint(int flags);
extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN); extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN);
extern void XLogPutNextOid(Oid nextOid); extern void XLogPutNextOid(Oid nextOid);
extern XLogRecPtr LogNextRelFileNumber(RelFileNumber nextrelnumber);
extern XLogRecPtr XLogRestorePoint(const char *rpName); extern XLogRecPtr XLogRestorePoint(const char *rpName);
extern void UpdateFullPageWrites(void); extern void UpdateFullPageWrites(void);
extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p); extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p);

View File

@ -38,8 +38,5 @@ extern bool IsPinnedObject(Oid classId, Oid objectId);
extern Oid GetNewOidWithIndex(Relation relation, Oid indexId, extern Oid GetNewOidWithIndex(Relation relation, Oid indexId,
AttrNumber oidcolumn); AttrNumber oidcolumn);
extern RelFileNumber GetNewRelFileNumber(Oid reltablespace,
Relation pg_class,
char relpersistence);
#endif /* CATALOG_H */ #endif /* CATALOG_H */

View File

@ -57,6 +57,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 202209261 #define CATALOG_VERSION_NO 202209271
#endif #endif

View File

@ -34,6 +34,13 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat
/* oid */ /* oid */
Oid oid; Oid oid;
/* access method; 0 if not a table / index */
Oid relam BKI_DEFAULT(heap) BKI_LOOKUP_OPT(pg_am);
/* identifier of physical storage file */
/* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */
int64 relfilenode BKI_DEFAULT(0);
/* class name */ /* class name */
NameData relname; NameData relname;
@ -49,13 +56,6 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat
/* class owner */ /* class owner */
Oid relowner BKI_DEFAULT(POSTGRES) BKI_LOOKUP(pg_authid); Oid relowner BKI_DEFAULT(POSTGRES) BKI_LOOKUP(pg_authid);
/* access method; 0 if not a table / index */
Oid relam BKI_DEFAULT(heap) BKI_LOOKUP_OPT(pg_am);
/* identifier of physical storage file */
/* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */
Oid relfilenode BKI_DEFAULT(0);
/* identifier of table space for relation (0 means default for database) */ /* identifier of table space for relation (0 means default for database) */
Oid reltablespace BKI_DEFAULT(0) BKI_LOOKUP_OPT(pg_tablespace); Oid reltablespace BKI_DEFAULT(0) BKI_LOOKUP_OPT(pg_tablespace);
@ -154,7 +154,7 @@ typedef FormData_pg_class *Form_pg_class;
DECLARE_UNIQUE_INDEX_PKEY(pg_class_oid_index, 2662, ClassOidIndexId, on pg_class using btree(oid oid_ops)); DECLARE_UNIQUE_INDEX_PKEY(pg_class_oid_index, 2662, ClassOidIndexId, on pg_class using btree(oid oid_ops));
DECLARE_UNIQUE_INDEX(pg_class_relname_nsp_index, 2663, ClassNameNspIndexId, on pg_class using btree(relname name_ops, relnamespace oid_ops)); DECLARE_UNIQUE_INDEX(pg_class_relname_nsp_index, 2663, ClassNameNspIndexId, on pg_class using btree(relname name_ops, relnamespace oid_ops));
DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, ClassTblspcRelfilenodeIndexId, on pg_class using btree(reltablespace oid_ops, relfilenode oid_ops)); DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, ClassTblspcRelfilenodeIndexId, on pg_class using btree(reltablespace oid_ops, relfilenode int8_ops));
#ifdef EXPOSE_TO_CLIENT_CODE #ifdef EXPOSE_TO_CLIENT_CODE

View File

@ -41,6 +41,7 @@ typedef struct CheckPoint
* timeline (equals ThisTimeLineID otherwise) */ * timeline (equals ThisTimeLineID otherwise) */
bool fullPageWrites; /* current full_page_writes */ bool fullPageWrites; /* current full_page_writes */
FullTransactionId nextXid; /* next free transaction ID */ FullTransactionId nextXid; /* next free transaction ID */
RelFileNumber nextRelFileNumber; /* next relfilenumber */
Oid nextOid; /* next free OID */ Oid nextOid; /* next free OID */
MultiXactId nextMulti; /* next free MultiXactId */ MultiXactId nextMulti; /* next free MultiXactId */
MultiXactOffset nextMultiOffset; /* next free MultiXact offset */ MultiXactOffset nextMultiOffset; /* next free MultiXact offset */
@ -78,6 +79,7 @@ typedef struct CheckPoint
#define XLOG_FPI 0xB0 #define XLOG_FPI 0xB0
/* 0xC0 is used in Postgres 9.5-11 */ /* 0xC0 is used in Postgres 9.5-11 */
#define XLOG_OVERWRITE_CONTRECORD 0xD0 #define XLOG_OVERWRITE_CONTRECORD 0xD0
#define XLOG_NEXT_RELFILENUMBER 0xE0
/* /*

View File

@ -7329,11 +7329,11 @@
proname => 'pg_indexes_size', provolatile => 'v', prorettype => 'int8', proname => 'pg_indexes_size', provolatile => 'v', prorettype => 'int8',
proargtypes => 'regclass', prosrc => 'pg_indexes_size' }, proargtypes => 'regclass', prosrc => 'pg_indexes_size' },
{ oid => '2999', descr => 'filenode identifier of relation', { oid => '2999', descr => 'filenode identifier of relation',
proname => 'pg_relation_filenode', provolatile => 's', prorettype => 'oid', proname => 'pg_relation_filenode', provolatile => 's', prorettype => 'int8',
proargtypes => 'regclass', prosrc => 'pg_relation_filenode' }, proargtypes => 'regclass', prosrc => 'pg_relation_filenode' },
{ oid => '3454', descr => 'relation OID for filenode and tablespace', { oid => '3454', descr => 'relation OID for filenode and tablespace',
proname => 'pg_filenode_relation', provolatile => 's', proname => 'pg_filenode_relation', provolatile => 's',
prorettype => 'regclass', proargtypes => 'oid oid', prorettype => 'regclass', proargtypes => 'oid int8',
prosrc => 'pg_filenode_relation' }, prosrc => 'pg_filenode_relation' },
{ oid => '3034', descr => 'file path of relation', { oid => '3034', descr => 'file path of relation',
proname => 'pg_relation_filepath', provolatile => 's', prorettype => 'text', proname => 'pg_relation_filepath', provolatile => 's', prorettype => 'text',
@ -11125,15 +11125,15 @@
prosrc => 'binary_upgrade_set_missing_value' }, prosrc => 'binary_upgrade_set_missing_value' },
{ oid => '4545', descr => 'for use by pg_upgrade', { oid => '4545', descr => 'for use by pg_upgrade',
proname => 'binary_upgrade_set_next_heap_relfilenode', provolatile => 'v', proname => 'binary_upgrade_set_next_heap_relfilenode', provolatile => 'v',
proparallel => 'u', prorettype => 'void', proargtypes => 'oid', proparallel => 'u', prorettype => 'void', proargtypes => 'int8',
prosrc => 'binary_upgrade_set_next_heap_relfilenode' }, prosrc => 'binary_upgrade_set_next_heap_relfilenode' },
{ oid => '4546', descr => 'for use by pg_upgrade', { oid => '4546', descr => 'for use by pg_upgrade',
proname => 'binary_upgrade_set_next_index_relfilenode', provolatile => 'v', proname => 'binary_upgrade_set_next_index_relfilenode', provolatile => 'v',
proparallel => 'u', prorettype => 'void', proargtypes => 'oid', proparallel => 'u', prorettype => 'void', proargtypes => 'int8',
prosrc => 'binary_upgrade_set_next_index_relfilenode' }, prosrc => 'binary_upgrade_set_next_index_relfilenode' },
{ oid => '4547', descr => 'for use by pg_upgrade', { oid => '4547', descr => 'for use by pg_upgrade',
proname => 'binary_upgrade_set_next_toast_relfilenode', provolatile => 'v', proname => 'binary_upgrade_set_next_toast_relfilenode', provolatile => 'v',
proparallel => 'u', prorettype => 'void', proargtypes => 'oid', proparallel => 'u', prorettype => 'void', proargtypes => 'int8',
prosrc => 'binary_upgrade_set_next_toast_relfilenode' }, prosrc => 'binary_upgrade_set_next_toast_relfilenode' },
{ oid => '4548', descr => 'for use by pg_upgrade', { oid => '4548', descr => 'for use by pg_upgrade',
proname => 'binary_upgrade_set_next_pg_tablespace_oid', provolatile => 'v', proname => 'binary_upgrade_set_next_pg_tablespace_oid', provolatile => 'v',

View File

@ -22,10 +22,12 @@
/* /*
* RelFileNumber data type identifies the specific relation file name. * RelFileNumber data type identifies the specific relation file name.
*/ */
typedef Oid RelFileNumber; typedef uint64 RelFileNumber;
#define InvalidRelFileNumber ((RelFileNumber) InvalidOid) #define InvalidRelFileNumber ((RelFileNumber) 0)
#define RelFileNumberIsValid(relnumber) \ #define RelFileNumberIsValid(relnumber) \
((bool) ((relnumber) != InvalidRelFileNumber)) ((bool) ((relnumber) != InvalidRelFileNumber))
#define atorelnumber(x) ((RelFileNumber) strtou64((x), NULL, 10))
#define MAX_RELFILENUMBER UINT64CONST(0x00FFFFFFFFFFFFFF)
/* /*
* Name of major-version-specific tablespace subdirectories * Name of major-version-specific tablespace subdirectories
@ -35,6 +37,7 @@ typedef Oid RelFileNumber;
/* Characters to allow for an OID in a relation path */ /* Characters to allow for an OID in a relation path */
#define OIDCHARS 10 /* max chars printed by %u */ #define OIDCHARS 10 /* max chars printed by %u */
#define RELNUMBERCHARS 20 /* max chars printed by UINT64_FORMAT */
/* /*
* Stuff for fork names. * Stuff for fork names.

View File

@ -22,5 +22,7 @@ extern void handle_help_version_opts(int argc, char *argv[],
extern bool option_parse_int(const char *optarg, const char *optname, extern bool option_parse_int(const char *optarg, const char *optname,
int min_range, int max_range, int min_range, int max_range,
int *result); int *result);
extern bool option_parse_relfilenumber(const char *optarg,
const char *optname);
#endif /* OPTION_UTILS_H */ #endif /* OPTION_UTILS_H */

View File

@ -92,29 +92,66 @@ typedef struct buftag
{ {
Oid spcOid; /* tablespace oid */ Oid spcOid; /* tablespace oid */
Oid dbOid; /* database oid */ Oid dbOid; /* database oid */
RelFileNumber relNumber; /* relation file number */
ForkNumber forkNum; /* fork number */ /*
* relForkDetails[] stores the fork number in the high 8 bits of the first
* integer; the remaining 56 bits are used to store the relfilenumber.
* Expanding the relfilenumber to a full 64 bits would require widening
* the BufferTag, which is undesirable for performance reasons. We use
* two 32-bit values here rather than a single 64-bit value to avoid
* padding the struct out to a multiple of 8 bytes.
*/
uint32 relForkDetails[2];
BlockNumber blockNum; /* blknum relative to begin of reln */ BlockNumber blockNum; /* blknum relative to begin of reln */
} BufferTag; } BufferTag;
/* High relNumber bits in relForkDetails[0] */
#define BUFTAG_RELNUM_HIGH_BITS 24
/* Low relNumber bits in relForkDetails[1] */
#define BUFTAG_RELNUM_LOW_BITS 32
/* Mask to fetch high bits of relNumber from relForkDetails[0] */
#define BUFTAG_RELNUM_HIGH_MASK ((1U << BUFTAG_RELNUM_HIGH_BITS) - 1)
/* Mask to fetch low bits of relNumber from relForkDetails[1] */
#define BUFTAG_RELNUM_LOW_MASK 0XFFFFFFFF
static inline RelFileNumber static inline RelFileNumber
BufTagGetRelNumber(const BufferTag *tag) BufTagGetRelNumber(const BufferTag *tag)
{ {
return tag->relNumber; uint64 relnum;
relnum = ((uint64) tag->relForkDetails[0]) & BUFTAG_RELNUM_HIGH_MASK;
relnum = (relnum << BUFTAG_RELNUM_LOW_BITS) | tag->relForkDetails[1];
Assert(relnum <= MAX_RELFILENUMBER);
return (RelFileNumber) relnum;
} }
static inline ForkNumber static inline ForkNumber
BufTagGetForkNum(const BufferTag *tag) BufTagGetForkNum(const BufferTag *tag)
{ {
return tag->forkNum; ForkNumber ret;
StaticAssertStmt(MAX_FORKNUM <= INT8_MAX,
"MAX_FORKNUM can't be greater than INT8_MAX");
ret = (int8) (tag->relForkDetails[0] >> BUFTAG_RELNUM_HIGH_BITS);
return ret;
} }
static inline void static inline void
BufTagSetRelForkDetails(BufferTag *tag, RelFileNumber relnumber, BufTagSetRelForkDetails(BufferTag *tag, RelFileNumber relnumber,
ForkNumber forknum) ForkNumber forknum)
{ {
tag->relNumber = relnumber; Assert(relnumber <= MAX_RELFILENUMBER);
tag->forkNum = forknum; Assert(forknum <= MAX_FORKNUM);
tag->relForkDetails[0] = (relnumber >> BUFTAG_RELNUM_LOW_BITS) &
BUFTAG_RELNUM_HIGH_MASK;
tag->relForkDetails[0] |= (forknum << BUFTAG_RELNUM_HIGH_BITS);
tag->relForkDetails[1] = relnumber & BUFTAG_RELNUM_LOW_MASK;
} }
static inline RelFileLocator static inline RelFileLocator
@ -153,9 +190,9 @@ BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
{ {
return (tag1->spcOid == tag2->spcOid) && return (tag1->spcOid == tag2->spcOid) &&
(tag1->dbOid == tag2->dbOid) && (tag1->dbOid == tag2->dbOid) &&
(tag1->relNumber == tag2->relNumber) && (tag1->relForkDetails[0] == tag2->relForkDetails[0]) &&
(tag1->blockNum == tag2->blockNum) && (tag1->relForkDetails[1] == tag2->relForkDetails[1]) &&
(tag1->forkNum == tag2->forkNum); (tag1->blockNum == tag2->blockNum);
} }
static inline bool static inline bool

View File

@ -32,10 +32,11 @@
* Nonzero dbOid values correspond to pg_database.oid. * Nonzero dbOid values correspond to pg_database.oid.
* *
* relNumber identifies the specific relation. relNumber corresponds to * relNumber identifies the specific relation. relNumber corresponds to
* pg_class.relfilenode (NOT pg_class.oid, because we need to be able * pg_class.relfilenode. Notice that relNumber values are assigned by
* to assign new physical files to relations in some situations). * GetNewRelFileNumber(), which will only ever assign the same value once
* Notice that relNumber is only unique within a database in a particular * during the lifetime of a cluster. However, since CREATE DATABASE duplicates
* tablespace. * the relfilenumbers of the template database, the values are in practice only
* unique within a database, not globally.
* *
* Note: spcOid must be GLOBALTABLESPACE_OID if and only if dbOid is * Note: spcOid must be GLOBALTABLESPACE_OID if and only if dbOid is
* zero. We support shared relations only in the "global" tablespace. * zero. We support shared relations only in the "global" tablespace.
@ -75,6 +76,9 @@ typedef struct RelFileLocatorBackend
BackendId backend; BackendId backend;
} RelFileLocatorBackend; } RelFileLocatorBackend;
#define SizeOfRelFileLocatorBackend \
(offsetof(RelFileLocatorBackend, backend) + sizeof(BackendId))
#define RelFileLocatorBackendIsTemp(rlocator) \ #define RelFileLocatorBackendIsTemp(rlocator) \
((rlocator).backend != InvalidBackendId) ((rlocator).backend != InvalidBackendId)

View File

@ -2164,9 +2164,8 @@ select relname,
c.oid = oldoid as orig_oid, c.oid = oldoid as orig_oid,
case relfilenode case relfilenode
when 0 then 'none' when 0 then 'none'
when c.oid then 'own'
when oldfilenode then 'orig' when oldfilenode then 'orig'
else 'OTHER' else 'new'
end as storage, end as storage,
obj_description(c.oid, 'pg_class') as desc obj_description(c.oid, 'pg_class') as desc
from pg_class c left join old_oids using (relname) from pg_class c left join old_oids using (relname)
@ -2175,10 +2174,10 @@ select relname,
relname | orig_oid | storage | desc relname | orig_oid | storage | desc
------------------------------+----------+---------+--------------- ------------------------------+----------+---------+---------------
at_partitioned | t | none | at_partitioned | t | none |
at_partitioned_0 | t | own | at_partitioned_0 | t | orig |
at_partitioned_0_id_name_key | t | own | child 0 index at_partitioned_0_id_name_key | t | orig | child 0 index
at_partitioned_1 | t | own | at_partitioned_1 | t | orig |
at_partitioned_1_id_name_key | t | own | child 1 index at_partitioned_1_id_name_key | t | orig | child 1 index
at_partitioned_id_name_key | t | none | parent index at_partitioned_id_name_key | t | none | parent index
(6 rows) (6 rows)
@ -2198,9 +2197,8 @@ select relname,
c.oid = oldoid as orig_oid, c.oid = oldoid as orig_oid,
case relfilenode case relfilenode
when 0 then 'none' when 0 then 'none'
when c.oid then 'own'
when oldfilenode then 'orig' when oldfilenode then 'orig'
else 'OTHER' else 'new'
end as storage, end as storage,
obj_description(c.oid, 'pg_class') as desc obj_description(c.oid, 'pg_class') as desc
from pg_class c left join old_oids using (relname) from pg_class c left join old_oids using (relname)
@ -2209,10 +2207,10 @@ select relname,
relname | orig_oid | storage | desc relname | orig_oid | storage | desc
------------------------------+----------+---------+-------------- ------------------------------+----------+---------+--------------
at_partitioned | t | none | at_partitioned | t | none |
at_partitioned_0 | t | own | at_partitioned_0 | t | orig |
at_partitioned_0_id_name_key | f | own | parent index at_partitioned_0_id_name_key | f | new | parent index
at_partitioned_1 | t | own | at_partitioned_1 | t | orig |
at_partitioned_1_id_name_key | f | own | parent index at_partitioned_1_id_name_key | f | new | parent index
at_partitioned_id_name_key | f | none | parent index at_partitioned_id_name_key | f | none | parent index
(6 rows) (6 rows)
@ -2560,7 +2558,7 @@ CREATE FUNCTION check_ddl_rewrite(p_tablename regclass, p_ddl text)
RETURNS boolean RETURNS boolean
LANGUAGE plpgsql AS $$ LANGUAGE plpgsql AS $$
DECLARE DECLARE
v_relfilenode oid; v_relfilenode int8;
BEGIN BEGIN
v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename; v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename;

View File

@ -3,8 +3,8 @@
-- --
SET search_path = fast_default; SET search_path = fast_default;
CREATE SCHEMA fast_default; CREATE SCHEMA fast_default;
CREATE TABLE m(id OID); CREATE TABLE m(id BIGINT);
INSERT INTO m VALUES (NULL::OID); INSERT INTO m VALUES (NULL::BIGINT);
CREATE FUNCTION set(tabname name) RETURNS VOID CREATE FUNCTION set(tabname name) RETURNS VOID
AS $$ AS $$
BEGIN BEGIN

View File

@ -74,11 +74,11 @@ NOTICE: checking pg_type {typcollation} => pg_collation {oid}
NOTICE: checking pg_attribute {attrelid} => pg_class {oid} NOTICE: checking pg_attribute {attrelid} => pg_class {oid}
NOTICE: checking pg_attribute {atttypid} => pg_type {oid} NOTICE: checking pg_attribute {atttypid} => pg_type {oid}
NOTICE: checking pg_attribute {attcollation} => pg_collation {oid} NOTICE: checking pg_attribute {attcollation} => pg_collation {oid}
NOTICE: checking pg_class {relam} => pg_am {oid}
NOTICE: checking pg_class {relnamespace} => pg_namespace {oid} NOTICE: checking pg_class {relnamespace} => pg_namespace {oid}
NOTICE: checking pg_class {reltype} => pg_type {oid} NOTICE: checking pg_class {reltype} => pg_type {oid}
NOTICE: checking pg_class {reloftype} => pg_type {oid} NOTICE: checking pg_class {reloftype} => pg_type {oid}
NOTICE: checking pg_class {relowner} => pg_authid {oid} NOTICE: checking pg_class {relowner} => pg_authid {oid}
NOTICE: checking pg_class {relam} => pg_am {oid}
NOTICE: checking pg_class {reltablespace} => pg_tablespace {oid} NOTICE: checking pg_class {reltablespace} => pg_tablespace {oid}
NOTICE: checking pg_class {reltoastrelid} => pg_class {oid} NOTICE: checking pg_class {reltoastrelid} => pg_class {oid}
NOTICE: checking pg_class {relrewrite} => pg_class {oid} NOTICE: checking pg_class {relrewrite} => pg_class {oid}

View File

@ -1478,9 +1478,8 @@ select relname,
c.oid = oldoid as orig_oid, c.oid = oldoid as orig_oid,
case relfilenode case relfilenode
when 0 then 'none' when 0 then 'none'
when c.oid then 'own'
when oldfilenode then 'orig' when oldfilenode then 'orig'
else 'OTHER' else 'new'
end as storage, end as storage,
obj_description(c.oid, 'pg_class') as desc obj_description(c.oid, 'pg_class') as desc
from pg_class c left join old_oids using (relname) from pg_class c left join old_oids using (relname)
@ -1499,9 +1498,8 @@ select relname,
c.oid = oldoid as orig_oid, c.oid = oldoid as orig_oid,
case relfilenode case relfilenode
when 0 then 'none' when 0 then 'none'
when c.oid then 'own'
when oldfilenode then 'orig' when oldfilenode then 'orig'
else 'OTHER' else 'new'
end as storage, end as storage,
obj_description(c.oid, 'pg_class') as desc obj_description(c.oid, 'pg_class') as desc
from pg_class c left join old_oids using (relname) from pg_class c left join old_oids using (relname)
@ -1641,7 +1639,7 @@ CREATE FUNCTION check_ddl_rewrite(p_tablename regclass, p_ddl text)
RETURNS boolean RETURNS boolean
LANGUAGE plpgsql AS $$ LANGUAGE plpgsql AS $$
DECLARE DECLARE
v_relfilenode oid; v_relfilenode int8;
BEGIN BEGIN
v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename; v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename;

View File

@ -4,8 +4,8 @@
SET search_path = fast_default; SET search_path = fast_default;
CREATE SCHEMA fast_default; CREATE SCHEMA fast_default;
CREATE TABLE m(id OID); CREATE TABLE m(id BIGINT);
INSERT INTO m VALUES (NULL::OID); INSERT INTO m VALUES (NULL::BIGINT);
CREATE FUNCTION set(tabname name) RETURNS VOID CREATE FUNCTION set(tabname name) RETURNS VOID
AS $$ AS $$