Add infrastructure for mapping relfilenodes to relation OIDs.

Future patches are expected to introduce logical replication that
works by decoding WAL.  WAL contains relfilenodes rather than relation
OIDs, so this infrastructure will be needed to find the relation OID
based on WAL contents.

If logical replication does not make it into this release, we probably
should consider reverting this, since it will add some overhead to DDL
operations that create new relations.  One additional index insert per
pg_class row is not a large overhead, but it's more than zero.
Another way of meeting the needs of logical replication would be to
add the relation OID to WAL, but that would burden DML operations, not
only DDL.

Andres Freund, with some changes by me.  Design review, in earlier
versions, by Álvaro Herrera.
This commit is contained in:
Robert Haas 2013-07-22 10:34:34 -04:00
parent b3b10c3903
commit f01d1ae3a1
14 changed files with 408 additions and 3 deletions

View File

@ -15748,6 +15748,9 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
<indexterm>
<primary>pg_relation_filepath</primary>
</indexterm>
<indexterm>
<primary>pg_filenode_relation</primary>
</indexterm>
<table id="functions-admin-dblocation">
<title>Database Object Location Functions</title>
@ -15776,6 +15779,15 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
File path name of the specified relation
</entry>
</row>
<row>
<entry>
<literal><function>pg_filenode_relation(<parameter>tablespace</parameter> <type>oid</type>, <parameter>filenode</parameter> <type>oid</type>)</function></literal>
</entry>
<entry><type>regclass</type></entry>
<entry>
Find the relation associated with a given tablespace and filenode
</entry>
</row>
</tbody>
</tgroup>
</table>
@ -15799,6 +15811,13 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
the relation.
</para>
<para>
<function>pg_filenode_relation</> is the reverse of
<function>pg_relation_filenode</>. Given a <quote>tablespace</> OID and
a <quote>filenode</> it returns the associated relation. The default
tablespace can be specified as 0.
</para>
</sect2>
<sect2 id="functions-admin-genfile">

View File

@ -28,6 +28,7 @@
#include "utils/builtins.h"
#include "utils/numeric.h"
#include "utils/rel.h"
#include "utils/relfilenodemap.h"
#include "utils/relmapper.h"
#include "utils/syscache.h"
@ -755,6 +756,33 @@ pg_relation_filenode(PG_FUNCTION_ARGS)
PG_RETURN_OID(result);
}
/*
 * pg_filenode_relation
 *		SQL-callable reverse lookup from (reltablespace, relfilenode) to the
 *		owning relation.
 *
 * Intended for matching an individual file on the filesystem back to its
 * table.  That is not trivially possible by querying pg_class directly,
 * because pg_class does not contain the relfilenodes of shared and nailed
 * tables.
 *
 * Argument 0 is the tablespace OID, argument 1 the filenode.  Callers that do
 * not know DEFAULTTABLESPACE_OID may pass 0 for the tablespace instead.
 *
 * Returns the relation's OID, or SQL NULL (rather than raising an error) when
 * no mapping can be found.
 */
Datum
pg_filenode_relation(PG_FUNCTION_ARGS)
{
	Oid			tblspc = PG_GETARG_OID(0);
	Oid			filenode = PG_GETARG_OID(1);
	Oid			relid = RelidByRelfilenode(tblspc, filenode);

	if (OidIsValid(relid))
		PG_RETURN_OID(relid);

	PG_RETURN_NULL();
}
/*
* Get the pathname (relative to $PGDATA) of a relation
*

View File

@ -13,6 +13,7 @@ top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = attoptcache.o catcache.o evtcache.o inval.o plancache.o relcache.o \
relmapper.o spccache.o syscache.o lsyscache.o typcache.o ts_cache.o
relmapper.o relfilenodemap.o spccache.o syscache.o lsyscache.o \
typcache.o ts_cache.o
include $(top_srcdir)/src/backend/common.mk

View File

@ -178,7 +178,7 @@ static int maxSharedInvalidMessagesArray;
*/
#define MAX_SYSCACHE_CALLBACKS 32
#define MAX_RELCACHE_CALLBACKS 5
#define MAX_RELCACHE_CALLBACKS 10
static struct SYSCACHECALLBACK
{

247
src/backend/utils/cache/relfilenodemap.c vendored Normal file
View File

@ -0,0 +1,247 @@
/*-------------------------------------------------------------------------
*
* relfilenodemap.c
* relfilenode to oid mapping cache.
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/utils/cache/relfilenodemap.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "catalog/indexing.h"
#include "catalog/pg_class.h"
#include "catalog/pg_tablespace.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/catcache.h"
#include "utils/hsearch.h"
#include "utils/inval.h"
#include "utils/fmgroids.h"
#include "utils/rel.h"
#include "utils/relfilenodemap.h"
#include "utils/relmapper.h"
/* Hash table caching (reltablespace, relfilenode) -> relation OID lookups */
static HTAB *RelfilenodeMapHash = NULL;

/*
 * Scan key template for the pg_class (reltablespace, relfilenode) index;
 * built first time through in InitializeRelfilenodeMap.  Private to this
 * file: lookups copy it and fill in the arguments on the copy.
 */
static ScanKeyData relfilenode_skey[2];
/*
 * Lookup key of the relfilenode map.  Lookups zero the whole key before
 * filling it in, so any padding compares equal.
 */
typedef struct
{
Oid reltablespace; /* tablespace OID as shown in pg_class (0 = database default) */
Oid relfilenode; /* filenode being looked up */
} RelfilenodeMapKey;
/*
 * One cached lookup result.  relid is InvalidOid when the lookup found no
 * relation (a cached negative match).
 */
typedef struct
{
RelfilenodeMapKey key; /* lookup key - must be first */
Oid relid; /* pg_class.oid */
} RelfilenodeMapEntry;
/*
 * RelfilenodeMapInvalidateCallback
 *		Flush mapping entries when pg_class is updated in a relevant fashion.
 *
 * Registered as a relcache invalidation callback.  'arg' is unused.  'relid'
 * is the OID of the relation being invalidated, or InvalidOid to request a
 * complete reset of the cache.
 */
static void
RelfilenodeMapInvalidateCallback(Datum arg, Oid relid)
{
	HASH_SEQ_STATUS status;
	RelfilenodeMapEntry *entry;

	/* nothing to do if the cache is not active */
	if (RelfilenodeMapHash == NULL)
		return;

	/*
	 * Remove entries one at a time instead of destroying the whole table.
	 * A lookup may be holding a pointer into the table while invalidations
	 * are processed; removing an entry leaves the table itself valid,
	 * whereas destroying the table would leave that pointer dangling.
	 */
	hash_seq_init(&status, RelfilenodeMapHash);
	while ((entry = (RelfilenodeMapEntry *) hash_seq_search(&status)) != NULL)
	{
		/*
		 * On a complete reset (relid == InvalidOid) every entry goes.
		 * Otherwise drop the entries of the flushed relation (the same OID
		 * may occur in more than one tablespace) and, always, negative cache
		 * entries: the invalidation may be for a relation that now owns a
		 * filenode we previously recorded as unknown.
		 */
		if (relid == InvalidOid ||		/* complete reset */
			entry->relid == InvalidOid ||	/* negative cache entry */
			entry->relid == relid)		/* individual flushed relation */
		{
			if (hash_search(RelfilenodeMapHash,
							(void *) &entry->key,
							HASH_REMOVE,
							NULL) == NULL)
				elog(ERROR, "hash table corrupted");
		}
	}
}
/*
 * InitializeRelfilenodeMap
 *		Initialize cache, either on first use or after a reset.
 *
 * On the first call this builds the scan key template and registers the
 * relcache invalidation callback; on every call it (re)creates the hash
 * table in CacheMemoryContext.
 */
static void
InitializeRelfilenodeMap(void)
{
	HASHCTL		ctl;
	static bool initial_init_done = false;
	int			i;

	/* Make sure we've initialized CacheMemoryContext. */
	if (CacheMemoryContext == NULL)
		CreateCacheMemoryContext();

	/*
	 * One-time setup.  This is done before the hash table is created so that
	 * an error raised here cannot leave us partially initialized, i.e. with
	 * a non-NULL RelfilenodeMapHash but no scan key and no invalidation
	 * callback.  The invalidation callback must only ever be registered
	 * once, hence the flag.
	 */
	if (!initial_init_done)
	{
		/* build skey */
		MemSet(&relfilenode_skey, 0, sizeof(relfilenode_skey));

		for (i = 0; i < 2; i++)
		{
			fmgr_info_cxt(F_OIDEQ,
						  &relfilenode_skey[i].sk_func,
						  CacheMemoryContext);
			relfilenode_skey[i].sk_strategy = BTEqualStrategyNumber;
			relfilenode_skey[i].sk_subtype = InvalidOid;
			relfilenode_skey[i].sk_collation = InvalidOid;
		}

		relfilenode_skey[0].sk_attno = Anum_pg_class_reltablespace;
		relfilenode_skey[1].sk_attno = Anum_pg_class_relfilenode;

		/* Watch for invalidation events. */
		CacheRegisterRelcacheCallback(RelfilenodeMapInvalidateCallback,
									  (Datum) 0);

		initial_init_done = true;
	}

	/* Initialize the hash table; this is what marks the cache as active. */
	MemSet(&ctl, 0, sizeof(ctl));
	ctl.keysize = sizeof(RelfilenodeMapKey);
	ctl.entrysize = sizeof(RelfilenodeMapEntry);
	ctl.hash = tag_hash;
	ctl.hcxt = CacheMemoryContext;

	RelfilenodeMapHash =
		hash_create("RelfilenodeMap cache", 1024, &ctl,
					HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
}
/*
 * RelidByRelfilenode
 *		Map a relation's (tablespace, filenode) to a relation's oid and cache
 *		the result.
 *
 * reltablespace may be given either as the database's default tablespace or
 * as 0; both are treated the same, matching what pg_class stores.
 *
 * Returns InvalidOid if no relation matching the criteria could be found.
 * Raises an error if pg_class contains more than one matching row.
 */
Oid
RelidByRelfilenode(Oid reltablespace, Oid relfilenode)
{
	RelfilenodeMapKey key;
	RelfilenodeMapEntry *entry;
	bool		found;
	Oid			relid = InvalidOid;

	if (RelfilenodeMapHash == NULL)
		InitializeRelfilenodeMap();

	/* pg_class will show 0 when the value is actually MyDatabaseTableSpace */
	if (reltablespace == MyDatabaseTableSpace)
		reltablespace = 0;

	MemSet(&key, 0, sizeof(key));
	key.reltablespace = reltablespace;
	key.relfilenode = relfilenode;

	/*
	 * Check the cache first.  Negative matches are cached as well, so a hit
	 * may legitimately return InvalidOid.  Caching those is not really
	 * necessary for performance, since querying invalid values isn't
	 * supposed to be a frequent thing, but it keeps the code simple.
	 *
	 * Only look here; the entry is not created until the answer is known.
	 * Creating it up front would leave a half-filled entry behind if the
	 * lookup below errors out, and the pointer could go stale if a cache
	 * invalidation is processed while pg_class is being read.
	 */
	entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_FIND, &found);

	if (found)
		return entry->relid;

	/* ok, no previous cache entry, do it the hard way */

	if (reltablespace == GLOBALTABLESPACE_OID)
	{
		/* shared tables are only known to the relation mapper */
		relid = RelationMapFilenodeToOid(relfilenode, true);
	}
	else
	{
		SysScanDesc scandesc;
		Relation	relation;
		HeapTuple	ntp;
		ScanKeyData skey[2];

		/* check plain relations by looking in pg_class */
		relation = heap_open(RelationRelationId, AccessShareLock);

		/* copy scankey to local copy, it will be modified during the scan */
		memcpy(skey, relfilenode_skey, sizeof(skey));

		/* set scan arguments */
		skey[0].sk_argument = ObjectIdGetDatum(reltablespace);
		skey[1].sk_argument = ObjectIdGetDatum(relfilenode);

		scandesc = systable_beginscan(relation,
									  ClassTblspcRelfilenodeIndexId,
									  true,
									  NULL,
									  2,
									  skey);

		found = false;

		while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
		{
			/* the index is not unique, so verify there is a single match */
			if (found)
				elog(ERROR,
					 "unexpected duplicate for tablespace %u, relfilenode %u",
					 reltablespace, relfilenode);
			found = true;

#ifdef USE_ASSERT_CHECKING
			if (assert_enabled)
			{
				bool		isnull;
				Oid			check;

				check = fastgetattr(ntp, Anum_pg_class_reltablespace,
									RelationGetDescr(relation),
									&isnull);
				Assert(!isnull && check == reltablespace);

				check = fastgetattr(ntp, Anum_pg_class_relfilenode,
									RelationGetDescr(relation),
									&isnull);
				Assert(!isnull && check == relfilenode);
			}
#endif
			relid = HeapTupleGetOid(ntp);
		}

		systable_endscan(scandesc);
		heap_close(relation, AccessShareLock);

		/* check for tables that are mapped but not shared */
		if (!found)
			relid = RelationMapFilenodeToOid(relfilenode, false);
	}

	/*
	 * Only now enter the result into the cache.  If an invalidation processed
	 * during the lookup reset the cache altogether, rebuild it first.
	 */
	if (RelfilenodeMapHash == NULL)
		InitializeRelfilenodeMap();

	entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_ENTER, NULL);
	entry->relid = relid;

	return relid;
}

View File

@ -180,6 +180,59 @@ RelationMapOidToFilenode(Oid relationId, bool shared)
return InvalidOid;
}
/*
 * RelationMapFilenodeToOid
 *
 * Inverse of RelationMapOidToFilenode: given a filenode, find the OID of the
 * mapped relation it belongs to.  'shared' selects whether the shared or the
 * database-local maps are consulted.
 *
 * This is meant for informational use (inspecting the filesystem or xlog),
 * not for normal running.
 *
 * Returns InvalidOid when no mapping entry carries this filenode, which
 * easily happens if the relfilenode doesn't pertain to a mapped relation.
 */
Oid
RelationMapFilenodeToOid(Oid filenode, bool shared)
{
	const RelMapFile *candidates[2];
	int			m;

	/* Pending updates are searched first: believe them over the main maps */
	if (shared)
	{
		candidates[0] = &active_shared_updates;
		candidates[1] = &shared_map;
	}
	else
	{
		candidates[0] = &active_local_updates;
		candidates[1] = &local_map;
	}

	for (m = 0; m < 2; m++)
	{
		const RelMapFile *map = candidates[m];
		int32		i;

		for (i = 0; i < map->num_mappings; i++)
		{
			if (map->mappings[i].mapfilenode == filenode)
				return map->mappings[i].mapoid;
		}
	}

	return InvalidOid;
}
/*
* RelationMapUpdateMap
*

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201307181
#define CATALOG_VERSION_NO 201307221
#endif

View File

@ -106,6 +106,8 @@ DECLARE_UNIQUE_INDEX(pg_class_oid_index, 2662, on pg_class using btree(oid oid_o
#define ClassOidIndexId 2662
DECLARE_UNIQUE_INDEX(pg_class_relname_nsp_index, 2663, on pg_class using btree(relname name_ops, relnamespace oid_ops));
#define ClassNameNspIndexId 2663
DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, on pg_class using btree(reltablespace oid_ops, relfilenode oid_ops));
#define ClassTblspcRelfilenodeIndexId 3455
DECLARE_UNIQUE_INDEX(pg_collation_name_enc_nsp_index, 3164, on pg_collation using btree(collname name_ops, collencoding int4_ops, collnamespace oid_ops));
#define CollationNameEncNspIndexId 3164

View File

@ -3448,6 +3448,8 @@ DATA(insert OID = 2998 ( pg_indexes_size PGNSP PGUID 12 1 0 0 0 f f f f t f v 1
DESCR("disk space usage for all indexes attached to the specified table");
DATA(insert OID = 2999 ( pg_relation_filenode PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 26 "2205" _null_ _null_ _null_ _null_ pg_relation_filenode _null_ _null_ _null_ ));
DESCR("filenode identifier of relation");
DATA(insert OID = 3454 ( pg_filenode_relation PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 2205 "26 26" _null_ _null_ _null_ _null_ pg_filenode_relation _null_ _null_ _null_ ));
DESCR("relation OID for filenode and tablespace");
DATA(insert OID = 3034 ( pg_relation_filepath PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 25 "2205" _null_ _null_ _null_ _null_ pg_relation_filepath _null_ _null_ _null_ ));
DESCR("file path of relation");

View File

@ -460,6 +460,7 @@ extern Datum pg_size_pretty_numeric(PG_FUNCTION_ARGS);
extern Datum pg_table_size(PG_FUNCTION_ARGS);
extern Datum pg_indexes_size(PG_FUNCTION_ARGS);
extern Datum pg_relation_filenode(PG_FUNCTION_ARGS);
extern Datum pg_filenode_relation(PG_FUNCTION_ARGS);
extern Datum pg_relation_filepath(PG_FUNCTION_ARGS);
/* genfile.c */

View File

@ -0,0 +1,18 @@
/*-------------------------------------------------------------------------
*
* relfilenodemap.h
* relfilenode to oid mapping cache.
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/utils/relfilenodemap.h
*
*-------------------------------------------------------------------------
*/
#ifndef RELFILENODEMAP_H
#define RELFILENODEMAP_H
extern Oid RelidByRelfilenode(Oid reltablespace, Oid relfilenode);
#endif /* RELFILENODEMAP_H */

View File

@ -36,6 +36,8 @@ typedef struct xl_relmap_update
extern Oid RelationMapOidToFilenode(Oid relationId, bool shared);
extern Oid RelationMapFilenodeToOid(Oid filenode, bool shared);
extern void RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared,
bool immediate);

View File

@ -2305,3 +2305,21 @@ Check constraints:
DROP TABLE alter2.tt8;
DROP SCHEMA alter2;
-- Check that we map relation oids to filenodes and back correctly.
-- Don't display all the mappings so the test output doesn't change
-- all the time, but make sure we actually do test some values.
SELECT
SUM((mapped_oid != oid OR mapped_oid IS NULL)::int) incorrectly_mapped,
count(*) > 200 have_mappings
FROM (
SELECT
oid, reltablespace, relfilenode, relname,
pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) mapped_oid
FROM pg_class
WHERE relkind IN ('r', 'i', 'S', 't', 'm')
) mapped;
incorrectly_mapped | have_mappings
--------------------+---------------
0 | t
(1 row)

View File

@ -1544,3 +1544,17 @@ ALTER TABLE IF EXISTS tt8 SET SCHEMA alter2;
DROP TABLE alter2.tt8;
DROP SCHEMA alter2;
-- Check that we map relation oids to filenodes and back correctly.
-- Don't display all the mappings so the test output doesn't change
-- all the time, but make sure we actually do test some values.
SELECT
SUM((mapped_oid != oid OR mapped_oid IS NULL)::int) incorrectly_mapped,
count(*) > 200 have_mappings
FROM (
SELECT
oid, reltablespace, relfilenode, relname,
pg_filenode_relation(reltablespace, pg_relation_filenode(oid)) mapped_oid
FROM pg_class
WHERE relkind IN ('r', 'i', 'S', 't', 'm')
) mapped;