postgresql/src/backend/catalog/catalog.c

610 lines
18 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* catalog.c
* routines concerned with catalog naming conventions and other
* bits of hard-wired knowledge
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
2010-09-20 22:08:53 +02:00
* src/backend/catalog/catalog.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <fcntl.h>
#include <unistd.h>
#include "access/genam.h"
#include "access/sysattr.h"
1999-07-16 07:00:38 +02:00
#include "access/transam.h"
#include "catalog/catalog.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_auth_members.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_database.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_pltemplate.h"
#include "catalog/pg_db_role_setting.h"
#include "catalog/pg_shdepend.h"
#include "catalog/pg_shdescription.h"
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_tablespace.h"
#include "catalog/toasting.h"
1999-07-16 07:00:38 +02:00
#include "miscadmin.h"
#include "storage/fd.h"
#include "utils/fmgroids.h"
#include "utils/rel.h"
#include "utils/tqual.h"
#define FORKNAMECHARS 4 /* max chars for a fork name */
/*
* Lookup table of fork name by fork number.
*
* If you add a new entry, remember to update the errhint below, and the
* documentation for pg_relation_size(). Also keep FORKNAMECHARS above
* up-to-date.
*/
const char *forkNames[] = {
"main", /* MAIN_FORKNUM */
"fsm", /* FSM_FORKNUM */
"vm", /* VISIBILITYMAP_FORKNUM */
"init" /* INIT_FORKNUM */
};
/*
* forkname_to_number - look up fork number by name
*/
ForkNumber
forkname_to_number(char *forkName)
{
ForkNumber forkNum;
for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
if (strcmp(forkNames[forkNum], forkName) == 0)
return forkNum;
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid fork name"),
errhint("Valid fork names are \"main\", \"fsm\", and \"vm\".")));
return InvalidForkNumber; /* keep compiler quiet */
}
/*
* forkname_chars
2011-04-10 17:42:00 +02:00
* We use this to figure out whether a filename could be a relation
* fork (as opposed to an oddly named stray file that somehow ended
* up in the database directory). If the passed string begins with
* a fork name (other than the main fork name), we return its length,
* and set *fork (if not NULL) to the fork number. If not, we return 0.
*
* Note that the present coding assumes that there are no fork names which
* are prefixes of other fork names.
*/
int
forkname_chars(const char *str, ForkNumber *fork)
{
ForkNumber forkNum;
for (forkNum = 1; forkNum <= MAX_FORKNUM; forkNum++)
{
2011-04-10 17:42:00 +02:00
int len = strlen(forkNames[forkNum]);
if (strncmp(forkNames[forkNum], str, len) == 0)
{
if (fork)
*fork = forkNum;
return len;
}
}
return 0;
}
/*
* relpathbackend - construct path to a relation's file
*
* Result is a palloc'd string.
*/
char *
relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum)
{
int pathlen;
char *path;
if (rnode.spcNode == GLOBALTABLESPACE_OID)
{
/* Shared system relations live in {datadir}/global */
Assert(rnode.dbNode == 0);
Assert(backend == InvalidBackendId);
pathlen = 7 + OIDCHARS + 1 + FORKNAMECHARS + 1;
path = (char *) palloc(pathlen);
if (forknum != MAIN_FORKNUM)
snprintf(path, pathlen, "global/%u_%s",
rnode.relNode, forkNames[forknum]);
else
snprintf(path, pathlen, "global/%u", rnode.relNode);
}
else if (rnode.spcNode == DEFAULTTABLESPACE_OID)
{
/* The default tablespace is {datadir}/base */
if (backend == InvalidBackendId)
{
pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1;
path = (char *) palloc(pathlen);
if (forknum != MAIN_FORKNUM)
snprintf(path, pathlen, "base/%u/%u_%s",
rnode.dbNode, rnode.relNode,
forkNames[forknum]);
else
snprintf(path, pathlen, "base/%u/%u",
rnode.dbNode, rnode.relNode);
}
else
{
/* OIDCHARS will suffice for an integer, too */
pathlen = 5 + OIDCHARS + 2 + OIDCHARS + 1 + OIDCHARS + 1
2011-04-10 17:42:00 +02:00
+ FORKNAMECHARS + 1;
path = (char *) palloc(pathlen);
if (forknum != MAIN_FORKNUM)
snprintf(path, pathlen, "base/%u/t%d_%u_%s",
rnode.dbNode, backend, rnode.relNode,
forkNames[forknum]);
else
snprintf(path, pathlen, "base/%u/t%d_%u",
rnode.dbNode, backend, rnode.relNode);
}
}
else
{
/* All other tablespaces are accessed via symlinks */
if (backend == InvalidBackendId)
{
pathlen = 9 + 1 + OIDCHARS + 1
2011-04-10 17:42:00 +02:00
+ strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + OIDCHARS + 1
+ OIDCHARS + 1 + FORKNAMECHARS + 1;
path = (char *) palloc(pathlen);
if (forknum != MAIN_FORKNUM)
snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/%u_%s",
rnode.spcNode, TABLESPACE_VERSION_DIRECTORY,
rnode.dbNode, rnode.relNode,
forkNames[forknum]);
else
snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/%u",
rnode.spcNode, TABLESPACE_VERSION_DIRECTORY,
rnode.dbNode, rnode.relNode);
}
else
{
/* OIDCHARS will suffice for an integer, too */
pathlen = 9 + 1 + OIDCHARS + 1
2011-04-10 17:42:00 +02:00
+ strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + OIDCHARS + 2
+ OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1;
path = (char *) palloc(pathlen);
if (forknum != MAIN_FORKNUM)
snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/t%d_%u_%s",
rnode.spcNode, TABLESPACE_VERSION_DIRECTORY,
rnode.dbNode, backend, rnode.relNode,
forkNames[forknum]);
else
snprintf(path, pathlen, "pg_tblspc/%u/%s/%u/t%d_%u",
rnode.spcNode, TABLESPACE_VERSION_DIRECTORY,
rnode.dbNode, backend, rnode.relNode);
}
}
return path;
}
/*
* GetDatabasePath - construct path to a database dir
*
* Result is a palloc'd string.
*
* XXX this must agree with relpath()!
*/
char *
GetDatabasePath(Oid dbNode, Oid spcNode)
{
int pathlen;
char *path;
if (spcNode == GLOBALTABLESPACE_OID)
{
/* Shared system relations live in {datadir}/global */
Assert(dbNode == 0);
pathlen = 6 + 1;
path = (char *) palloc(pathlen);
snprintf(path, pathlen, "global");
}
else if (spcNode == DEFAULTTABLESPACE_OID)
{
/* The default tablespace is {datadir}/base */
pathlen = 5 + OIDCHARS + 1;
path = (char *) palloc(pathlen);
snprintf(path, pathlen, "base/%u",
dbNode);
}
else
{
/* All other tablespaces are accessed via symlinks */
pathlen = 9 + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) +
2010-02-26 03:01:40 +01:00
1 + OIDCHARS + 1;
path = (char *) palloc(pathlen);
snprintf(path, pathlen, "pg_tblspc/%u/%s/%u",
spcNode, TABLESPACE_VERSION_DIRECTORY, dbNode);
}
return path;
}
/*
* IsSystemRelation
* True iff the relation is a system catalog relation.
*
* NB: TOAST relations are considered system relations by this test
* for compatibility with the old IsSystemRelationName function.
* This is appropriate in many places but not all. Where it's not,
* also check IsToastRelation.
*
* We now just test if the relation is in the system catalog namespace;
* so it's no longer necessary to forbid user relations from having
* names starting with pg_.
*/
bool
IsSystemRelation(Relation relation)
{
2002-09-04 22:31:48 +02:00
return IsSystemNamespace(RelationGetNamespace(relation)) ||
IsToastNamespace(RelationGetNamespace(relation));
}
/*
* IsSystemClass
* Like the above, but takes a Form_pg_class as argument.
* Used when we do not want to open the relation and have to
* search pg_class directly.
*/
bool
IsSystemClass(Form_pg_class reltuple)
{
2002-09-04 22:31:48 +02:00
Oid relnamespace = reltuple->relnamespace;
2002-09-04 22:31:48 +02:00
return IsSystemNamespace(relnamespace) ||
IsToastNamespace(relnamespace);
}
/*
* IsToastRelation
* True iff relation is a TOAST support relation (or index).
*/
bool
IsToastRelation(Relation relation)
{
return IsToastNamespace(RelationGetNamespace(relation));
}
/*
* IsToastClass
* Like the above, but takes a Form_pg_class as argument.
* Used when we do not want to open the relation and have to
* search pg_class directly.
*/
bool
IsToastClass(Form_pg_class reltuple)
{
2002-09-04 22:31:48 +02:00
Oid relnamespace = reltuple->relnamespace;
2002-09-04 22:31:48 +02:00
return IsToastNamespace(relnamespace);
}
/*
* IsSystemNamespace
* True iff namespace is pg_catalog.
*
* NOTE: the reason this isn't a macro is to avoid having to include
* catalog/pg_namespace.h in a lot of places.
*/
bool
IsSystemNamespace(Oid namespaceId)
{
return namespaceId == PG_CATALOG_NAMESPACE;
}
/*
* IsToastNamespace
* True iff namespace is pg_toast or my temporary-toast-table namespace.
*
* Note: this will return false for temporary-toast-table namespaces belonging
* to other backends. Those are treated the same as other backends' regular
* temp table namespaces, and access is prevented where appropriate.
*/
bool
IsToastNamespace(Oid namespaceId)
{
return (namespaceId == PG_TOAST_NAMESPACE) ||
isTempToastNamespace(namespaceId);
}
/*
* IsReservedName
* True iff name starts with the pg_ prefix.
*
* For some classes of objects, the prefix pg_ is reserved for
* system objects only. As of 8.0, this is only true for
* schema and tablespace names.
*/
bool
IsReservedName(const char *name)
{
/* ugly coding for speed */
return (name[0] == 'p' &&
name[1] == 'g' &&
name[2] == '_');
}
/*
* IsSharedRelation
* Given the OID of a relation, determine whether it's supposed to be
* shared across an entire database cluster.
*
* Hard-wiring this list is pretty grotty, but we really need it so that
* we can compute the locktag for a relation (and then lock it) without
2006-10-04 02:30:14 +02:00
* having already read its pg_class entry. If we try to retrieve relisshared
* from pg_class with no pre-existing lock, there is a race condition against
* anyone who is concurrently committing a change to the pg_class entry:
* since we read system catalog entries under SnapshotNow, it's possible
* that both the old and new versions of the row are invalid at the instants
* we scan them. We fix this by insisting that updaters of a pg_class
* row must hold exclusive lock on the corresponding rel, and that users
* of a relation must hold at least AccessShareLock on the rel *before*
* trying to open its relcache entry. But to lock a rel, you have to
* know if it's shared. Fortunately, the set of shared relations is
* fairly static, so a hand-maintained list of their OIDs isn't completely
* impractical.
*/
bool
IsSharedRelation(Oid relationId)
{
/* These are the shared catalogs (look for BKI_SHARED_RELATION) */
if (relationId == AuthIdRelationId ||
relationId == AuthMemRelationId ||
relationId == DatabaseRelationId ||
relationId == PLTemplateRelationId ||
relationId == SharedDescriptionRelationId ||
relationId == SharedDependRelationId ||
relationId == SharedSecLabelRelationId ||
relationId == TableSpaceRelationId ||
relationId == DbRoleSettingRelationId)
return true;
/* These are their indexes (see indexing.h) */
if (relationId == AuthIdRolnameIndexId ||
relationId == AuthIdOidIndexId ||
relationId == AuthMemRoleMemIndexId ||
relationId == AuthMemMemRoleIndexId ||
relationId == DatabaseNameIndexId ||
relationId == DatabaseOidIndexId ||
relationId == PLTemplateNameIndexId ||
relationId == SharedDescriptionObjIndexId ||
relationId == SharedDependDependerIndexId ||
relationId == SharedDependReferenceIndexId ||
relationId == SharedSecLabelObjectIndexId ||
relationId == TablespaceOidIndexId ||
relationId == TablespaceNameIndexId ||
relationId == DbRoleSettingDatidRolidIndexId)
return true;
/* These are their toast tables and toast indexes (see toasting.h) */
Arrange for client authentication to occur before we select a specific database to connect to. This is necessary for the walsender code to work properly (it was previously using an untenable assumption that template1 would always be available to connect to). This also gets rid of a small security shortcoming that was introduced in the original patch to eliminate the flat authentication files: before, you could find out whether or not the requested database existed even if you couldn't pass the authentication checks. The changes needed to support this are mainly just to treat pg_authid and pg_auth_members as nailed relations, so that we can read them without having to be able to locate real pg_class entries for them. This mechanism was already debugged for pg_database, but we hadn't recognized the value of applying it to those catalogs too. Since the current code doesn't have support for accessing toast tables before we've brought up all of the relcache, remove pg_authid's toast table to ensure that no one can store an out-of-line toasted value of rolpassword. The case seems quite unlikely to occur in practice, and was effectively unsupported anyway in the old "flatfiles" implementation. Update genbki.pl to actually implement the same rules as bootstrap.c does for not-nullability of catalog columns. The previous coding was a bit cheesy but worked all right for the previous set of bootstrap catalogs. It does not work for pg_authid, where rolvaliduntil needs to be nullable. Initdb forced due to minor catalog changes (mainly the toast table removal).
2010-04-21 01:48:47 +02:00
if (relationId == PgDatabaseToastTable ||
relationId == PgDatabaseToastIndex ||
relationId == PgShdescriptionToastTable ||
relationId == PgShdescriptionToastIndex ||
relationId == PgDbRoleSettingToastTable ||
relationId == PgDbRoleSettingToastIndex)
return true;
return false;
}
/*
* GetNewOid
* Generate a new OID that is unique within the given relation.
*
* Caller must have a suitable lock on the relation.
*
* Uniqueness is promised only if the relation has a unique index on OID.
* This is true for all system catalogs that have OIDs, but might not be
* true for user tables. Note that we are effectively assuming that the
* table has a relatively small number of entries (much less than 2^32)
* and there aren't very long runs of consecutive existing OIDs. Again,
* this is reasonable for system catalogs but less so for user tables.
*
* Since the OID is not immediately inserted into the table, there is a
* race condition here; but a problem could occur only if someone else
* managed to cycle through 2^32 OIDs and generate the same OID before we
2005-10-15 04:49:52 +02:00
* finish inserting our row. This seems unlikely to be a problem. Note
* that if we had to *commit* the row to end the race condition, the risk
* would be rather higher; therefore we use SnapshotDirty in the test,
* so that we will see uncommitted rows.
*/
Oid
GetNewOid(Relation relation)
{
Oid oidIndex;
/* If relation doesn't have OIDs at all, caller is confused */
Assert(relation->rd_rel->relhasoids);
/* In bootstrap mode, we don't have any indexes to use */
if (IsBootstrapProcessingMode())
return GetNewObjectId();
/* The relcache will cache the identity of the OID index for us */
oidIndex = RelationGetOidIndex(relation);
/* If no OID index, just hand back the next OID counter value */
if (!OidIsValid(oidIndex))
{
/*
2005-10-15 04:49:52 +02:00
* System catalogs that have OIDs should *always* have a unique OID
* index; we should only take this path for user tables. Give a
* warning if it looks like somebody forgot an index.
*/
if (IsSystemRelation(relation))
elog(WARNING, "generating possibly-non-unique OID for \"%s\"",
RelationGetRelationName(relation));
return GetNewObjectId();
}
/* Otherwise, use the index to find a nonconflicting OID */
return GetNewOidWithIndex(relation, oidIndex, ObjectIdAttributeNumber);
}
/*
* GetNewOidWithIndex
* Guts of GetNewOid: use the supplied index
*
* This is exported separately because there are cases where we want to use
* an index that will not be recognized by RelationGetOidIndex: TOAST tables
* and pg_largeobject have indexes that are usable, but have multiple columns
* and are on ordinary columns rather than a true OID column. This code
* will work anyway, so long as the OID is the index's first column. The
* caller must pass in the actual heap attnum of the OID column, however.
*
* Caller must have a suitable lock on the relation.
*/
Oid
GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn)
{
Oid newOid;
SnapshotData SnapshotDirty;
SysScanDesc scan;
ScanKeyData key;
bool collides;
InitDirtySnapshot(SnapshotDirty);
/* Generate new OIDs until we find one not in the table */
do
{
CHECK_FOR_INTERRUPTS();
newOid = GetNewObjectId();
ScanKeyInit(&key,
oidcolumn,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(newOid));
/* see notes above about using SnapshotDirty */
scan = systable_beginscan(relation, indexId, true,
&SnapshotDirty, 1, &key);
collides = HeapTupleIsValid(systable_getnext(scan));
systable_endscan(scan);
} while (collides);
return newOid;
}
/*
* GetNewRelFileNode
* Generate a new relfilenode number that is unique within the given
* tablespace.
*
* If the relfilenode will also be used as the relation's OID, pass the
* opened pg_class catalog, and this routine will guarantee that the result
* is also an unused OID within pg_class. If the result is to be used only
* as a relfilenode for an existing relation, pass NULL for pg_class.
*
* As with GetNewOid, there is some theoretical risk of a race condition,
* but it doesn't seem worth worrying about.
*
* Note: we don't support using this in bootstrap mode. All relations
* created by bootstrap have preassigned OIDs, so there's no need.
*/
Oid
GetNewRelFileNode(Oid reltablespace, Relation pg_class, char relpersistence)
{
RelFileNodeBackend rnode;
char *rpath;
int fd;
bool collides;
BackendId backend;
switch (relpersistence)
{
case RELPERSISTENCE_TEMP:
backend = MyBackendId;
break;
case RELPERSISTENCE_UNLOGGED:
case RELPERSISTENCE_PERMANENT:
backend = InvalidBackendId;
break;
default:
elog(ERROR, "invalid relpersistence: %c", relpersistence);
return InvalidOid; /* placate compiler */
}
/* This logic should match RelationInitPhysicalAddr */
rnode.node.spcNode = reltablespace ? reltablespace : MyDatabaseTableSpace;
rnode.node.dbNode = (rnode.node.spcNode == GLOBALTABLESPACE_OID) ? InvalidOid : MyDatabaseId;
/*
* The relpath will vary based on the backend ID, so we must initialize
* that properly here to make sure that any collisions based on filename
* are properly detected.
*/
rnode.backend = backend;
do
{
CHECK_FOR_INTERRUPTS();
/* Generate the OID */
if (pg_class)
rnode.node.relNode = GetNewOid(pg_class);
else
rnode.node.relNode = GetNewObjectId();
/* Check for existing file of same name */
rpath = relpath(rnode, MAIN_FORKNUM);
fd = BasicOpenFile(rpath, O_RDONLY | PG_BINARY, 0);
if (fd >= 0)
{
/* definite collision */
close(fd);
collides = true;
}
else
{
/*
* Here we have a little bit of a dilemma: if errno is something
2005-10-15 04:49:52 +02:00
* other than ENOENT, should we declare a collision and loop? In
* particular one might think this advisable for, say, EPERM.
* However there really shouldn't be any unreadable files in a
* tablespace directory, and if the EPERM is actually complaining
* that we can't read the directory itself, we'd be in an infinite
* loop. In practice it seems best to go ahead regardless of the
2005-10-15 04:49:52 +02:00
* errno. If there is a colliding file we will get an smgr
* failure when we attempt to create the new relation file.
*/
collides = false;
}
pfree(rpath);
} while (collides);
return rnode.node.relNode;
}