postgresql/src/backend/catalog/toasting.c
Robert Haas debcec7dc3 Include the backend ID in the relpath of temporary relations.
This allows us to reliably remove all leftover temporary relation
files on cluster startup without reference to system catalogs or WAL;
therefore, we no longer include temporary relations in XLOG_XACT_COMMIT
and XLOG_XACT_ABORT WAL records.

Since these changes require including a backend ID in each
SharedInvalSmgrMsg, the size of the SharedInvalidationMessage.id
field has been reduced from two bytes to one, and the maximum number
of connections has been reduced from INT_MAX / 4 to 2^23-1.  It would
be possible to remove these restrictions by increasing the size of
SharedInvalidationMessage by 4 bytes, but right now that doesn't seem
like a good trade-off.

Review by Jaime Casanova and Tom Lane.
2010-08-13 20:10:54 +00:00

382 lines
11 KiB
C

/*-------------------------------------------------------------------------
*
* toasting.c
* This file contains routines to support creation of toast tables
*
*
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/toasting.c,v 1.34 2010/08/13 20:10:50 rhaas Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/heapam.h"
#include "access/tuptoaster.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/heap.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_type.h"
#include "catalog/toasting.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "utils/builtins.h"
#include "utils/syscache.h"
/* Kluges for upgrade-in-place support */
extern Oid binary_upgrade_next_toast_relfilenode;
Oid binary_upgrade_next_pg_type_toast_oid = InvalidOid;
static bool create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
Datum reloptions);
static bool needs_toast_table(Relation rel);
/*
* AlterTableCreateToastTable
* If the table needs a toast table, and doesn't already have one,
* then create a toast table for it.
*
* reloptions for the toast table can be passed, too. Pass (Datum) 0
* for default reloptions.
*
* We expect the caller to have verified that the relation is a table and have
* already done any necessary permission checks. Callers expect this function
* to end with CommandCounterIncrement if it makes any changes.
*/
void
AlterTableCreateToastTable(Oid relOid, Datum reloptions)
{
Relation rel;
/*
* Grab an exclusive lock on the target table, which we will NOT release
* until end of transaction. (This is probably redundant in all present
* uses...)
*/
rel = heap_open(relOid, AccessExclusiveLock);
/* create_toast_table does all the work */
(void) create_toast_table(rel, InvalidOid, InvalidOid, reloptions);
heap_close(rel, NoLock);
}
/*
* Create a toast table during bootstrap
*
* Here we need to prespecify the OIDs of the toast table and its index
*/
void
BootstrapToastTable(char *relName, Oid toastOid, Oid toastIndexOid)
{
Relation rel;
rel = heap_openrv(makeRangeVar(NULL, relName, -1), AccessExclusiveLock);
/* Note: during bootstrap may see uncataloged relation */
if (rel->rd_rel->relkind != RELKIND_RELATION &&
rel->rd_rel->relkind != RELKIND_UNCATALOGED)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table",
relName)));
/* create_toast_table does all the work */
if (!create_toast_table(rel, toastOid, toastIndexOid, (Datum) 0))
elog(ERROR, "\"%s\" does not require a toast table",
relName);
heap_close(rel, NoLock);
}
/*
* create_toast_table --- internal workhorse
*
* rel is already opened and exclusive-locked
* toastOid and toastIndexOid are normally InvalidOid, but during
* bootstrap they can be nonzero to specify hand-assigned OIDs
*/
static bool
create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, Datum reloptions)
{
Oid relOid = RelationGetRelid(rel);
HeapTuple reltup;
TupleDesc tupdesc;
bool shared_relation;
bool mapped_relation;
Relation class_rel;
Oid toast_relid;
Oid toast_idxid;
Oid toast_typid = InvalidOid;
Oid namespaceid;
char toast_relname[NAMEDATALEN];
char toast_idxname[NAMEDATALEN];
IndexInfo *indexInfo;
Oid classObjectId[2];
int16 coloptions[2];
ObjectAddress baseobject,
toastobject;
/*
* Toast table is shared if and only if its parent is.
*
* We cannot allow toasting a shared relation after initdb (because
* there's no way to mark it toasted in other databases' pg_class).
*/
shared_relation = rel->rd_rel->relisshared;
if (shared_relation && !IsBootstrapProcessingMode())
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("shared tables cannot be toasted after initdb")));
/* It's mapped if and only if its parent is, too */
mapped_relation = RelationIsMapped(rel);
/*
* Is it already toasted?
*/
if (rel->rd_rel->reltoastrelid != InvalidOid)
return false;
/*
* Check to see whether the table actually needs a TOAST table.
*
* If an update-in-place toast relfilenode is specified, force toast file
* creation even if it seems not to need one.
*/
if (!needs_toast_table(rel) &&
!OidIsValid(binary_upgrade_next_toast_relfilenode))
return false;
/*
* Create the toast table and its index
*/
snprintf(toast_relname, sizeof(toast_relname),
"pg_toast_%u", relOid);
snprintf(toast_idxname, sizeof(toast_idxname),
"pg_toast_%u_index", relOid);
/* this is pretty painful... need a tuple descriptor */
tupdesc = CreateTemplateTupleDesc(3, false);
TupleDescInitEntry(tupdesc, (AttrNumber) 1,
"chunk_id",
OIDOID,
-1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 2,
"chunk_seq",
INT4OID,
-1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 3,
"chunk_data",
BYTEAOID,
-1, 0);
/*
* Ensure that the toast table doesn't itself get toasted, or we'll be
* toast :-(. This is essential for chunk_data because type bytea is
* toastable; hit the other two just to be sure.
*/
tupdesc->attrs[0]->attstorage = 'p';
tupdesc->attrs[1]->attstorage = 'p';
tupdesc->attrs[2]->attstorage = 'p';
/*
* Toast tables for regular relations go in pg_toast; those for temp
* relations go into the per-backend temp-toast-table namespace.
*/
if (rel->rd_backend == MyBackendId)
namespaceid = GetTempToastNamespace();
else
namespaceid = PG_TOAST_NAMESPACE;
if (OidIsValid(binary_upgrade_next_pg_type_toast_oid))
{
toast_typid = binary_upgrade_next_pg_type_toast_oid;
binary_upgrade_next_pg_type_toast_oid = InvalidOid;
}
toast_relid = heap_create_with_catalog(toast_relname,
namespaceid,
rel->rd_rel->reltablespace,
toastOid,
toast_typid,
InvalidOid,
rel->rd_rel->relowner,
tupdesc,
NIL,
RELKIND_TOASTVALUE,
shared_relation,
mapped_relation,
true,
0,
ONCOMMIT_NOOP,
reloptions,
false,
true,
false);
Assert(toast_relid != InvalidOid);
/* make the toast relation visible, else index creation will fail */
CommandCounterIncrement();
/*
* Create unique index on chunk_id, chunk_seq.
*
* NOTE: the normal TOAST access routines could actually function with a
* single-column index on chunk_id only. However, the slice access
* routines use both columns for faster access to an individual chunk. In
* addition, we want it to be unique as a check against the possibility of
* duplicate TOAST chunk OIDs. The index might also be a little more
* efficient this way, since btree isn't all that happy with large numbers
* of equal keys.
*/
indexInfo = makeNode(IndexInfo);
indexInfo->ii_NumIndexAttrs = 2;
indexInfo->ii_KeyAttrNumbers[0] = 1;
indexInfo->ii_KeyAttrNumbers[1] = 2;
indexInfo->ii_Expressions = NIL;
indexInfo->ii_ExpressionsState = NIL;
indexInfo->ii_Predicate = NIL;
indexInfo->ii_PredicateState = NIL;
indexInfo->ii_ExclusionOps = NULL;
indexInfo->ii_ExclusionProcs = NULL;
indexInfo->ii_ExclusionStrats = NULL;
indexInfo->ii_Unique = true;
indexInfo->ii_ReadyForInserts = true;
indexInfo->ii_Concurrent = false;
indexInfo->ii_BrokenHotChain = false;
classObjectId[0] = OID_BTREE_OPS_OID;
classObjectId[1] = INT4_BTREE_OPS_OID;
coloptions[0] = 0;
coloptions[1] = 0;
toast_idxid = index_create(toast_relid, toast_idxname, toastIndexOid,
indexInfo,
list_make2("chunk_id", "chunk_seq"),
BTREE_AM_OID,
rel->rd_rel->reltablespace,
classObjectId, coloptions, (Datum) 0,
true, false, false, false,
true, false, false);
/*
* Store the toast table's OID in the parent relation's pg_class row
*/
class_rel = heap_open(RelationRelationId, RowExclusiveLock);
reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid));
if (!HeapTupleIsValid(reltup))
elog(ERROR, "cache lookup failed for relation %u", relOid);
((Form_pg_class) GETSTRUCT(reltup))->reltoastrelid = toast_relid;
if (!IsBootstrapProcessingMode())
{
/* normal case, use a transactional update */
simple_heap_update(class_rel, &reltup->t_self, reltup);
/* Keep catalog indexes current */
CatalogUpdateIndexes(class_rel, reltup);
}
else
{
/* While bootstrapping, we cannot UPDATE, so overwrite in-place */
heap_inplace_update(class_rel, reltup);
}
heap_freetuple(reltup);
heap_close(class_rel, RowExclusiveLock);
/*
* Register dependency from the toast table to the master, so that the
* toast table will be deleted if the master is. Skip this in bootstrap
* mode.
*/
if (!IsBootstrapProcessingMode())
{
baseobject.classId = RelationRelationId;
baseobject.objectId = relOid;
baseobject.objectSubId = 0;
toastobject.classId = RelationRelationId;
toastobject.objectId = toast_relid;
toastobject.objectSubId = 0;
recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
}
/*
* Make changes visible
*/
CommandCounterIncrement();
return true;
}
/*
* Check to see whether the table needs a TOAST table. It does only if
* (1) there are any toastable attributes, and (2) the maximum length
* of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
* create a toast table for something like "f1 varchar(20)".)
*/
static bool
needs_toast_table(Relation rel)
{
int32 data_length = 0;
bool maxlength_unknown = false;
bool has_toastable_attrs = false;
TupleDesc tupdesc;
Form_pg_attribute *att;
int32 tuple_length;
int i;
tupdesc = rel->rd_att;
att = tupdesc->attrs;
for (i = 0; i < tupdesc->natts; i++)
{
if (att[i]->attisdropped)
continue;
data_length = att_align_nominal(data_length, att[i]->attalign);
if (att[i]->attlen > 0)
{
/* Fixed-length types are never toastable */
data_length += att[i]->attlen;
}
else
{
int32 maxlen = type_maximum_size(att[i]->atttypid,
att[i]->atttypmod);
if (maxlen < 0)
maxlength_unknown = true;
else
data_length += maxlen;
if (att[i]->attstorage != 'p')
has_toastable_attrs = true;
}
}
if (!has_toastable_attrs)
return false; /* nothing to toast? */
if (maxlength_unknown)
return true; /* any unlimited-length attrs? */
tuple_length = MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) +
BITMAPLEN(tupdesc->natts)) +
MAXALIGN(data_length);
return (tuple_length > TOAST_TUPLE_THRESHOLD);
}