postgresql/src/backend/catalog/toasting.c
Tom Lane 4431758229 Support ORDER BY ... NULLS FIRST/LAST, and add ASC/DESC/NULLS FIRST/NULLS LAST
per-column options for btree indexes.  The planner's support for this is still
pretty rudimentary; it does not yet know how to plan mergejoins with
nondefault ordering options.  The documentation is pretty rudimentary, too.
I'll work on improving that stuff later.

Note incompatible change from prior behavior: ORDER BY ... USING will now be
rejected if the operator is not a less-than or greater-than member of some
btree opclass.  This prevents less-than-sane behavior if an operator that
doesn't actually define a proper sort ordering is selected.
2007-01-09 02:14:16 +00:00

345 lines
9.5 KiB
C

/*-------------------------------------------------------------------------
*
* toasting.c
* This file contains routines to support creation of toast tables
*
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/toasting.c,v 1.5 2007/01/09 02:14:11 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/heapam.h"
#include "access/tuptoaster.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/heap.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_type.h"
#include "catalog/toasting.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "utils/builtins.h"
#include "utils/syscache.h"
static bool create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid);
static bool needs_toast_table(Relation rel);
/*
* AlterTableCreateToastTable
* If the table needs a toast table, and doesn't already have one,
* then create a toast table for it.
*
* We expect the caller to have verified that the relation is a table and have
* already done any necessary permission checks. Callers expect this function
* to end with CommandCounterIncrement if it makes any changes.
*/
void
AlterTableCreateToastTable(Oid relOid)
{
Relation rel;
/*
* Grab an exclusive lock on the target table, which we will NOT release
* until end of transaction. (This is probably redundant in all present
* uses...)
*/
rel = heap_open(relOid, AccessExclusiveLock);
/* create_toast_table does all the work */
(void) create_toast_table(rel, InvalidOid, InvalidOid);
heap_close(rel, NoLock);
}
/*
* Create a toast table during bootstrap
*
* Here we need to prespecify the OIDs of the toast table and its index
*/
void
BootstrapToastTable(char *relName, Oid toastOid, Oid toastIndexOid)
{
Relation rel;
rel = heap_openrv(makeRangeVar(NULL, relName), AccessExclusiveLock);
/* Note: during bootstrap may see uncataloged relation */
if (rel->rd_rel->relkind != RELKIND_RELATION &&
rel->rd_rel->relkind != RELKIND_UNCATALOGED)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table",
relName)));
/* create_toast_table does all the work */
if (!create_toast_table(rel, toastOid, toastIndexOid))
elog(ERROR, "\"%s\" does not require a toast table",
relName);
heap_close(rel, NoLock);
}
/*
* create_toast_table --- internal workhorse
*
* rel is already opened and exclusive-locked
* toastOid and toastIndexOid are normally InvalidOid, but during
* bootstrap they can be nonzero to specify hand-assigned OIDs
*/
static bool
create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid)
{
Oid relOid = RelationGetRelid(rel);
HeapTuple reltup;
TupleDesc tupdesc;
bool shared_relation;
Relation class_rel;
Oid toast_relid;
Oid toast_idxid;
char toast_relname[NAMEDATALEN];
char toast_idxname[NAMEDATALEN];
IndexInfo *indexInfo;
Oid classObjectId[2];
int16 coloptions[2];
ObjectAddress baseobject,
toastobject;
/*
* Toast table is shared if and only if its parent is.
*
* We cannot allow toasting a shared relation after initdb (because
* there's no way to mark it toasted in other databases' pg_class).
*/
shared_relation = rel->rd_rel->relisshared;
if (shared_relation && !IsBootstrapProcessingMode())
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("shared tables cannot be toasted after initdb")));
/*
* Is it already toasted?
*/
if (rel->rd_rel->reltoastrelid != InvalidOid)
return false;
/*
* Check to see whether the table actually needs a TOAST table.
*/
if (!needs_toast_table(rel))
return false;
/*
* Create the toast table and its index
*/
snprintf(toast_relname, sizeof(toast_relname),
"pg_toast_%u", relOid);
snprintf(toast_idxname, sizeof(toast_idxname),
"pg_toast_%u_index", relOid);
/* this is pretty painful... need a tuple descriptor */
tupdesc = CreateTemplateTupleDesc(3, false);
TupleDescInitEntry(tupdesc, (AttrNumber) 1,
"chunk_id",
OIDOID,
-1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 2,
"chunk_seq",
INT4OID,
-1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 3,
"chunk_data",
BYTEAOID,
-1, 0);
/*
* Ensure that the toast table doesn't itself get toasted, or we'll be
* toast :-(. This is essential for chunk_data because type bytea is
* toastable; hit the other two just to be sure.
*/
tupdesc->attrs[0]->attstorage = 'p';
tupdesc->attrs[1]->attstorage = 'p';
tupdesc->attrs[2]->attstorage = 'p';
/*
* Note: the toast relation is placed in the regular pg_toast namespace
* even if its master relation is a temp table. There cannot be any
* naming collision, and the toast rel will be destroyed when its master
* is, so there's no need to handle the toast rel as temp.
*
* XXX would it make sense to apply the master's reloptions to the toast
* table?
*/
toast_relid = heap_create_with_catalog(toast_relname,
PG_TOAST_NAMESPACE,
rel->rd_rel->reltablespace,
toastOid,
rel->rd_rel->relowner,
tupdesc,
RELKIND_TOASTVALUE,
shared_relation,
true,
0,
ONCOMMIT_NOOP,
(Datum) 0,
true);
/* make the toast relation visible, else index creation will fail */
CommandCounterIncrement();
/*
* Create unique index on chunk_id, chunk_seq.
*
* NOTE: the normal TOAST access routines could actually function with a
* single-column index on chunk_id only. However, the slice access
* routines use both columns for faster access to an individual chunk. In
* addition, we want it to be unique as a check against the possibility of
* duplicate TOAST chunk OIDs. The index might also be a little more
* efficient this way, since btree isn't all that happy with large numbers
* of equal keys.
*/
indexInfo = makeNode(IndexInfo);
indexInfo->ii_NumIndexAttrs = 2;
indexInfo->ii_KeyAttrNumbers[0] = 1;
indexInfo->ii_KeyAttrNumbers[1] = 2;
indexInfo->ii_Expressions = NIL;
indexInfo->ii_ExpressionsState = NIL;
indexInfo->ii_Predicate = NIL;
indexInfo->ii_PredicateState = NIL;
indexInfo->ii_Unique = true;
indexInfo->ii_Concurrent = false;
classObjectId[0] = OID_BTREE_OPS_OID;
classObjectId[1] = INT4_BTREE_OPS_OID;
coloptions[0] = 0;
coloptions[1] = 0;
toast_idxid = index_create(toast_relid, toast_idxname, toastIndexOid,
indexInfo,
BTREE_AM_OID,
rel->rd_rel->reltablespace,
classObjectId, coloptions, (Datum) 0,
true, false, true, false, false);
/*
* Store the toast table's OID in the parent relation's pg_class row
*/
class_rel = heap_open(RelationRelationId, RowExclusiveLock);
reltup = SearchSysCacheCopy(RELOID,
ObjectIdGetDatum(relOid),
0, 0, 0);
if (!HeapTupleIsValid(reltup))
elog(ERROR, "cache lookup failed for relation %u", relOid);
((Form_pg_class) GETSTRUCT(reltup))->reltoastrelid = toast_relid;
if (!IsBootstrapProcessingMode())
{
/* normal case, use a transactional update */
simple_heap_update(class_rel, &reltup->t_self, reltup);
/* Keep catalog indexes current */
CatalogUpdateIndexes(class_rel, reltup);
}
else
{
/* While bootstrapping, we cannot UPDATE, so overwrite in-place */
heap_inplace_update(class_rel, reltup);
}
heap_freetuple(reltup);
heap_close(class_rel, RowExclusiveLock);
/*
* Register dependency from the toast table to the master, so that the
* toast table will be deleted if the master is. Skip this in bootstrap
* mode.
*/
if (!IsBootstrapProcessingMode())
{
baseobject.classId = RelationRelationId;
baseobject.objectId = relOid;
baseobject.objectSubId = 0;
toastobject.classId = RelationRelationId;
toastobject.objectId = toast_relid;
toastobject.objectSubId = 0;
recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
}
/*
* Make changes visible
*/
CommandCounterIncrement();
return true;
}
/*
* Check to see whether the table needs a TOAST table. It does only if
* (1) there are any toastable attributes, and (2) the maximum length
* of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
* create a toast table for something like "f1 varchar(20)".)
*/
static bool
needs_toast_table(Relation rel)
{
int32 data_length = 0;
bool maxlength_unknown = false;
bool has_toastable_attrs = false;
TupleDesc tupdesc;
Form_pg_attribute *att;
int32 tuple_length;
int i;
tupdesc = rel->rd_att;
att = tupdesc->attrs;
for (i = 0; i < tupdesc->natts; i++)
{
if (att[i]->attisdropped)
continue;
data_length = att_align(data_length, att[i]->attalign);
if (att[i]->attlen > 0)
{
/* Fixed-length types are never toastable */
data_length += att[i]->attlen;
}
else
{
int32 maxlen = type_maximum_size(att[i]->atttypid,
att[i]->atttypmod);
if (maxlen < 0)
maxlength_unknown = true;
else
data_length += maxlen;
if (att[i]->attstorage != 'p')
has_toastable_attrs = true;
}
}
if (!has_toastable_attrs)
return false; /* nothing to toast? */
if (maxlength_unknown)
return true; /* any unlimited-length attrs? */
tuple_length = MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) +
BITMAPLEN(tupdesc->natts)) +
MAXALIGN(data_length);
return (tuple_length > TOAST_TUPLE_THRESHOLD);
}