postgresql/src/backend/catalog/toasting.c

452 lines
13 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* toasting.c
* This file contains routines to support creation of toast tables
*
*
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
2010-09-20 22:08:53 +02:00
* src/backend/catalog/toasting.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
2019-01-15 00:54:18 +01:00
#include "access/heapam.h"
#include "access/tuptoaster.h"
#include "access/xact.h"
#include "catalog/binary_upgrade.h"
#include "catalog/dependency.h"
#include "catalog/heap.h"
#include "catalog/index.h"
#include "catalog/namespace.h"
#include "catalog/pg_am.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_type.h"
#include "catalog/toasting.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "storage/lock.h"
#include "utils/builtins.h"
#include "utils/rel.h"
#include "utils/syscache.h"
/* Potentially set by pg_upgrade_support functions */
Oid binary_upgrade_next_toast_pg_type_oid = InvalidOid;
static void CheckAndCreateToastTable(Oid relOid, Datum reloptions,
LOCKMODE lockmode, bool check);
static bool create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
Datum reloptions, LOCKMODE lockmode, bool check);
static bool needs_toast_table(Relation rel);
/*
* CreateToastTable variants
* If the table needs a toast table, and doesn't already have one,
* then create a toast table for it.
*
* reloptions for the toast table can be passed, too. Pass (Datum) 0
* for default reloptions.
*
* We expect the caller to have verified that the relation is a table and have
* already done any necessary permission checks. Callers expect this function
* to end with CommandCounterIncrement if it makes any changes.
*/
void
AlterTableCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode)
{
CheckAndCreateToastTable(relOid, reloptions, lockmode, true);
}
void
NewHeapCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode)
{
CheckAndCreateToastTable(relOid, reloptions, lockmode, false);
}
void
NewRelationCreateToastTable(Oid relOid, Datum reloptions)
{
CheckAndCreateToastTable(relOid, reloptions, AccessExclusiveLock, false);
}
static void
CheckAndCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode, bool check)
{
Relation rel;
rel = table_open(relOid, lockmode);
/* create_toast_table does all the work */
(void) create_toast_table(rel, InvalidOid, InvalidOid, reloptions, lockmode, check);
table_close(rel, NoLock);
}
/*
* Create a toast table during bootstrap
*
* Here we need to prespecify the OIDs of the toast table and its index
*/
void
BootstrapToastTable(char *relName, Oid toastOid, Oid toastIndexOid)
{
2006-10-04 02:30:14 +02:00
Relation rel;
rel = table_openrv(makeRangeVar(NULL, relName, -1), AccessExclusiveLock);
if (rel->rd_rel->relkind != RELKIND_RELATION &&
rel->rd_rel->relkind != RELKIND_MATVIEW)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table or materialized view",
relName)));
/* create_toast_table does all the work */
if (!create_toast_table(rel, toastOid, toastIndexOid, (Datum) 0,
AccessExclusiveLock, false))
elog(ERROR, "\"%s\" does not require a toast table",
relName);
table_close(rel, NoLock);
}
/*
* create_toast_table --- internal workhorse
*
* rel is already opened and locked
* toastOid and toastIndexOid are normally InvalidOid, but during
* bootstrap they can be nonzero to specify hand-assigned OIDs
*/
static bool
create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
Datum reloptions, LOCKMODE lockmode, bool check)
{
Oid relOid = RelationGetRelid(rel);
HeapTuple reltup;
TupleDesc tupdesc;
bool shared_relation;
bool mapped_relation;
Relation toast_rel;
Relation class_rel;
Oid toast_relid;
Oid toast_typid = InvalidOid;
Oid namespaceid;
char toast_relname[NAMEDATALEN];
char toast_idxname[NAMEDATALEN];
IndexInfo *indexInfo;
Oid collationObjectId[2];
Oid classObjectId[2];
int16 coloptions[2];
ObjectAddress baseobject,
toastobject;
/*
* Toast table is shared if and only if its parent is.
*
* We cannot allow toasting a shared relation after initdb (because
* there's no way to mark it toasted in other databases' pg_class).
*/
shared_relation = rel->rd_rel->relisshared;
if (shared_relation && !IsBootstrapProcessingMode())
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("shared tables cannot be toasted after initdb")));
/* It's mapped if and only if its parent is, too */
mapped_relation = RelationIsMapped(rel);
/*
* Is it already toasted?
*/
if (rel->rd_rel->reltoastrelid != InvalidOid)
return false;
Fix pg_upgrade to not fail when new-cluster TOAST rules differ from old. This patch essentially reverts commit 4c6780fd17aa43ed, in favor of a much simpler solution for the case where the new cluster would choose to create a TOAST table but the old cluster doesn't have one: just don't create a TOAST table. The existing code failed in at least two different ways if the situation arose: (1) ALTER TABLE RESET didn't grab an exclusive lock, so that the lock sanity check in create_toast_table failed; (2) pg_upgrade did not provide a pg_type OID for the new toast table, so that the crosscheck in TypeCreate failed. While both these problems were introduced by later patches, they show that the hack being used to cause TOAST table creation is overwhelmingly fragile (and untested). I also note that before the TypeCreate crosscheck was added, the code would have resulted in assigning an indeterminate pg_type OID to the toast table, possibly causing a later OID conflict in that catalog; so that it didn't really work even when committed. If we simply don't create a TOAST table, there will only be a problem if the code tries to store a tuple that's wider than a page, and field compression isn't sufficient to get it under a page. Given that the TOAST creation threshold is intended to be about a quarter of a page, it's very hard to believe that cross-version differences in the do-we-need-a-toast- table heuristic could result in an observable problem. So let's just follow the old version's conclusion about whether a TOAST table is needed. (If we ever do change needs_toast_table() so much that this conclusion doesn't apply, we can devise a solution at that time, and hopefully do it in a less klugy way than 4c6780fd17aa43ed did.) Back-patch to 9.3, like the previous patch. Discussion: <8110.1462291671@sss.pgh.pa.us>
2016-05-07 04:05:51 +02:00
/*
* Check to see whether the table actually needs a TOAST table.
*/
if (!IsBinaryUpgrade)
{
Fix pg_upgrade to not fail when new-cluster TOAST rules differ from old. This patch essentially reverts commit 4c6780fd17aa43ed, in favor of a much simpler solution for the case where the new cluster would choose to create a TOAST table but the old cluster doesn't have one: just don't create a TOAST table. The existing code failed in at least two different ways if the situation arose: (1) ALTER TABLE RESET didn't grab an exclusive lock, so that the lock sanity check in create_toast_table failed; (2) pg_upgrade did not provide a pg_type OID for the new toast table, so that the crosscheck in TypeCreate failed. While both these problems were introduced by later patches, they show that the hack being used to cause TOAST table creation is overwhelmingly fragile (and untested). I also note that before the TypeCreate crosscheck was added, the code would have resulted in assigning an indeterminate pg_type OID to the toast table, possibly causing a later OID conflict in that catalog; so that it didn't really work even when committed. If we simply don't create a TOAST table, there will only be a problem if the code tries to store a tuple that's wider than a page, and field compression isn't sufficient to get it under a page. Given that the TOAST creation threshold is intended to be about a quarter of a page, it's very hard to believe that cross-version differences in the do-we-need-a-toast- table heuristic could result in an observable problem. So let's just follow the old version's conclusion about whether a TOAST table is needed. (If we ever do change needs_toast_table() so much that this conclusion doesn't apply, we can devise a solution at that time, and hopefully do it in a less klugy way than 4c6780fd17aa43ed did.) Back-patch to 9.3, like the previous patch. Discussion: <8110.1462291671@sss.pgh.pa.us>
2016-05-07 04:05:51 +02:00
/* Normal mode, normal check */
if (!needs_toast_table(rel))
return false;
}
else
{
/*
Fix pg_upgrade to not fail when new-cluster TOAST rules differ from old. This patch essentially reverts commit 4c6780fd17aa43ed, in favor of a much simpler solution for the case where the new cluster would choose to create a TOAST table but the old cluster doesn't have one: just don't create a TOAST table. The existing code failed in at least two different ways if the situation arose: (1) ALTER TABLE RESET didn't grab an exclusive lock, so that the lock sanity check in create_toast_table failed; (2) pg_upgrade did not provide a pg_type OID for the new toast table, so that the crosscheck in TypeCreate failed. While both these problems were introduced by later patches, they show that the hack being used to cause TOAST table creation is overwhelmingly fragile (and untested). I also note that before the TypeCreate crosscheck was added, the code would have resulted in assigning an indeterminate pg_type OID to the toast table, possibly causing a later OID conflict in that catalog; so that it didn't really work even when committed. If we simply don't create a TOAST table, there will only be a problem if the code tries to store a tuple that's wider than a page, and field compression isn't sufficient to get it under a page. Given that the TOAST creation threshold is intended to be about a quarter of a page, it's very hard to believe that cross-version differences in the do-we-need-a-toast- table heuristic could result in an observable problem. So let's just follow the old version's conclusion about whether a TOAST table is needed. (If we ever do change needs_toast_table() so much that this conclusion doesn't apply, we can devise a solution at that time, and hopefully do it in a less klugy way than 4c6780fd17aa43ed did.) Back-patch to 9.3, like the previous patch. Discussion: <8110.1462291671@sss.pgh.pa.us>
2016-05-07 04:05:51 +02:00
* In binary-upgrade mode, create a TOAST table if and only if
* pg_upgrade told us to (ie, a TOAST table OID has been provided).
*
Fix pg_upgrade to not fail when new-cluster TOAST rules differ from old. This patch essentially reverts commit 4c6780fd17aa43ed, in favor of a much simpler solution for the case where the new cluster would choose to create a TOAST table but the old cluster doesn't have one: just don't create a TOAST table. The existing code failed in at least two different ways if the situation arose: (1) ALTER TABLE RESET didn't grab an exclusive lock, so that the lock sanity check in create_toast_table failed; (2) pg_upgrade did not provide a pg_type OID for the new toast table, so that the crosscheck in TypeCreate failed. While both these problems were introduced by later patches, they show that the hack being used to cause TOAST table creation is overwhelmingly fragile (and untested). I also note that before the TypeCreate crosscheck was added, the code would have resulted in assigning an indeterminate pg_type OID to the toast table, possibly causing a later OID conflict in that catalog; so that it didn't really work even when committed. If we simply don't create a TOAST table, there will only be a problem if the code tries to store a tuple that's wider than a page, and field compression isn't sufficient to get it under a page. Given that the TOAST creation threshold is intended to be about a quarter of a page, it's very hard to believe that cross-version differences in the do-we-need-a-toast- table heuristic could result in an observable problem. So let's just follow the old version's conclusion about whether a TOAST table is needed. (If we ever do change needs_toast_table() so much that this conclusion doesn't apply, we can devise a solution at that time, and hopefully do it in a less klugy way than 4c6780fd17aa43ed did.) Back-patch to 9.3, like the previous patch. Discussion: <8110.1462291671@sss.pgh.pa.us>
2016-05-07 04:05:51 +02:00
* This indicates that the old cluster had a TOAST table for the
* current table. We must create a TOAST table to receive the old
* TOAST file, even if the table seems not to need one.
*
* Contrariwise, if the old cluster did not have a TOAST table, we
* should be able to get along without one even if the new version's
* needs_toast_table rules suggest we should have one. There is a lot
* of daylight between where we will create a TOAST table and where
* one is really necessary to avoid failures, so small cross-version
* differences in the when-to-create heuristic shouldn't be a problem.
* If we tried to create a TOAST table anyway, we would have the
* problem that it might take up an OID that will conflict with some
* old-cluster table we haven't seen yet.
*/
Fix pg_upgrade to not fail when new-cluster TOAST rules differ from old. This patch essentially reverts commit 4c6780fd17aa43ed, in favor of a much simpler solution for the case where the new cluster would choose to create a TOAST table but the old cluster doesn't have one: just don't create a TOAST table. The existing code failed in at least two different ways if the situation arose: (1) ALTER TABLE RESET didn't grab an exclusive lock, so that the lock sanity check in create_toast_table failed; (2) pg_upgrade did not provide a pg_type OID for the new toast table, so that the crosscheck in TypeCreate failed. While both these problems were introduced by later patches, they show that the hack being used to cause TOAST table creation is overwhelmingly fragile (and untested). I also note that before the TypeCreate crosscheck was added, the code would have resulted in assigning an indeterminate pg_type OID to the toast table, possibly causing a later OID conflict in that catalog; so that it didn't really work even when committed. If we simply don't create a TOAST table, there will only be a problem if the code tries to store a tuple that's wider than a page, and field compression isn't sufficient to get it under a page. Given that the TOAST creation threshold is intended to be about a quarter of a page, it's very hard to believe that cross-version differences in the do-we-need-a-toast- table heuristic could result in an observable problem. So let's just follow the old version's conclusion about whether a TOAST table is needed. (If we ever do change needs_toast_table() so much that this conclusion doesn't apply, we can devise a solution at that time, and hopefully do it in a less klugy way than 4c6780fd17aa43ed did.) Back-patch to 9.3, like the previous patch. Discussion: <8110.1462291671@sss.pgh.pa.us>
2016-05-07 04:05:51 +02:00
if (!OidIsValid(binary_upgrade_next_toast_pg_class_oid) ||
!OidIsValid(binary_upgrade_next_toast_pg_type_oid))
return false;
}
/*
* If requested check lockmode is sufficient. This is a cross check in
* case of errors or conflicting decisions in earlier code.
*/
if (check && lockmode != AccessExclusiveLock)
elog(ERROR, "AccessExclusiveLock required to add toast table.");
/*
* Create the toast table and its index
*/
snprintf(toast_relname, sizeof(toast_relname),
"pg_toast_%u", relOid);
snprintf(toast_idxname, sizeof(toast_idxname),
"pg_toast_%u_index", relOid);
/* this is pretty painful... need a tuple descriptor */
Remove WITH OIDS support, change oid catalog column visibility. Previously tables declared WITH OIDS, including a significant fraction of the catalog tables, stored the oid column not as a normal column, but as part of the tuple header. This special column was not shown by default, which was somewhat odd, as it's often (consider e.g. pg_class.oid) one of the more important parts of a row. Neither pg_dump nor COPY included the contents of the oid column by default. The fact that the oid column was not an ordinary column necessitated a significant amount of special case code to support oid columns. That already was painful for the existing, but upcoming work aiming to make table storage pluggable, would have required expanding and duplicating that "specialness" significantly. WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0). Remove it. Removing includes: - CREATE TABLE and ALTER TABLE syntax for declaring the table to be WITH OIDS has been removed (WITH (oids[ = true]) will error out) - pg_dump does not support dumping tables declared WITH OIDS and will issue a warning when dumping one (and ignore the oid column). - restoring an pg_dump archive with pg_restore will warn when restoring a table with oid contents (and ignore the oid column) - COPY will refuse to load binary dump that includes oids. - pg_upgrade will error out when encountering tables declared WITH OIDS, they have to be altered to remove the oid column first. - Functionality to access the oid of the last inserted row (like plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed. The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false) for CREATE TABLE) is still supported. While that requires a bit of support code, it seems unnecessary to break applications / dumps that do not use oids, and are explicit about not using them. The biggest user of WITH OID columns was postgres' catalog. This commit changes all 'magic' oid columns to be columns that are normally declared and stored. To reduce unnecessary query breakage all the newly added columns are still named 'oid', even if a table's column naming scheme would indicate 'reloid' or such. This obviously requires adapting a lot code, mostly replacing oid access via HeapTupleGetOid() with access to the underlying Form_pg_*->oid column. The bootstrap process now assigns oids for all oid columns in genbki.pl that do not have an explicit value (starting at the largest oid previously used), only oids assigned later by oids will be above FirstBootstrapObjectId. As the oid column now is a normal column the special bootstrap syntax for oids has been removed. Oids are not automatically assigned during insertion anymore, all backend code explicitly assigns oids with GetNewOidWithIndex(). For the rare case that insertions into the catalog via SQL are called for the new pg_nextoid() function can be used (which only works on catalog tables). The fact that oid columns on system tables are now normal columns means that they will be included in the set of columns expanded by * (i.e. SELECT * FROM pg_class will now include the table's oid, previously it did not). It'd not technically be hard to hide oid column by default, but that'd mean confusing behavior would either have to be carried forward forever, or it'd cause breakage down the line. While it's not unlikely that further adjustments are needed, the scope/invasiveness of the patch makes it worthwhile to get merge this now. It's painful to maintain externally, too complicated to commit after the code code freeze, and a dependency of a number of other patches. Catversion bump, for obvious reasons. Author: Andres Freund, with contributions by John Naylor Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
tupdesc = CreateTemplateTupleDesc(3);
TupleDescInitEntry(tupdesc, (AttrNumber) 1,
"chunk_id",
OIDOID,
-1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 2,
"chunk_seq",
INT4OID,
-1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 3,
"chunk_data",
BYTEAOID,
-1, 0);
/*
* Ensure that the toast table doesn't itself get toasted, or we'll be
* toast :-(. This is essential for chunk_data because type bytea is
* toastable; hit the other two just to be sure.
*/
TupleDescAttr(tupdesc, 0)->attstorage = 'p';
TupleDescAttr(tupdesc, 1)->attstorage = 'p';
TupleDescAttr(tupdesc, 2)->attstorage = 'p';
/*
* Toast tables for regular relations go in pg_toast; those for temp
* relations go into the per-backend temp-toast-table namespace.
*/
if (isTempOrTempToastNamespace(rel->rd_rel->relnamespace))
namespaceid = GetTempToastNamespace();
else
namespaceid = PG_TOAST_NAMESPACE;
/*
2015-05-24 03:35:49 +02:00
* Use binary-upgrade override for pg_type.oid, if supplied. We might be
* in the post-schema-restore phase where we are doing ALTER TABLE to
* create TOAST tables that didn't exist in the old cluster.
*/
if (IsBinaryUpgrade && OidIsValid(binary_upgrade_next_toast_pg_type_oid))
{
toast_typid = binary_upgrade_next_toast_pg_type_oid;
binary_upgrade_next_toast_pg_type_oid = InvalidOid;
}
toast_relid = heap_create_with_catalog(toast_relname,
namespaceid,
rel->rd_rel->reltablespace,
toastOid,
toast_typid,
InvalidOid,
rel->rd_rel->relowner,
tableam: introduce table AM infrastructure. This introduces the concept of table access methods, i.e. CREATE ACCESS METHOD ... TYPE TABLE and CREATE TABLE ... USING (storage-engine). No table access functionality is delegated to table AMs as of this commit, that'll be done in following commits. Subsequent commits will incrementally abstract table access functionality to be routed through table access methods. That change is too large to be reviewed & committed at once, so it'll be done incrementally. Docs will be updated at the end, as adding them incrementally would likely make them less coherent, and definitely is a lot more work, without a lot of benefit. Table access methods are specified similar to index access methods, i.e. pg_am.amhandler returns, as INTERNAL, a pointer to a struct with callbacks. In contrast to index AMs that struct needs to live as long as a backend, typically that's achieved by just returning a pointer to a constant struct. Psql's \d+ now displays a table's access method. That can be disabled with HIDE_TABLEAM=true, which is mainly useful so regression tests can be run against different AMs. It's quite possible that this behaviour still needs to be fine tuned. For now it's not allowed to set a table AM for a partitioned table, as we've not resolved how partitions would inherit that. Disallowing allows us to introduce, if we decide that's the way forward, such a behaviour without a compatibility break. Catversion bumped, to add the heap table AM and references to it. Author: Haribabu Kommi, Andres Freund, Alvaro Herrera, Dimitri Golgov and others Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de https://postgr.es/m/20160812231527.GA690404@alvherre.pgsql https://postgr.es/m/20190107235616.6lur25ph22u5u5av@alap3.anarazel.de https://postgr.es/m/20190304234700.w5tmhducs5wxgzls@alap3.anarazel.de
2019-03-06 18:54:38 +01:00
rel->rd_rel->relam,
tupdesc,
NIL,
RELKIND_TOASTVALUE,
rel->rd_rel->relpersistence,
shared_relation,
mapped_relation,
ONCOMMIT_NOOP,
reloptions,
false,
true,
true,
InvalidOid,
NULL);
Assert(toast_relid != InvalidOid);
/* make the toast relation visible, else table_open will fail */
CommandCounterIncrement();
/* ShareLock is not really needed here, but take it anyway */
toast_rel = table_open(toast_relid, ShareLock);
/*
* Create unique index on chunk_id, chunk_seq.
*
* NOTE: the normal TOAST access routines could actually function with a
* single-column index on chunk_id only. However, the slice access
* routines use both columns for faster access to an individual chunk. In
* addition, we want it to be unique as a check against the possibility of
* duplicate TOAST chunk OIDs. The index might also be a little more
* efficient this way, since btree isn't all that happy with large numbers
* of equal keys.
*/
indexInfo = makeNode(IndexInfo);
indexInfo->ii_NumIndexAttrs = 2;
indexInfo->ii_NumIndexKeyAttrs = 2;
indexInfo->ii_IndexAttrNumbers[0] = 1;
indexInfo->ii_IndexAttrNumbers[1] = 2;
indexInfo->ii_Expressions = NIL;
indexInfo->ii_ExpressionsState = NIL;
indexInfo->ii_Predicate = NIL;
Faster expression evaluation and targetlist projection. This replaces the old, recursive tree-walk based evaluation, with non-recursive, opcode dispatch based, expression evaluation. Projection is now implemented as part of expression evaluation. This both leads to significant performance improvements, and makes future just-in-time compilation of expressions easier. The speed gains primarily come from: - non-recursive implementation reduces stack usage / overhead - simple sub-expressions are implemented with a single jump, without function calls - sharing some state between different sub-expressions - reduced amount of indirect/hard to predict memory accesses by laying out operation metadata sequentially; including the avoidance of nearly all of the previously used linked lists - more code has been moved to expression initialization, avoiding constant re-checks at evaluation time Future just-in-time compilation (JIT) has become easier, as demonstrated by released patches intended to be merged in a later release, for primarily two reasons: Firstly, due to a stricter split between expression initialization and evaluation, less code has to be handled by the JIT. Secondly, due to the non-recursive nature of the generated "instructions", less performance-critical code-paths can easily be shared between interpreted and compiled evaluation. The new framework allows for significant future optimizations. E.g.: - basic infrastructure for to later reduce the per executor-startup overhead of expression evaluation, by caching state in prepared statements. That'd be helpful in OLTPish scenarios where initialization overhead is measurable. - optimizing the generated "code". A number of proposals for potential work has already been made. - optimizing the interpreter. Similarly a number of proposals have been made here too. The move of logic into the expression initialization step leads to some backward-incompatible changes: - Function permission checks are now done during expression initialization, whereas previously they were done during execution. In edge cases this can lead to errors being raised that previously wouldn't have been, e.g. a NULL array being coerced to a different array type previously didn't perform checks. - The set of domain constraints to be checked, is now evaluated once during expression initialization, previously it was re-built every time a domain check was evaluated. For normal queries this doesn't change much, but e.g. for plpgsql functions, which caches ExprStates, the old set could stick around longer. The behavior around might still change. Author: Andres Freund, with significant changes by Tom Lane, changes by Heikki Linnakangas Reviewed-By: Tom Lane, Heikki Linnakangas Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
indexInfo->ii_PredicateState = NULL;
indexInfo->ii_ExclusionOps = NULL;
indexInfo->ii_ExclusionProcs = NULL;
indexInfo->ii_ExclusionStrats = NULL;
indexInfo->ii_Unique = true;
indexInfo->ii_ReadyForInserts = true;
indexInfo->ii_Concurrent = false;
indexInfo->ii_BrokenHotChain = false;
indexInfo->ii_ParallelWorkers = 0;
indexInfo->ii_Am = BTREE_AM_OID;
indexInfo->ii_AmCache = NULL;
indexInfo->ii_Context = CurrentMemoryContext;
collationObjectId[0] = InvalidOid;
collationObjectId[1] = InvalidOid;
classObjectId[0] = OID_BTREE_OPS_OID;
classObjectId[1] = INT4_BTREE_OPS_OID;
coloptions[0] = 0;
coloptions[1] = 0;
index_create(toast_rel, toast_idxname, toastIndexOid, InvalidOid,
InvalidOid, InvalidOid,
2011-06-09 20:32:50 +02:00
indexInfo,
list_make2("chunk_id", "chunk_seq"),
BTREE_AM_OID,
rel->rd_rel->reltablespace,
collationObjectId, classObjectId, coloptions, (Datum) 0,
INDEX_CREATE_IS_PRIMARY, 0, true, true, NULL);
table_close(toast_rel, NoLock);
/*
* Store the toast table's OID in the parent relation's pg_class row
*/
class_rel = table_open(RelationRelationId, RowExclusiveLock);
reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid));
if (!HeapTupleIsValid(reltup))
elog(ERROR, "cache lookup failed for relation %u", relOid);
((Form_pg_class) GETSTRUCT(reltup))->reltoastrelid = toast_relid;
if (!IsBootstrapProcessingMode())
{
/* normal case, use a transactional update */
CatalogTupleUpdate(class_rel, &reltup->t_self, reltup);
}
else
{
/* While bootstrapping, we cannot UPDATE, so overwrite in-place */
heap_inplace_update(class_rel, reltup);
}
heap_freetuple(reltup);
table_close(class_rel, RowExclusiveLock);
/*
* Register dependency from the toast table to the master, so that the
* toast table will be deleted if the master is. Skip this in bootstrap
* mode.
*/
if (!IsBootstrapProcessingMode())
{
baseobject.classId = RelationRelationId;
baseobject.objectId = relOid;
baseobject.objectSubId = 0;
toastobject.classId = RelationRelationId;
toastobject.objectId = toast_relid;
toastobject.objectSubId = 0;
recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
}
/*
* Make changes visible
*/
CommandCounterIncrement();
return true;
}
/*
* Check to see whether the table needs a TOAST table. It does only if
* (1) there are any toastable attributes, and (2) the maximum length
* of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
* create a toast table for something like "f1 varchar(20)".)
* No need to create a TOAST table for partitioned tables.
*/
static bool
needs_toast_table(Relation rel)
{
int32 data_length = 0;
bool maxlength_unknown = false;
bool has_toastable_attrs = false;
TupleDesc tupdesc;
int32 tuple_length;
int i;
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
return false;
tupdesc = rel->rd_att;
for (i = 0; i < tupdesc->natts; i++)
{
Form_pg_attribute att = TupleDescAttr(tupdesc, i);
if (att->attisdropped)
continue;
data_length = att_align_nominal(data_length, att->attalign);
if (att->attlen > 0)
{
/* Fixed-length types are never toastable */
data_length += att->attlen;
}
else
{
int32 maxlen = type_maximum_size(att->atttypid,
att->atttypmod);
if (maxlen < 0)
maxlength_unknown = true;
else
data_length += maxlen;
if (att->attstorage != 'p')
has_toastable_attrs = true;
}
}
if (!has_toastable_attrs)
return false; /* nothing to toast? */
if (maxlength_unknown)
return true; /* any unlimited-length attrs? */
tuple_length = MAXALIGN(SizeofHeapTupleHeader +
BITMAPLEN(tupdesc->natts)) +
MAXALIGN(data_length);
return (tuple_length > TOAST_TUPLE_THRESHOLD);
}