postgresql/src/include/access/tuptoaster.h

231 lines
7.3 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* tuptoaster.h
* POSTGRES definitions for external and compressed storage
* of variable size attributes.
*
* Copyright (c) 2000-2014, PostgreSQL Global Development Group
*
2010-09-20 22:08:53 +02:00
* src/include/access/tuptoaster.h
*
*-------------------------------------------------------------------------
*/
#ifndef TUPTOASTER_H
#define TUPTOASTER_H
#include "access/htup_details.h"
#include "utils/relcache.h"
#include "storage/lock.h"
/*
 * Defining TOAST_INDEX_HACK enables de-toasting of index entries.
 * It is needed until VACUUM is smart enough to rebuild indexes from scratch.
 *
 * This is a presence-only flag: it has no value and is meant to be tested
 * with #ifdef.
 */
#define TOAST_INDEX_HACK
/*
 * Find the maximum size of a tuple if there are to be N tuples per page.
 *
 * The per-page overhead -- SizeOfPageHeaderData plus one ItemIdData per
 * tuple, passed through MAXALIGN -- is subtracted from BLCKSZ; what is left
 * is divided evenly among the tuples and passed through MAXALIGN_DOWN.
 *
 * tuplesPerPage is expanded twice and used as a divisor, so it must be a
 * nonzero expression without side effects.
 */
#define MaximumBytesPerTuple(tuplesPerPage) \
	MAXALIGN_DOWN((BLCKSZ - \
				   MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData))) \
				  / (tuplesPerPage))
/*
 * These symbols control toaster activation.  If a tuple is larger than
 * TOAST_TUPLE_THRESHOLD, we will try to toast it down to no more than
 * TOAST_TUPLE_TARGET bytes through compressing compressible fields and
 * moving EXTENDED and EXTERNAL data out-of-line.
 *
 * The numbers need not be the same, though they currently are
 * (TOAST_TUPLE_TARGET is defined as TOAST_TUPLE_THRESHOLD).  It doesn't
 * make sense for TARGET to exceed THRESHOLD, but it could be useful to make
 * it be smaller.
 *
 * Currently we choose both values to match the largest tuple size for which
 * TOAST_TUPLES_PER_PAGE tuples can fit on a heap page.
 *
 * XXX while these can be modified without initdb, some thought needs to be
 * given to needs_toast_table() in toasting.c before unleashing random
 * changes.  Also see LOBLKSIZE in large_object.h, which can *not* be
 * changed without initdb.
 */
#define TOAST_TUPLES_PER_PAGE 4
#define TOAST_TUPLE_THRESHOLD MaximumBytesPerTuple(TOAST_TUPLES_PER_PAGE)
#define TOAST_TUPLE_TARGET TOAST_TUPLE_THRESHOLD
/*
 * The code will also consider moving MAIN data out-of-line, but only as a
 * last resort if the previous steps haven't reached the target tuple size.
 * In this phase we use a different target size, currently equal to the
 * largest tuple that will fit on a heap page.  This is reasonable since
 * the user has told us to keep the data in-line if at all possible.
 *
 * TOAST_TUPLE_TARGET_MAIN is derived from TOAST_TUPLES_PER_PAGE_MAIN via
 * MaximumBytesPerTuple(); adjust only the tuples-per-page value.
 */
#define TOAST_TUPLES_PER_PAGE_MAIN	1
#define TOAST_TUPLE_TARGET_MAIN MaximumBytesPerTuple(TOAST_TUPLES_PER_PAGE_MAIN)
/*
 * If an index value is larger than TOAST_INDEX_TARGET, we will try to
 * compress it (we can't move it out-of-line, however).  Note that this
 * number is per-datum, not per-tuple, for simplicity in index_form_tuple().
 *
 * The limit is one sixteenth of MaxHeapTupleSize.
 */
#define TOAST_INDEX_TARGET (MaxHeapTupleSize / 16)
/*
 * When we store an oversize datum externally, we divide it into chunks
 * containing at most TOAST_MAX_CHUNK_SIZE data bytes.  This number *must*
 * be small enough that the completed toast-table tuple (including the
 * ID and sequence fields and all overhead) will fit on a page.
 * The coding here sets the size on the theory that we want to fit
 * EXTERN_TUPLES_PER_PAGE tuples of maximum size onto a page.
 *
 * The chunk size is what remains of EXTERN_TUPLE_MAX_SIZE after subtracting
 * the MAXALIGN'ed tuple header (up to t_bits), one Oid, one int32 and one
 * varlena header (VARHDRSZ).
 *
 * NB: Changing TOAST_MAX_CHUNK_SIZE requires an initdb.
 */
#define EXTERN_TUPLES_PER_PAGE	4		/* tweak only this */

#define EXTERN_TUPLE_MAX_SIZE	MaximumBytesPerTuple(EXTERN_TUPLES_PER_PAGE)

#define TOAST_MAX_CHUNK_SIZE	\
	(EXTERN_TUPLE_MAX_SIZE -							\
	 MAXALIGN(offsetof(HeapTupleHeaderData, t_bits)) -	\
	 sizeof(Oid) -										\
	 sizeof(int32) -									\
	 VARHDRSZ)
/*
 * Size of an EXTERNAL datum that contains a standard TOAST pointer:
 * the external varlena header (VARHDRSZ_EXTERNAL) followed by a
 * struct varatt_external payload.
 */
#define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_external))
/*
 * Size of an EXTERNAL datum that contains an indirection pointer:
 * the external varlena header (VARHDRSZ_EXTERNAL) followed by a
 * struct varatt_indirect payload (not a standard TOAST pointer).
 */
#define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_indirect))
/*
 * An externally-stored value is compressed exactly when its external length
 * (va_extsize) is smaller than the original datum's size with the header
 * taken off (va_rawsize - VARHDRSZ): rawsize includes the VARHDRSZ
 * overhead, extsize does not.  Compression is never used unless it actually
 * saves space, so the two quantities are expected to be equal (stored
 * uncompressed) or extsize is the smaller one (stored compressed).
 *
 * The argument is expanded twice; pass an expression without side effects.
 */
#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \
	((toast_pointer).va_rawsize - VARHDRSZ > (toast_pointer).va_extsize)
/*
 * Macro to fetch the possibly-unaligned contents of an EXTERNAL datum
 * into a local "struct varatt_external" toast pointer.  This should be
 * just a memcpy, but some versions of gcc seem to produce broken code
 * that assumes the datum contents are aligned.  Introducing an explicit
 * intermediate "varattrib_1b_e *" variable seems to fix it.
 *
 * toast_pointer must be an lvalue: its address is the memcpy destination
 * and its sizeof determines how many bytes are copied.  attr is evaluated
 * exactly once.  In assert-enabled builds the macro checks that attr is an
 * EXTERNAL datum whose size equals sizeof(toast_pointer) + VARHDRSZ_EXTERNAL.
 *
 * The expansion declares a local named "attre"; do not pass a toast_pointer
 * expression that refers to a caller variable of that name.  The
 * do { ... } while (0) wrapper lets the macro be used as a single statement.
 */
#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr) \
do { \
varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \
Assert(VARATT_IS_EXTERNAL(attre)); \
Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \
memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \
} while (0)
/* ----------
 * toast_insert_or_update -
 *
 *	Called by heap_insert() and heap_update().
 *
 *	Takes the relation, the new tuple, the old tuple and an integer
 *	options word; returns a HeapTuple.
 *
 *	NOTE(review): oldtup is presumably NULL on the insert path, and the
 *	returned tuple is presumably the (possibly rebuilt) tuple to store --
 *	neither is visible in this header; confirm in tuptoaster.c.
 * ----------
 */
extern HeapTuple toast_insert_or_update(Relation rel,
					   HeapTuple newtup, HeapTuple oldtup,
					   int options);
/* ----------
 * toast_delete -
 *
 *	Called by heap_delete().
 *
 *	Takes the relation and the tuple being deleted; returns nothing.
 * ----------
 */
extern void toast_delete(Relation rel, HeapTuple oldtup);
/* ----------
 * heap_tuple_fetch_attr() -
 *
 *	Fetches an externally stored attribute from the toast
 *	relation.  Does NOT decompress it if it is stored externally
 *	in compressed format.
 *
 *	NOTE(review): whether a non-external input is returned as-is or
 *	copied is not visible in this header; confirm in tuptoaster.c.
 * ----------
 */
extern struct varlena *heap_tuple_fetch_attr(struct varlena * attr);
/* ----------
 * heap_tuple_untoast_attr() -
 *
 *	Fully detoasts one attribute, fetching and/or decompressing
 *	it as needed.
 *
 *	NOTE(review): whether the result may alias the input when no
 *	detoasting is required is not visible in this header; confirm in
 *	tuptoaster.c before freeing either pointer.
 * ----------
 */
extern struct varlena *heap_tuple_untoast_attr(struct varlena * attr);
/* ----------
* heap_tuple_untoast_attr_slice() -
*
2002-09-04 22:31:48 +02:00
* Fetches only the specified portion of an attribute.
* (Handles all cases for attribute storage)
* ----------
*/
2007-11-15 22:14:46 +01:00
extern struct varlena *heap_tuple_untoast_attr_slice(struct varlena * attr,
2002-09-04 22:31:48 +02:00
int32 sliceoffset,
int32 slicelength);
Fix race condition with toast table access from a stale syscache entry. If a tuple in a syscache contains an out-of-line toasted field, and we try to fetch that field shortly after some other transaction has committed an update or deletion of the tuple, there is a race condition: vacuum could come along and remove the toast tuples before we can fetch them. This leads to transient failures like "missing chunk number 0 for toast value NNNNN in pg_toast_2619", as seen in recent reports from Andrew Hammond and Tim Uckun. The design idea of syscache is that access to stale syscache entries should be prevented by relation-level locks, but that fails for at least two cases where toasted fields are possible: ANALYZE updates pg_statistic rows without locking out sessions that might want to plan queries on the same table, and CREATE OR REPLACE FUNCTION updates pg_proc rows without any meaningful lock at all. The least risky fix seems to be an idea that Heikki suggested when we were dealing with a related problem back in August: forcibly detoast any out-of-line fields before putting a tuple into syscache in the first place. This avoids the problem because at the time we fetch the parent tuple from the catalog, we should be holding an MVCC snapshot that will prevent removal of the toast tuples, even if the parent tuple is outdated immediately after we fetch it. (Note: I'm not convinced that this statement holds true at every instant where we could be fetching a syscache entry at all, but it does appear to hold true at the times where we could fetch an entry that could have a toasted field. We will need to be a bit wary of adding toast tables to low-level catalogs that don't have them already.) An additional benefit is that subsequent uses of the syscache entry should be faster, since they won't have to detoast the field. Back-patch to all supported versions. 
The problem is significantly harder to reproduce in pre-9.0 releases, because of their willingness to flush every entry in a syscache whenever the underlying catalog is vacuumed (cf CatalogCacheFlushRelation); but there is still a window for trouble.
2011-11-02 00:48:37 +01:00
/* ----------
* toast_flatten_tuple -
*
* "Flatten" a tuple to contain no out-of-line toasted fields.
* (This does not eliminate compressed or short-header datums.)
* ----------
*/
extern HeapTuple toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc);
/* ----------
* toast_flatten_tuple_attribute -
*
* If a Datum is of composite type, "flatten" it to contain no toasted fields.
* This must be invoked on any potentially-composite field that is to be
2004-08-29 07:07:03 +02:00
* inserted into a tuple. Doing this preserves the invariant that toasting
* goes only one level deep in a tuple.
* ----------
*/
extern Datum toast_flatten_tuple_attribute(Datum value,
2004-08-29 07:07:03 +02:00
Oid typeId, int32 typeMod);
/* ----------
 * toast_compress_datum -
 *
 *	Create a compressed version of a varlena datum, if possible.
 *
 *	NOTE(review): what is returned when compression is not possible is
 *	not visible in this header; check tuptoaster.c before relying on it.
 * ----------
 */
extern Datum toast_compress_datum(Datum value);
/* ----------
 * toast_raw_datum_size -
 *
 *	Return the raw (detoasted) size of a varlena datum, as a Size.
 *
 *	NOTE(review): whether the result includes the varlena header is not
 *	visible in this header; confirm in tuptoaster.c.
 * ----------
 */
extern Size toast_raw_datum_size(Datum value);
/* ----------
 * toast_datum_size -
 *
 *	Return the storage size of a varlena datum, as a Size.
 *	Contrast with toast_raw_datum_size, which reports the raw
 *	(detoasted) size.
 * ----------
 */
extern Size toast_datum_size(Datum value);
/* ----------
 * toast_get_valid_index -
 *
 *	Return the OID of the valid index associated with a toast relation.
 *
 *	toastoid identifies the toast relation.
 *	NOTE(review): lock is presumably the lock mode taken while opening
 *	the toast relation and its indexes; confirm in tuptoaster.c.
 * ----------
 */
extern Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock);
#endif /* TUPTOASTER_H */