postgresql/src/backend/access/heap/tuptoaster.c

1467 lines
35 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* tuptoaster.c
* Support routines for external and compressed storage of
* variable size attributes.
*
* Copyright (c) 2000-2006, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.59 2006/03/05 15:58:21 momjian Exp $
*
*
* INTERFACE ROUTINES
* toast_insert_or_update -
* Try to make a given tuple fit into one page by compressing
* or moving off attributes
*
* toast_delete -
* Reclaim toast storage when a tuple is deleted
*
* heap_tuple_untoast_attr -
* Fetch back a given value from the "secondary" relation
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <unistd.h>
#include <fcntl.h>
#include "access/heapam.h"
#include "access/genam.h"
#include "access/tuptoaster.h"
#include "catalog/catalog.h"
#include "utils/rel.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/pg_lzcompress.h"
#include "utils/typcache.h"
#undef TOAST_DEBUG
2001-03-22 05:01:46 +01:00
static void toast_delete_datum(Relation rel, Datum value);
static Datum toast_save_datum(Relation rel, Datum value);
2001-03-22 05:01:46 +01:00
static varattrib *toast_fetch_datum(varattrib *attr);
static varattrib *toast_fetch_datum_slice(varattrib *attr,
2002-09-04 22:31:48 +02:00
int32 sliceoffset, int32 length);
/* ----------
* heap_tuple_fetch_attr -
*
2001-03-22 05:01:46 +01:00
* Public entry point to get back a toasted value
* external storage (possibly still in compressed format).
* ----------
*/
varattrib *
heap_tuple_fetch_attr(varattrib *attr)
{
2001-03-22 05:01:46 +01:00
varattrib *result;
if (VARATT_IS_EXTERNAL(attr))
{
/*
* This is an external stored plain value
*/
result = toast_fetch_datum(attr);
}
else
{
/*
2005-10-15 04:49:52 +02:00
* This is a plain value inside of the main tuple - why am I called?
*/
result = attr;
2001-03-22 05:01:46 +01:00
}
return result;
}
/* ----------
* heap_tuple_untoast_attr -
*
* Public entry point to get back a toasted value from compression
* or external storage.
* ----------
*/
varattrib *
heap_tuple_untoast_attr(varattrib *attr)
{
2001-03-22 05:01:46 +01:00
varattrib *result;
if (VARATT_IS_EXTERNAL(attr))
{
if (VARATT_IS_COMPRESSED(attr))
{
/* ----------
* This is an external stored compressed value
* Fetch it from the toast heap and decompress.
* ----------
*/
2001-03-22 05:01:46 +01:00
varattrib *tmp;
tmp = toast_fetch_datum(attr);
2001-03-22 05:01:46 +01:00
result = (varattrib *) palloc(attr->va_content.va_external.va_rawsize
+ VARHDRSZ);
VARATT_SIZEP(result) = attr->va_content.va_external.va_rawsize
2001-03-22 05:01:46 +01:00
+ VARHDRSZ;
pglz_decompress((PGLZ_Header *) tmp, VARATT_DATA(result));
pfree(tmp);
}
else
{
/*
* This is an external stored plain value
*/
result = toast_fetch_datum(attr);
}
}
else if (VARATT_IS_COMPRESSED(attr))
{
/*
* This is a compressed value inside of the main tuple
*/
2001-03-22 05:01:46 +01:00
result = (varattrib *) palloc(attr->va_content.va_compressed.va_rawsize
+ VARHDRSZ);
VARATT_SIZEP(result) = attr->va_content.va_compressed.va_rawsize
2001-03-22 05:01:46 +01:00
+ VARHDRSZ;
pglz_decompress((PGLZ_Header *) attr, VARATT_DATA(result));
}
else
/*
2005-10-15 04:49:52 +02:00
* This is a plain value inside of the main tuple - why am I called?
*/
return attr;
return result;
}
/* ----------
* heap_tuple_untoast_attr_slice -
*
2002-09-04 22:31:48 +02:00
* Public entry point to get back part of a toasted value
* from compression or external storage.
* ----------
*/
2002-09-04 22:31:48 +02:00
varattrib *
heap_tuple_untoast_attr_slice(varattrib *attr, int32 sliceoffset, int32 slicelength)
{
varattrib *preslice;
varattrib *result;
2002-09-04 22:31:48 +02:00
int32 attrsize;
if (VARATT_IS_COMPRESSED(attr))
{
2002-09-04 22:31:48 +02:00
varattrib *tmp;
if (VARATT_IS_EXTERNAL(attr))
tmp = toast_fetch_datum(attr);
else
{
2002-09-04 22:31:48 +02:00
tmp = attr; /* compressed in main tuple */
}
2002-09-04 22:31:48 +02:00
preslice = (varattrib *) palloc(attr->va_content.va_external.va_rawsize
+ VARHDRSZ);
VARATT_SIZEP(preslice) = attr->va_content.va_external.va_rawsize + VARHDRSZ;
pglz_decompress((PGLZ_Header *) tmp, VARATT_DATA(preslice));
2002-09-04 22:31:48 +02:00
if (tmp != attr)
pfree(tmp);
}
2002-09-04 22:31:48 +02:00
else
{
/* Plain value */
if (VARATT_IS_EXTERNAL(attr))
2002-09-04 22:31:48 +02:00
{
/* fast path */
return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
}
else
preslice = attr;
}
2002-09-04 22:31:48 +02:00
/* slicing of datum for compressed cases and plain value */
2002-09-04 22:31:48 +02:00
attrsize = VARSIZE(preslice) - VARHDRSZ;
2002-09-04 22:31:48 +02:00
if (sliceoffset >= attrsize)
{
sliceoffset = 0;
slicelength = 0;
}
2002-09-04 22:31:48 +02:00
if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
slicelength = attrsize - sliceoffset;
2002-09-04 22:31:48 +02:00
result = (varattrib *) palloc(slicelength + VARHDRSZ);
VARATT_SIZEP(result) = slicelength + VARHDRSZ;
2002-09-04 22:31:48 +02:00
memcpy(VARDATA(result), VARDATA(preslice) + sliceoffset, slicelength);
2002-09-04 22:31:48 +02:00
if (preslice != attr)
pfree(preslice);
return result;
}
/* ----------
* toast_raw_datum_size -
*
* Return the raw (detoasted) size of a varlena datum
* ----------
*/
Size
toast_raw_datum_size(Datum value)
{
varattrib *attr = (varattrib *) DatumGetPointer(value);
Size result;
if (VARATT_IS_COMPRESSED(attr))
{
/*
* va_rawsize shows the original data size, whether the datum is
* external or not.
*/
result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ;
}
else if (VARATT_IS_EXTERNAL(attr))
{
/*
2005-10-15 04:49:52 +02:00
* an uncompressed external attribute has rawsize including the header
* (not too consistent!)
*/
result = attr->va_content.va_external.va_rawsize;
}
else
{
/* plain untoasted datum */
result = VARSIZE(attr);
}
return result;
}
/* ----------
* toast_datum_size
*
* Return the physical storage size (possibly compressed) of a varlena datum
* ----------
*/
2005-10-15 04:49:52 +02:00
Size
toast_datum_size(Datum value)
{
2005-10-15 04:49:52 +02:00
varattrib *attr = (varattrib *) DatumGetPointer(value);
Size result;
if (VARATT_IS_EXTERNAL(attr))
{
/*
* Attribute is stored externally - return the extsize whether
2005-10-15 04:49:52 +02:00
* compressed or not. We do not count the size of the toast pointer
* ... should we?
*/
result = attr->va_content.va_external.va_extsize;
}
else
{
/*
2005-10-15 04:49:52 +02:00
* Attribute is stored inline either compressed or not, just calculate
* the size of the datum in either case.
*/
result = VARSIZE(attr);
}
return result;
}
/* ----------
* toast_delete -
*
* Cascaded delete toast-entries on DELETE
* ----------
*/
void
toast_delete(Relation rel, HeapTuple oldtup)
{
2001-03-22 05:01:46 +01:00
TupleDesc tupleDesc;
Form_pg_attribute *att;
int numAttrs;
int i;
Datum toast_values[MaxHeapAttributeNumber];
bool toast_isnull[MaxHeapAttributeNumber];
/*
* Get the tuple descriptor and break down the tuple into fields.
*
2004-08-29 07:07:03 +02:00
* NOTE: it's debatable whether to use heap_deformtuple() here or just
2005-10-15 04:49:52 +02:00
* heap_getattr() only the varlena columns. The latter could win if there
* are few varlena columns and many non-varlena ones. However,
* heap_deformtuple costs only O(N) while the heap_getattr way would cost
* O(N^2) if there are many varlena columns, so it seems better to err on
* the side of linear cost. (We won't even be here unless there's at
* least one varlena column, by the way.)
*/
2001-03-22 05:01:46 +01:00
tupleDesc = rel->rd_att;
att = tupleDesc->attrs;
numAttrs = tupleDesc->natts;
Assert(numAttrs <= MaxHeapAttributeNumber);
heap_deform_tuple(oldtup, tupleDesc, toast_values, toast_isnull);
/*
2005-10-15 04:49:52 +02:00
* Check for external stored attributes and delete them from the secondary
* relation.
*/
for (i = 0; i < numAttrs; i++)
{
if (att[i]->attlen == -1)
{
2004-08-29 07:07:03 +02:00
Datum value = toast_values[i];
if (!toast_isnull[i] && VARATT_IS_EXTERNAL(value))
toast_delete_datum(rel, value);
}
}
}
/* ----------
* toast_insert_or_update -
*
* Delete no-longer-used toast-entries and create new ones to
* make the new tuple fit on INSERT or UPDATE
*
* Inputs:
* newtup: the candidate new tuple to be inserted
* oldtup: the old row version for UPDATE, or NULL for INSERT
* Result:
* either newtup if no toasting is needed, or a palloc'd modified tuple
* that is what should actually get stored
*
* NOTE: neither newtup nor oldtup will be modified. This is a change
* from the pre-8.1 API of this routine.
* ----------
*/
HeapTuple
toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
{
HeapTuple result_tuple;
2001-03-22 05:01:46 +01:00
TupleDesc tupleDesc;
Form_pg_attribute *att;
int numAttrs;
int i;
bool need_change = false;
bool need_free = false;
bool need_delold = false;
bool has_nulls = false;
Size maxDataLen;
char toast_action[MaxHeapAttributeNumber];
bool toast_isnull[MaxHeapAttributeNumber];
bool toast_oldisnull[MaxHeapAttributeNumber];
2001-03-22 05:01:46 +01:00
Datum toast_values[MaxHeapAttributeNumber];
Datum toast_oldvalues[MaxHeapAttributeNumber];
2001-03-22 05:01:46 +01:00
int32 toast_sizes[MaxHeapAttributeNumber];
bool toast_free[MaxHeapAttributeNumber];
bool toast_delold[MaxHeapAttributeNumber];
/*
* Get the tuple descriptor and break down the tuple(s) into fields.
*/
2001-03-22 05:01:46 +01:00
tupleDesc = rel->rd_att;
att = tupleDesc->attrs;
numAttrs = tupleDesc->natts;
Assert(numAttrs <= MaxHeapAttributeNumber);
heap_deform_tuple(newtup, tupleDesc, toast_values, toast_isnull);
if (oldtup != NULL)
heap_deform_tuple(oldtup, tupleDesc, toast_oldvalues, toast_oldisnull);
/* ----------
* Then collect information about the values given
*
* NOTE: toast_action[i] can have these values:
* ' ' default handling
* 'p' already processed --- don't touch it
* 'x' incompressible, but OK to move off
*
* NOTE: toast_sizes[i] is only made valid for varlena attributes with
* toast_action[i] different from 'p'.
* ----------
*/
2001-03-22 05:01:46 +01:00
memset(toast_action, ' ', numAttrs * sizeof(char));
memset(toast_free, 0, numAttrs * sizeof(bool));
memset(toast_delold, 0, numAttrs * sizeof(bool));
for (i = 0; i < numAttrs; i++)
{
2001-03-22 05:01:46 +01:00
varattrib *old_value;
varattrib *new_value;
if (oldtup != NULL)
{
/*
* For UPDATE get the old and new values of this attribute
*/
old_value = (varattrib *) DatumGetPointer(toast_oldvalues[i]);
2001-03-22 05:01:46 +01:00
new_value = (varattrib *) DatumGetPointer(toast_values[i]);
/*
* If the old value is an external stored one, check if it has
* changed so we have to delete it later.
*/
if (att[i]->attlen == -1 && !toast_oldisnull[i] &&
2001-03-22 05:01:46 +01:00
VARATT_IS_EXTERNAL(old_value))
{
if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) ||
old_value->va_content.va_external.va_valueid !=
new_value->va_content.va_external.va_valueid ||
old_value->va_content.va_external.va_toastrelid !=
new_value->va_content.va_external.va_toastrelid)
{
/*
* The old external stored value isn't needed any more
* after the update
*/
toast_delold[i] = true;
need_delold = true;
}
else
{
/*
2005-10-15 04:49:52 +02:00
* This attribute isn't changed by this update so we reuse
* the original reference to the old value in the new
* tuple.
*/
toast_action[i] = 'p';
toast_sizes[i] = VARATT_SIZE(toast_values[i]);
continue;
}
}
}
else
{
/*
* For INSERT simply get the new value
*/
new_value = (varattrib *) DatumGetPointer(toast_values[i]);
}
/*
* Handle NULL attributes
*/
if (toast_isnull[i])
{
toast_action[i] = 'p';
has_nulls = true;
continue;
}
/*
* Now look at varlena attributes
*/
if (att[i]->attlen == -1)
{
/*
* If the table's attribute says PLAIN always, force it so.
*/
if (att[i]->attstorage == 'p')
toast_action[i] = 'p';
/*
* We took care of UPDATE above, so any external value we find
* still in the tuple must be someone else's we cannot reuse.
* Expand it to plain (and, probably, toast it again below).
*/
if (VARATT_IS_EXTERNAL(new_value))
{
new_value = heap_tuple_untoast_attr(new_value);
toast_values[i] = PointerGetDatum(new_value);
toast_free[i] = true;
need_change = true;
need_free = true;
}
/*
* Remember the size of this attribute
*/
toast_sizes[i] = VARATT_SIZE(new_value);
}
else
{
/*
* Not a varlena attribute, plain storage always
*/
toast_action[i] = 'p';
}
}
/* ----------
* Compress and/or save external until data fits into target length
*
* 1: Inline compress attributes with attstorage 'x'
* 2: Store attributes with attstorage 'x' or 'e' external
2001-03-22 05:01:46 +01:00
* 3: Inline compress attributes with attstorage 'm'
* 4: Store attributes with attstorage 'm' external
* ----------
*/
maxDataLen = offsetof(HeapTupleHeaderData, t_bits);
if (has_nulls)
maxDataLen += BITMAPLEN(numAttrs);
maxDataLen = TOAST_TUPLE_TARGET - MAXALIGN(maxDataLen);
/*
* Look for attributes with attstorage 'x' to compress
*/
while (MAXALIGN(heap_compute_data_size(tupleDesc,
toast_values, toast_isnull)) >
2001-03-22 05:01:46 +01:00
maxDataLen)
{
2001-03-22 05:01:46 +01:00
int biggest_attno = -1;
int32 biggest_size = MAXALIGN(sizeof(varattrib));
Datum old_value;
Datum new_value;
/*
* Search for the biggest yet uncompressed internal attribute
*/
for (i = 0; i < numAttrs; i++)
{
if (toast_action[i] != ' ')
continue;
if (VARATT_IS_EXTENDED(toast_values[i]))
continue;
if (att[i]->attstorage != 'x')
continue;
if (toast_sizes[i] > biggest_size)
{
biggest_attno = i;
2001-03-22 05:01:46 +01:00
biggest_size = toast_sizes[i];
}
}
if (biggest_attno < 0)
break;
/*
* Attempt to compress it inline
*/
2001-03-22 05:01:46 +01:00
i = biggest_attno;
old_value = toast_values[i];
new_value = toast_compress_datum(old_value);
if (DatumGetPointer(new_value) != NULL)
{
/* successful compression */
if (toast_free[i])
pfree(DatumGetPointer(old_value));
2001-03-22 05:01:46 +01:00
toast_values[i] = new_value;
toast_free[i] = true;
toast_sizes[i] = VARATT_SIZE(toast_values[i]);
need_change = true;
need_free = true;
}
else
{
2001-03-22 05:01:46 +01:00
/*
2005-10-15 04:49:52 +02:00
* incompressible data, ignore on subsequent compression passes
2001-03-22 05:01:46 +01:00
*/
toast_action[i] = 'x';
}
}
/*
2005-10-15 04:49:52 +02:00
* Second we look for attributes of attstorage 'x' or 'e' that are still
* inline.
*/
while (MAXALIGN(heap_compute_data_size(tupleDesc,
toast_values, toast_isnull)) >
2001-03-22 05:01:46 +01:00
maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid)
{
2001-03-22 05:01:46 +01:00
int biggest_attno = -1;
int32 biggest_size = MAXALIGN(sizeof(varattrib));
Datum old_value;
/*------
* Search for the biggest yet inlined attribute with
* attstorage equals 'x' or 'e'
*------
*/
for (i = 0; i < numAttrs; i++)
{
if (toast_action[i] == 'p')
continue;
if (VARATT_IS_EXTERNAL(toast_values[i]))
continue;
if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
continue;
if (toast_sizes[i] > biggest_size)
{
biggest_attno = i;
2001-03-22 05:01:46 +01:00
biggest_size = toast_sizes[i];
}
}
if (biggest_attno < 0)
break;
/*
* Store this external
*/
2001-03-22 05:01:46 +01:00
i = biggest_attno;
old_value = toast_values[i];
toast_action[i] = 'p';
toast_values[i] = toast_save_datum(rel, toast_values[i]);
if (toast_free[i])
pfree(DatumGetPointer(old_value));
2001-03-22 05:01:46 +01:00
toast_free[i] = true;
toast_sizes[i] = VARATT_SIZE(toast_values[i]);
need_change = true;
2001-03-22 05:01:46 +01:00
need_free = true;
}
/*
* Round 3 - this time we take attributes with storage 'm' into
* compression
*/
while (MAXALIGN(heap_compute_data_size(tupleDesc,
toast_values, toast_isnull)) >
2001-03-22 05:01:46 +01:00
maxDataLen)
{
2001-03-22 05:01:46 +01:00
int biggest_attno = -1;
int32 biggest_size = MAXALIGN(sizeof(varattrib));
Datum old_value;
Datum new_value;
/*
* Search for the biggest yet uncompressed internal attribute
*/
for (i = 0; i < numAttrs; i++)
{
if (toast_action[i] != ' ')
continue;
if (VARATT_IS_EXTENDED(toast_values[i]))
continue;
if (att[i]->attstorage != 'm')
continue;
if (toast_sizes[i] > biggest_size)
{
biggest_attno = i;
2001-03-22 05:01:46 +01:00
biggest_size = toast_sizes[i];
}
}
if (biggest_attno < 0)
break;
/*
* Attempt to compress it inline
*/
2001-03-22 05:01:46 +01:00
i = biggest_attno;
old_value = toast_values[i];
new_value = toast_compress_datum(old_value);
if (DatumGetPointer(new_value) != NULL)
{
/* successful compression */
if (toast_free[i])
pfree(DatumGetPointer(old_value));
2001-03-22 05:01:46 +01:00
toast_values[i] = new_value;
toast_free[i] = true;
toast_sizes[i] = VARATT_SIZE(toast_values[i]);
need_change = true;
need_free = true;
}
else
{
2001-03-22 05:01:46 +01:00
/*
2005-10-15 04:49:52 +02:00
* incompressible data, ignore on subsequent compression passes
2001-03-22 05:01:46 +01:00
*/
toast_action[i] = 'x';
}
}
/*
* Finally we store attributes of type 'm' external
*/
while (MAXALIGN(heap_compute_data_size(tupleDesc,
toast_values, toast_isnull)) >
2001-03-22 05:01:46 +01:00
maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid)
{
2001-03-22 05:01:46 +01:00
int biggest_attno = -1;
int32 biggest_size = MAXALIGN(sizeof(varattrib));
Datum old_value;
/*--------
* Search for the biggest yet inlined attribute with
* attstorage = 'm'
*--------
*/
for (i = 0; i < numAttrs; i++)
{
if (toast_action[i] == 'p')
continue;
if (VARATT_IS_EXTERNAL(toast_values[i]))
continue;
if (att[i]->attstorage != 'm')
continue;
if (toast_sizes[i] > biggest_size)
{
biggest_attno = i;
2001-03-22 05:01:46 +01:00
biggest_size = toast_sizes[i];
}
}
if (biggest_attno < 0)
break;
/*
* Store this external
*/
2001-03-22 05:01:46 +01:00
i = biggest_attno;
old_value = toast_values[i];
toast_action[i] = 'p';
toast_values[i] = toast_save_datum(rel, toast_values[i]);
if (toast_free[i])
pfree(DatumGetPointer(old_value));
2001-03-22 05:01:46 +01:00
toast_free[i] = true;
toast_sizes[i] = VARATT_SIZE(toast_values[i]);
need_change = true;
2001-03-22 05:01:46 +01:00
need_free = true;
}
/*
2005-10-15 04:49:52 +02:00
* In the case we toasted any values, we need to build a new heap tuple
* with the changed values.
*/
if (need_change)
{
HeapTupleHeader olddata = newtup->t_data;
HeapTupleHeader new_data;
2001-03-22 05:01:46 +01:00
int32 new_len;
/*
* Calculate the new size of the tuple. Header size should not
* change, but data size might.
*/
new_len = offsetof(HeapTupleHeaderData, t_bits);
if (has_nulls)
new_len += BITMAPLEN(numAttrs);
if (olddata->t_infomask & HEAP_HASOID)
new_len += sizeof(Oid);
new_len = MAXALIGN(new_len);
Assert(new_len == olddata->t_hoff);
new_len += heap_compute_data_size(tupleDesc,
toast_values, toast_isnull);
/*
* Allocate and zero the space needed, and fill HeapTupleData fields.
*/
result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_len);
result_tuple->t_len = new_len;
result_tuple->t_self = newtup->t_self;
result_tuple->t_tableOid = newtup->t_tableOid;
new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
result_tuple->t_data = new_data;
/*
* Put the existing tuple header and the changed values into place
*/
memcpy(new_data, olddata, olddata->t_hoff);
heap_fill_tuple(tupleDesc,
toast_values,
toast_isnull,
(char *) new_data + olddata->t_hoff,
&(new_data->t_infomask),
has_nulls ? new_data->t_bits : NULL);
}
else
result_tuple = newtup;
/*
* Free allocated temp values
*/
if (need_free)
for (i = 0; i < numAttrs; i++)
if (toast_free[i])
pfree(DatumGetPointer(toast_values[i]));
/*
* Delete external values from the old tuple
*/
if (need_delold)
for (i = 0; i < numAttrs; i++)
if (toast_delold[i])
toast_delete_datum(rel, toast_oldvalues[i]);
return result_tuple;
}
/* ----------
* toast_flatten_tuple_attribute -
*
* If a Datum is of composite type, "flatten" it to contain no toasted fields.
* This must be invoked on any potentially-composite field that is to be
2004-08-29 07:07:03 +02:00
* inserted into a tuple. Doing this preserves the invariant that toasting
* goes only one level deep in a tuple.
* ----------
*/
Datum
toast_flatten_tuple_attribute(Datum value,
Oid typeId, int32 typeMod)
{
TupleDesc tupleDesc;
HeapTupleHeader olddata;
HeapTupleHeader new_data;
int32 new_len;
HeapTupleData tmptup;
Form_pg_attribute *att;
int numAttrs;
int i;
bool need_change = false;
bool has_nulls = false;
Datum toast_values[MaxTupleAttributeNumber];
bool toast_isnull[MaxTupleAttributeNumber];
bool toast_free[MaxTupleAttributeNumber];
/*
* See if it's a composite type, and get the tupdesc if so.
*/
tupleDesc = lookup_rowtype_tupdesc_noerror(typeId, typeMod, true);
if (tupleDesc == NULL)
return value; /* not a composite type */
att = tupleDesc->attrs;
numAttrs = tupleDesc->natts;
/*
* Break down the tuple into fields.
*/
olddata = DatumGetHeapTupleHeader(value);
Assert(typeId == HeapTupleHeaderGetTypeId(olddata));
Assert(typeMod == HeapTupleHeaderGetTypMod(olddata));
/* Build a temporary HeapTuple control structure */
tmptup.t_len = HeapTupleHeaderGetDatumLength(olddata);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
tmptup.t_data = olddata;
Assert(numAttrs <= MaxTupleAttributeNumber);
heap_deform_tuple(&tmptup, tupleDesc, toast_values, toast_isnull);
memset(toast_free, 0, numAttrs * sizeof(bool));
for (i = 0; i < numAttrs; i++)
{
/*
* Look at non-null varlena attributes
*/
if (toast_isnull[i])
has_nulls = true;
else if (att[i]->attlen == -1)
{
varattrib *new_value;
new_value = (varattrib *) DatumGetPointer(toast_values[i]);
if (VARATT_IS_EXTENDED(new_value))
{
new_value = heap_tuple_untoast_attr(new_value);
toast_values[i] = PointerGetDatum(new_value);
toast_free[i] = true;
need_change = true;
}
}
}
/*
* If nothing to untoast, just return the original tuple.
*/
if (!need_change)
return value;
/*
2005-10-15 04:49:52 +02:00
* Calculate the new size of the tuple. Header size should not change,
* but data size might.
*/
new_len = offsetof(HeapTupleHeaderData, t_bits);
if (has_nulls)
new_len += BITMAPLEN(numAttrs);
if (olddata->t_infomask & HEAP_HASOID)
new_len += sizeof(Oid);
new_len = MAXALIGN(new_len);
Assert(new_len == olddata->t_hoff);
new_len += heap_compute_data_size(tupleDesc, toast_values, toast_isnull);
new_data = (HeapTupleHeader) palloc0(new_len);
/*
* Put the tuple header and the changed values into place
*/
memcpy(new_data, olddata, olddata->t_hoff);
HeapTupleHeaderSetDatumLength(new_data, new_len);
heap_fill_tuple(tupleDesc,
toast_values,
toast_isnull,
(char *) new_data + olddata->t_hoff,
&(new_data->t_infomask),
has_nulls ? new_data->t_bits : NULL);
/*
* Free allocated temp values
*/
for (i = 0; i < numAttrs; i++)
if (toast_free[i])
pfree(DatumGetPointer(toast_values[i]));
return PointerGetDatum(new_data);
}
/* ----------
* toast_compress_datum -
*
* Create a compressed version of a varlena datum
*
* If we fail (ie, compressed result is actually bigger than original)
* then return NULL. We must not use compressed data if it'd expand
* the tuple!
* ----------
*/
Datum
toast_compress_datum(Datum value)
{
2001-03-22 05:01:46 +01:00
varattrib *tmp;
tmp = (varattrib *) palloc(sizeof(PGLZ_Header) + VARATT_SIZE(value));
pglz_compress(VARATT_DATA(value), VARATT_SIZE(value) - VARHDRSZ,
(PGLZ_Header *) tmp,
PGLZ_strategy_default);
if (VARATT_SIZE(tmp) < VARATT_SIZE(value))
{
/* successful compression */
VARATT_SIZEP(tmp) |= VARATT_FLAG_COMPRESSED;
return PointerGetDatum(tmp);
}
else
{
/* incompressible data */
pfree(tmp);
return PointerGetDatum(NULL);
}
}
/* ----------
* toast_save_datum -
*
* Save one single datum into the secondary relation and return
* a varattrib reference for it.
* ----------
*/
static Datum
toast_save_datum(Relation rel, Datum value)
{
2001-03-22 05:01:46 +01:00
Relation toastrel;
Relation toastidx;
HeapTuple toasttup;
TupleDesc toasttupDesc;
Datum t_values[3];
bool t_isnull[3];
2001-03-22 05:01:46 +01:00
varattrib *result;
struct
{
struct varlena hdr;
char data[TOAST_MAX_CHUNK_SIZE];
} chunk_data;
2001-03-22 05:01:46 +01:00
int32 chunk_size;
int32 chunk_seq = 0;
char *data_p;
int32 data_todo;
/*
2005-10-15 04:49:52 +02:00
* Open the toast relation and its index. We can use the index to check
* uniqueness of the OID we assign to the toasted item, even though it has
* additional columns besides OID.
*/
toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
toasttupDesc = toastrel->rd_att;
toastidx = index_open(toastrel->rd_rel->reltoastidxid);
/*
* Create the varattrib reference
*/
2001-03-22 05:01:46 +01:00
result = (varattrib *) palloc(sizeof(varattrib));
2001-03-22 05:01:46 +01:00
result->va_header = sizeof(varattrib) | VARATT_FLAG_EXTERNAL;
if (VARATT_IS_COMPRESSED(value))
{
result->va_header |= VARATT_FLAG_COMPRESSED;
2001-03-22 05:01:46 +01:00
result->va_content.va_external.va_rawsize =
((varattrib *) value)->va_content.va_compressed.va_rawsize;
}
else
result->va_content.va_external.va_rawsize = VARATT_SIZE(value);
2001-03-22 05:01:46 +01:00
result->va_content.va_external.va_extsize =
VARATT_SIZE(value) - VARHDRSZ;
result->va_content.va_external.va_valueid =
GetNewOidWithIndex(toastrel, toastidx);
2001-03-22 05:01:46 +01:00
result->va_content.va_external.va_toastrelid =
rel->rd_rel->reltoastrelid;
/*
* Initialize constant parts of the tuple data
*/
t_values[0] = ObjectIdGetDatum(result->va_content.va_external.va_valueid);
t_values[2] = PointerGetDatum(&chunk_data);
t_isnull[0] = false;
t_isnull[1] = false;
t_isnull[2] = false;
/*
* Get the data to process
*/
2001-03-22 05:01:46 +01:00
data_p = VARATT_DATA(value);
data_todo = VARATT_SIZE(value) - VARHDRSZ;
/*
* We must explicitly lock the toast index because we aren't using an
* index scan here.
*/
LockRelation(toastidx, RowExclusiveLock);
2001-03-22 05:01:46 +01:00
/*
2001-03-22 05:01:46 +01:00
* Split up the item into chunks
*/
while (data_todo > 0)
{
/*
* Calculate the size of this chunk
*/
chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);
/*
* Build a tuple and store it
*/
t_values[1] = Int32GetDatum(chunk_seq++);
VARATT_SIZEP(&chunk_data) = chunk_size + VARHDRSZ;
memcpy(VARATT_DATA(&chunk_data), data_p, chunk_size);
toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
if (!HeapTupleIsValid(toasttup))
elog(ERROR, "failed to build TOAST tuple");
simple_heap_insert(toastrel, toasttup);
/*
* Create the index entry. We cheat a little here by not using
2005-10-15 04:49:52 +02:00
* FormIndexDatum: this relies on the knowledge that the index columns
* are the same as the initial columns of the table.
*
* Note also that there had better not be any user-created index on
* the TOAST table, since we don't bother to update anything else.
*/
index_insert(toastidx, t_values, t_isnull,
&(toasttup->t_self),
toastrel, toastidx->rd_index->indisunique);
/*
* Free memory
*/
heap_freetuple(toasttup);
/*
* Move on to next chunk
*/
data_todo -= chunk_size;
data_p += chunk_size;
}
/*
* Done - close toast relation and return the reference
*/
UnlockRelation(toastidx, RowExclusiveLock);
index_close(toastidx);
heap_close(toastrel, RowExclusiveLock);
return PointerGetDatum(result);
}
/* ----------
* toast_delete_datum -
*
* Delete a single external stored value.
* ----------
*/
static void
toast_delete_datum(Relation rel, Datum value)
{
varattrib *attr = (varattrib *) DatumGetPointer(value);
2001-03-22 05:01:46 +01:00
Relation toastrel;
Relation toastidx;
ScanKeyData toastkey;
IndexScanDesc toastscan;
HeapTuple toasttup;
if (!VARATT_IS_EXTERNAL(attr))
return;
/*
* Open the toast relation and it's index
*/
2001-03-22 05:01:46 +01:00
toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
RowExclusiveLock);
toastidx = index_open(toastrel->rd_rel->reltoastidxid);
/*
* Setup a scan key to fetch from the index by va_valueid (we don't
* particularly care whether we see them in sequence or not)
*/
ScanKeyInit(&toastkey,
(AttrNumber) 1,
BTEqualStrategyNumber, F_OIDEQ,
2005-10-15 04:49:52 +02:00
ObjectIdGetDatum(attr->va_content.va_external.va_valueid));
/*
* Find the chunks by index
*/
toastscan = index_beginscan(toastrel, toastidx, true,
SnapshotToast, 1, &toastkey);
while ((toasttup = index_getnext(toastscan, ForwardScanDirection)) != NULL)
{
/*
* Have a chunk, delete it
*/
simple_heap_delete(toastrel, &toasttup->t_self);
}
/*
* End scan and close relations
*/
index_endscan(toastscan);
index_close(toastidx);
heap_close(toastrel, RowExclusiveLock);
}
/* ----------
* toast_fetch_datum -
*
* Reconstruct an in memory varattrib from the chunks saved
* in the toast relation
* ----------
*/
static varattrib *
toast_fetch_datum(varattrib *attr)
{
2001-03-22 05:01:46 +01:00
Relation toastrel;
Relation toastidx;
ScanKeyData toastkey;
IndexScanDesc toastscan;
HeapTuple ttup;
TupleDesc toasttupDesc;
varattrib *result;
int32 ressize;
int32 residx,
nextidx;
int32 numchunks;
2001-03-22 05:01:46 +01:00
Pointer chunk;
bool isnull;
int32 chunksize;
ressize = attr->va_content.va_external.va_extsize;
2001-03-22 05:01:46 +01:00
numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
2001-03-22 05:01:46 +01:00
result = (varattrib *) palloc(ressize + VARHDRSZ);
VARATT_SIZEP(result) = ressize + VARHDRSZ;
if (VARATT_IS_COMPRESSED(attr))
VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED;
/*
* Open the toast relation and its index
*/
2001-03-22 05:01:46 +01:00
toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
AccessShareLock);
toasttupDesc = toastrel->rd_att;
toastidx = index_open(toastrel->rd_rel->reltoastidxid);
/*
* Setup a scan key to fetch from the index by va_valueid
*/
ScanKeyInit(&toastkey,
(AttrNumber) 1,
BTEqualStrategyNumber, F_OIDEQ,
2005-10-15 04:49:52 +02:00
ObjectIdGetDatum(attr->va_content.va_external.va_valueid));
/*
* Read the chunks by index
*
* Note that because the index is actually on (valueid, chunkidx) we will
* see the chunks in chunkidx order, even though we didn't explicitly ask
* for it.
*/
nextidx = 0;
toastscan = index_beginscan(toastrel, toastidx, true,
SnapshotToast, 1, &toastkey);
while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL)
{
/*
* Have a chunk, extract the sequence number and the data
*/
residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
Assert(!isnull);
chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
Assert(!isnull);
chunksize = VARATT_SIZE(chunk) - VARHDRSZ;
/*
* Some checks on the data we've found
*/
if (residx != nextidx)
elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
residx, nextidx,
attr->va_content.va_external.va_valueid);
2001-03-22 05:01:46 +01:00
if (residx < numchunks - 1)
{
if (chunksize != TOAST_MAX_CHUNK_SIZE)
elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
chunksize, residx,
attr->va_content.va_external.va_valueid);
}
else if (residx < numchunks)
{
if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
chunksize, residx,
attr->va_content.va_external.va_valueid);
}
else
elog(ERROR, "unexpected chunk number %d for toast value %u",
residx,
attr->va_content.va_external.va_valueid);
/*
* Copy the data into proper place in our result
*/
2001-03-22 05:01:46 +01:00
memcpy(((char *) VARATT_DATA(result)) + residx * TOAST_MAX_CHUNK_SIZE,
VARATT_DATA(chunk),
chunksize);
nextidx++;
}
/*
* Final checks that we successfully fetched the datum
*/
if (nextidx != numchunks)
elog(ERROR, "missing chunk number %d for toast value %u",
nextidx,
2001-03-22 05:01:46 +01:00
attr->va_content.va_external.va_valueid);
/*
* End scan and close relations
*/
index_endscan(toastscan);
index_close(toastidx);
heap_close(toastrel, AccessShareLock);
return result;
}
/* ----------
* toast_fetch_datum_slice -
*
* Reconstruct a segment of a varattrib from the chunks saved
* in the toast relation
* ----------
*/
static varattrib *
toast_fetch_datum_slice(varattrib *attr, int32 sliceoffset, int32 length)
{
Relation toastrel;
Relation toastidx;
ScanKeyData toastkey[3];
int nscankeys;
IndexScanDesc toastscan;
HeapTuple ttup;
TupleDesc toasttupDesc;
varattrib *result;
int32 attrsize;
int32 residx;
2002-09-04 22:31:48 +02:00
int32 nextidx;
int numchunks;
int startchunk;
int endchunk;
int32 startoffset;
int32 endoffset;
2002-09-04 22:31:48 +02:00
int totalchunks;
Pointer chunk;
bool isnull;
int32 chunksize;
2002-09-04 22:31:48 +02:00
int32 chcpystrt;
int32 chcpyend;
attrsize = attr->va_content.va_external.va_extsize;
totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
2002-09-04 22:31:48 +02:00
if (sliceoffset >= attrsize)
{
2002-09-04 22:31:48 +02:00
sliceoffset = 0;
length = 0;
}
if (((sliceoffset + length) > attrsize) || length < 0)
2002-09-04 22:31:48 +02:00
length = attrsize - sliceoffset;
result = (varattrib *) palloc(length + VARHDRSZ);
VARATT_SIZEP(result) = length + VARHDRSZ;
if (VARATT_IS_COMPRESSED(attr))
VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED;
2002-09-04 22:31:48 +02:00
if (length == 0)
return result; /* Can save a lot of work at this point! */
startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
2002-09-04 22:31:48 +02:00
numchunks = (endchunk - startchunk) + 1;
startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;
/*
* Open the toast relation and it's index
*/
toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
AccessShareLock);
toasttupDesc = toastrel->rd_att;
toastidx = index_open(toastrel->rd_rel->reltoastidxid);
/*
2005-10-15 04:49:52 +02:00
* Setup a scan key to fetch from the index. This is either two keys or
* three depending on the number of chunks.
*/
ScanKeyInit(&toastkey[0],
(AttrNumber) 1,
BTEqualStrategyNumber, F_OIDEQ,
2005-10-15 04:49:52 +02:00
ObjectIdGetDatum(attr->va_content.va_external.va_valueid));
2002-09-04 22:31:48 +02:00
/*
* Use equality condition for one chunk, a range condition otherwise:
*/
2002-09-04 22:31:48 +02:00
if (numchunks == 1)
{
ScanKeyInit(&toastkey[1],
(AttrNumber) 2,
BTEqualStrategyNumber, F_INT4EQ,
Int32GetDatum(startchunk));
2002-09-04 22:31:48 +02:00
nscankeys = 2;
}
else
{
ScanKeyInit(&toastkey[1],
(AttrNumber) 2,
BTGreaterEqualStrategyNumber, F_INT4GE,
Int32GetDatum(startchunk));
ScanKeyInit(&toastkey[2],
(AttrNumber) 2,
BTLessEqualStrategyNumber, F_INT4LE,
Int32GetDatum(endchunk));
2002-09-04 22:31:48 +02:00
nscankeys = 3;
}
/*
* Read the chunks by index
*
* The index is on (valueid, chunkidx) so they will come in order
*/
nextidx = startchunk;
toastscan = index_beginscan(toastrel, toastidx, true,
SnapshotToast, nscankeys, toastkey);
while ((ttup = index_getnext(toastscan, ForwardScanDirection)) != NULL)
{
/*
* Have a chunk, extract the sequence number and the data
*/
residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
Assert(!isnull);
chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
Assert(!isnull);
chunksize = VARATT_SIZE(chunk) - VARHDRSZ;
/*
* Some checks on the data we've found
*/
if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u",
residx, nextidx,
attr->va_content.va_external.va_valueid);
if (residx < totalchunks - 1)
{
if (chunksize != TOAST_MAX_CHUNK_SIZE)
elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
chunksize, residx,
attr->va_content.va_external.va_valueid);
}
else
{
if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
chunksize, residx,
attr->va_content.va_external.va_valueid);
}
/*
* Copy the data into proper place in our result
*/
chcpystrt = 0;
chcpyend = chunksize - 1;
2002-09-04 22:31:48 +02:00
if (residx == startchunk)
chcpystrt = startoffset;
if (residx == endchunk)
chcpyend = endoffset;
memcpy(((char *) VARATT_DATA(result)) +
(residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
VARATT_DATA(chunk) + chcpystrt,
(chcpyend - chcpystrt) + 1);
2002-09-04 22:31:48 +02:00
nextidx++;
}
/*
* Final checks that we successfully fetched the datum
*/
2002-09-04 22:31:48 +02:00
if (nextidx != (endchunk + 1))
elog(ERROR, "missing chunk number %d for toast value %u",
nextidx,
attr->va_content.va_external.va_valueid);
/*
* End scan and close relations
*/
index_endscan(toastscan);
index_close(toastidx);
heap_close(toastrel, AccessShareLock);
return result;
}