tableam: New callback relation_fetch_toast_slice.

Instead of always calling heap_fetch_toast_slice during detoasting,
invoke a table AM callback which, when the toast table is a heap
table, will be heap_fetch_toast_slice.

This makes it possible for a table AM other than heap to be used
as a TOAST table. It also completes the series of commits intended
to improve the interaction of tableam with TOAST that began with
commit 8b94dab06617ef80a0901ab103ebd8754427ef5a; detoast.c is
now, hopefully, fully AM-independent.

Patch by me, reviewed by Andres Freund and Peter Eisentraut.

Discussion: http://postgr.es/m/CA+TgmoZv-=2iWM4jcw5ZhJeL18HF96+W1yJeYrnGMYdkFFnEpQ@mail.gmail.com
This commit is contained in:
Robert Haas 2020-01-07 14:35:48 -05:00
parent 83322e38da
commit ce242ae154
5 changed files with 245 additions and 193 deletions

View File

@ -14,22 +14,17 @@
#include "postgres.h"
#include "access/detoast.h"
#include "access/genam.h"
#include "access/heaptoast.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/toast_internals.h"
#include "common/pg_lzcompress.h"
#include "utils/expandeddatum.h"
#include "utils/fmgroids.h"
#include "utils/rel.h"
static struct varlena *toast_fetch_datum(struct varlena *attr);
static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
int32 sliceoffset,
int32 slicelength);
static void heap_fetch_toast_slice(Relation toastrel, Oid valueid,
int32 attrsize, int32 sliceoffset,
int32 slicelength, struct varlena *result);
static struct varlena *toast_decompress_datum(struct varlena *attr);
static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
@ -356,8 +351,8 @@ toast_fetch_datum(struct varlena *attr)
toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
/* Fetch all chunks */
heap_fetch_toast_slice(toastrel, toast_pointer.va_valueid, attrsize, 0,
attrsize, result);
table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
attrsize, 0, attrsize, result);
/* Close toast table */
table_close(toastrel, AccessShareLock);
@ -431,8 +426,9 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
/* Fetch all chunks */
heap_fetch_toast_slice(toastrel, toast_pointer.va_valueid, attrsize,
sliceoffset, slicelength, result);
table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
attrsize, sliceoffset, slicelength,
result);
/* Close toast table */
table_close(toastrel, AccessShareLock);
@ -440,189 +436,6 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
return result;
}
/*
* Fetch a TOAST slice from a heap table.
*
* Opens the toast relation's indexes, scans the valid one for the chunks of
* valueid that overlap the requested byte range, sanity-checks every chunk
* returned, and copies the requested bytes into result.
*
* toastrel is the relation from which chunks are to be fetched.
* valueid identifies the TOAST value from which chunks are being fetched.
* attrsize is the total size of the TOAST value.
* sliceoffset is the byte offset within the TOAST value from which to fetch.
* slicelength is the number of bytes to be fetched from the TOAST value.
* result is the varlena into which the results should be written. The
* bytes are stored starting at VARDATA(result); this function does not
* allocate result, so the caller is expected to have sized it for
* slicelength bytes.
*
* Reports ERROR if a chunk arrives out of sequence, lies outside the
* requested chunk range, has an unexpected size, or is missing.
*/
static void
heap_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize,
int32 sliceoffset, int32 slicelength,
struct varlena *result)
{
Relation *toastidxs;
ScanKeyData toastkey[3];
TupleDesc toasttupDesc = toastrel->rd_att;
int nscankeys;
SysScanDesc toastscan;
HeapTuple ttup;
int32 expectedchunk;
/* Number of chunks in the whole value (attrsize rounded up to chunks) */
int32 totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
int startchunk;
int endchunk;
int num_indexes;
int validIndex;
SnapshotData SnapshotToast;
/* Look for the valid index of toast relation */
validIndex = toast_open_indexes(toastrel,
AccessShareLock,
&toastidxs,
&num_indexes);
/* Zero-based numbers of the first and last chunks touched by the slice */
startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
endchunk = (sliceoffset + slicelength - 1) / TOAST_MAX_CHUNK_SIZE;
/*
* NOTE(review): the code below treats totalchunks - 1 as the last chunk
* number, so this assertion also accepts endchunk == totalchunks; "<"
* may have been intended -- confirm before tightening.
*/
Assert(endchunk <= totalchunks);
/*
* Set up the scan keys to fetch from the index. The first key always
* matches the value ID (index column 1); zero, one or two further keys
* on the chunk number (index column 2) are added below, so the scan uses
* one, two or three keys in total.
*/
ScanKeyInit(&toastkey[0],
(AttrNumber) 1,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(valueid));
/*
* No additional condition if fetching all chunks. If exactly one chunk
* is wanted, add an equality condition on the chunk number; otherwise
* add a range condition (startchunk <= chunk number <= endchunk).
*/
if (startchunk == 0 && endchunk == totalchunks - 1)
nscankeys = 1;
else if (startchunk == endchunk)
{
ScanKeyInit(&toastkey[1],
(AttrNumber) 2,
BTEqualStrategyNumber, F_INT4EQ,
Int32GetDatum(startchunk));
nscankeys = 2;
}
else
{
ScanKeyInit(&toastkey[1],
(AttrNumber) 2,
BTGreaterEqualStrategyNumber, F_INT4GE,
Int32GetDatum(startchunk));
ScanKeyInit(&toastkey[2],
(AttrNumber) 2,
BTLessEqualStrategyNumber, F_INT4LE,
Int32GetDatum(endchunk));
nscankeys = 3;
}
/* Prepare for scan */
init_toast_snapshot(&SnapshotToast);
toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
&SnapshotToast, nscankeys, toastkey);
/*
* Read the chunks by index
*
* The index is on (valueid, chunkidx) so they will come in order
*/
expectedchunk = startchunk;
while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
int32 curchunk;
Pointer chunk;
bool isnull;
char *chunkdata;
int32 chunksize;
int32 expected_size;
int32 chcpystrt;
int32 chcpyend;
/*
* Have a chunk, extract the sequence number and the data
*/
curchunk = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
Assert(!isnull);
chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
Assert(!isnull);
/* Locate the payload; its position depends on the varlena header form */
if (!VARATT_IS_EXTENDED(chunk))
{
chunksize = VARSIZE(chunk) - VARHDRSZ;
chunkdata = VARDATA(chunk);
}
else if (VARATT_IS_SHORT(chunk))
{
/* could happen due to heap_form_tuple doing its thing */
chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
chunkdata = VARDATA_SHORT(chunk);
}
else
{
/* should never happen */
elog(ERROR, "found toasted toast chunk for toast value %u in %s",
valueid, RelationGetRelationName(toastrel));
chunksize = 0; /* keep compiler quiet */
chunkdata = NULL;
}
/*
* Some checks on the data we've found: chunks must arrive in
* sequence, stay within the requested range, and have the expected
* size.
*/
if (curchunk != expectedchunk)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk number %d (expected %d) for toast value %u in %s",
curchunk, expectedchunk, valueid,
RelationGetRelationName(toastrel))));
if (curchunk > endchunk)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
curchunk,
startchunk, endchunk, valueid,
RelationGetRelationName(toastrel))));
/*
* Every chunk except the last must be exactly TOAST_MAX_CHUNK_SIZE
* bytes; the last one holds whatever remains of attrsize.
*/
expected_size = curchunk < totalchunks - 1 ? TOAST_MAX_CHUNK_SIZE
: attrsize - ((totalchunks - 1) * TOAST_MAX_CHUNK_SIZE);
if (chunksize != expected_size)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
chunksize, expected_size,
curchunk, totalchunks, valueid,
RelationGetRelationName(toastrel))));
/*
* Copy the data into proper place in our result
*
* chcpystrt and chcpyend are the first and last byte positions
* (inclusive) to copy from this chunk. Only the first and last chunks
* of the slice can be copied partially.
*/
chcpystrt = 0;
chcpyend = chunksize - 1;
if (curchunk == startchunk)
chcpystrt = sliceoffset % TOAST_MAX_CHUNK_SIZE;
if (curchunk == endchunk)
chcpyend = (sliceoffset + slicelength - 1) % TOAST_MAX_CHUNK_SIZE;
/*
* The destination offset is the chunk's position within the whole value
* made relative to the start of the slice, so the first copied byte
* lands at VARDATA(result).
*/
memcpy(VARDATA(result) +
(curchunk * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
chunkdata + chcpystrt,
(chcpyend - chcpystrt) + 1);
expectedchunk++;
}
/*
* Final checks that we successfully fetched the datum: after the loop,
* expectedchunk must be one past the last chunk we wanted.
*/
if (expectedchunk != (endchunk + 1))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("missing chunk number %d for toast value %u in %s",
expectedchunk, valueid,
RelationGetRelationName(toastrel))));
/* End scan and close indexes. */
systable_endscan_ordered(toastscan);
toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
}
/* ----------
* toast_decompress_datum -
*

View File

@ -2545,6 +2545,7 @@ static const TableAmRoutine heapam_methods = {
.relation_size = table_block_relation_size,
.relation_needs_toast_table = heapam_relation_needs_toast_table,
.relation_toast_am = heapam_relation_toast_am,
.relation_fetch_toast_slice = heap_fetch_toast_slice,
.relation_estimate_size = heapam_estimate_rel_size,

View File

@ -25,10 +25,12 @@
#include "postgres.h"
#include "access/detoast.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/heaptoast.h"
#include "access/toast_helper.h"
#include "access/toast_internals.h"
#include "utils/fmgroids.h"
/* ----------
@ -604,3 +606,183 @@ toast_build_flattened_tuple(TupleDesc tupleDesc,
return new_tuple;
}
/*
* Fetch a TOAST slice from a heap table.
*
* Opens the toast relation's indexes, scans the valid one for the chunks of
* valueid that overlap the requested byte range, sanity-checks every chunk
* returned, and copies the requested bytes into result.
*
* toastrel is the relation from which chunks are to be fetched.
* valueid identifies the TOAST value from which chunks are being fetched.
* attrsize is the total size of the TOAST value.
* sliceoffset is the byte offset within the TOAST value from which to fetch.
* slicelength is the number of bytes to be fetched from the TOAST value.
* result is the varlena into which the results should be written. The
* bytes are stored starting at VARDATA(result); this function does not
* allocate result, so the caller is expected to have sized it for
* slicelength bytes.
*
* Reports ERROR if a chunk arrives out of sequence, lies outside the
* requested chunk range, has an unexpected size, or is missing.
*/
void
heap_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize,
int32 sliceoffset, int32 slicelength,
struct varlena *result)
{
Relation *toastidxs;
ScanKeyData toastkey[3];
TupleDesc toasttupDesc = toastrel->rd_att;
int nscankeys;
SysScanDesc toastscan;
HeapTuple ttup;
int32 expectedchunk;
/* Number of chunks in the whole value (attrsize rounded up to chunks) */
int32 totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
int startchunk;
int endchunk;
int num_indexes;
int validIndex;
SnapshotData SnapshotToast;
/* Look for the valid index of toast relation */
validIndex = toast_open_indexes(toastrel,
AccessShareLock,
&toastidxs,
&num_indexes);
/* Zero-based numbers of the first and last chunks touched by the slice */
startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
endchunk = (sliceoffset + slicelength - 1) / TOAST_MAX_CHUNK_SIZE;
/*
* NOTE(review): the code below treats totalchunks - 1 as the last chunk
* number, so this assertion also accepts endchunk == totalchunks; "<"
* may have been intended -- confirm before tightening.
*/
Assert(endchunk <= totalchunks);
/*
* Set up the scan keys to fetch from the index. The first key always
* matches the value ID (index column 1); zero, one or two further keys
* on the chunk number (index column 2) are added below.
*/
ScanKeyInit(&toastkey[0],
(AttrNumber) 1,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(valueid));
/*
* No additional condition if fetching all chunks. If exactly one chunk
* is wanted, add an equality condition on the chunk number; otherwise
* add a range condition (startchunk <= chunk number <= endchunk).
*/
if (startchunk == 0 && endchunk == totalchunks - 1)
nscankeys = 1;
else if (startchunk == endchunk)
{
ScanKeyInit(&toastkey[1],
(AttrNumber) 2,
BTEqualStrategyNumber, F_INT4EQ,
Int32GetDatum(startchunk));
nscankeys = 2;
}
else
{
ScanKeyInit(&toastkey[1],
(AttrNumber) 2,
BTGreaterEqualStrategyNumber, F_INT4GE,
Int32GetDatum(startchunk));
ScanKeyInit(&toastkey[2],
(AttrNumber) 2,
BTLessEqualStrategyNumber, F_INT4LE,
Int32GetDatum(endchunk));
nscankeys = 3;
}
/* Prepare for scan */
init_toast_snapshot(&SnapshotToast);
toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
&SnapshotToast, nscankeys, toastkey);
/*
* Read the chunks by index
*
* The index is on (valueid, chunkidx) so they will come in order
*/
expectedchunk = startchunk;
while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
{
int32 curchunk;
Pointer chunk;
bool isnull;
char *chunkdata;
int32 chunksize;
int32 expected_size;
int32 chcpystrt;
int32 chcpyend;
/*
* Have a chunk, extract the sequence number and the data
*/
curchunk = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
Assert(!isnull);
chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
Assert(!isnull);
/* Locate the payload; its position depends on the varlena header form */
if (!VARATT_IS_EXTENDED(chunk))
{
chunksize = VARSIZE(chunk) - VARHDRSZ;
chunkdata = VARDATA(chunk);
}
else if (VARATT_IS_SHORT(chunk))
{
/* could happen due to heap_form_tuple doing its thing */
chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
chunkdata = VARDATA_SHORT(chunk);
}
else
{
/* should never happen */
elog(ERROR, "found toasted toast chunk for toast value %u in %s",
valueid, RelationGetRelationName(toastrel));
chunksize = 0; /* keep compiler quiet */
chunkdata = NULL;
}
/*
* Some checks on the data we've found: chunks must arrive in
* sequence, stay within the requested range, and have the expected
* size.
*/
if (curchunk != expectedchunk)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk number %d (expected %d) for toast value %u in %s",
curchunk, expectedchunk, valueid,
RelationGetRelationName(toastrel))));
if (curchunk > endchunk)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
curchunk,
startchunk, endchunk, valueid,
RelationGetRelationName(toastrel))));
/*
* Every chunk except the last must be exactly TOAST_MAX_CHUNK_SIZE
* bytes; the last one holds whatever remains of attrsize.
*/
expected_size = curchunk < totalchunks - 1 ? TOAST_MAX_CHUNK_SIZE
: attrsize - ((totalchunks - 1) * TOAST_MAX_CHUNK_SIZE);
if (chunksize != expected_size)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
chunksize, expected_size,
curchunk, totalchunks, valueid,
RelationGetRelationName(toastrel))));
/*
* Copy the data into proper place in our result
*
* chcpystrt and chcpyend are the first and last byte positions
* (inclusive) to copy from this chunk. Only the first and last chunks
* of the slice can be copied partially.
*/
chcpystrt = 0;
chcpyend = chunksize - 1;
if (curchunk == startchunk)
chcpystrt = sliceoffset % TOAST_MAX_CHUNK_SIZE;
if (curchunk == endchunk)
chcpyend = (sliceoffset + slicelength - 1) % TOAST_MAX_CHUNK_SIZE;
/*
* The destination offset is the chunk's position within the whole value
* made relative to the start of the slice, so the first copied byte
* lands at VARDATA(result).
*/
memcpy(VARDATA(result) +
(curchunk * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
chunkdata + chcpystrt,
(chcpyend - chcpystrt) + 1);
expectedchunk++;
}
/*
* Final checks that we successfully fetched the datum: after the loop,
* expectedchunk must be one past the last chunk we wanted.
*/
if (expectedchunk != (endchunk + 1))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("missing chunk number %d for toast value %u in %s",
expectedchunk, valueid,
RelationGetRelationName(toastrel))));
/* End scan and close indexes. */
systable_endscan_ordered(toastscan);
toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
}

View File

@ -136,4 +136,14 @@ extern HeapTuple toast_build_flattened_tuple(TupleDesc tupleDesc,
Datum *values,
bool *isnull);
/* ----------
* heap_fetch_toast_slice
*
* Fetch a slice from a toast value stored in a heap table.
*
* toastrel is the toast relation to read from, valueid identifies the
* toast value, attrsize is the total size of that value, sliceoffset and
* slicelength give the byte range to fetch, and result is the varlena
* into which the fetched bytes are written.
* ----------
*/
extern void heap_fetch_toast_slice(Relation toastrel, Oid valueid,
int32 attrsize, int32 sliceoffset,
int32 slicelength, struct varlena *result);
#endif /* HEAPTOAST_H */

View File

@ -588,6 +588,17 @@ typedef struct TableAmRoutine
*/
Oid (*relation_toast_am) (Relation rel);
/*
* This callback is invoked when detoasting a value stored in a toast
* table implemented by this AM. See table_relation_fetch_toast_slice()
* for more details.
*/
void (*relation_fetch_toast_slice) (Relation toastrel, Oid valueid,
int32 attrsize,
int32 sliceoffset,
int32 slicelength,
struct varlena *result);
/* ------------------------------------------------------------------------
* Planner related functions.
@ -1620,6 +1631,41 @@ table_relation_toast_am(Relation rel)
return rel->rd_tableam->relation_toast_am(rel);
}
/*
 * Fetch all or part of a TOAST value from a TOAST table.
 *
 * This simply dispatches to the relation_fetch_toast_slice callback of the
 * table AM that implements toastrel.
 *
 * If this AM is never used to implement a TOAST table, then this callback
 * is not needed. But, if toasted values are ever stored in a table of this
 * type, then you will need this callback.
 *
 * toastrel is the relation in which the toasted value is stored.
 *
 * valueid identifies which toast value is to be fetched. For the heap,
 * this corresponds to the values stored in the chunk_id column.
 *
 * attrsize is the total size of the toast value to be fetched.
 *
 * sliceoffset is the offset within the toast value of the first byte that
 * should be fetched.
 *
 * slicelength is the number of bytes from the toast value that should be
 * fetched.
 *
 * result is caller-allocated space into which the fetched bytes should be
 * stored.
 */
static inline void
table_relation_fetch_toast_slice(Relation toastrel, Oid valueid,
								 int32 attrsize, int32 sliceoffset,
								 int32 slicelength, struct varlena *result)
{
	/*
	 * The callback returns void, so call it as a plain statement: ISO C does
	 * not allow "return <expression>;" in a function returning void.
	 */
	toastrel->rd_tableam->relation_fetch_toast_slice(toastrel, valueid,
													 attrsize,
													 sliceoffset,
													 slicelength,
													 result);
}
/* ----------------------------------------------------------------------------
* Planner related functionality