From bc4de01db3a210e988dc88b585d7c38e6e7054c7 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 9 Feb 2015 12:30:52 -0500 Subject: [PATCH] Minor cleanup/code review for "indirect toast" stuff. Fix some issues I noticed while fooling with an extension to allow an additional kind of toast pointer. Much of this is just comment improvement, but there are a couple of actual bugs, which might or might not be reachable today depending on what can happen during logical decoding. An example is that toast_flatten_tuple() failed to cover the possibility of an indirection pointer in its input. Back-patch to 9.4 just in case that is reachable now. In HEAD, also correct some really minor issues with recent compression reorganization, such as dangerously underparenthesized macros. --- src/backend/access/heap/tuptoaster.c | 65 +++++++++++++++++++--------- src/include/access/tuptoaster.h | 6 +-- src/include/postgres.h | 37 ++++++++-------- 3 files changed, 66 insertions(+), 42 deletions(-) diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index d19e7220c5..f8c1401d7f 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -50,7 +50,7 @@ */ typedef struct toast_compress_header { - int32 vl_len_; /* varlena header (do not touch directly!) */ + int32 vl_len_; /* varlena header (do not touch directly!) */ int32 rawsize; } toast_compress_header; @@ -58,12 +58,12 @@ typedef struct toast_compress_header * Utilities for manipulation of header information for compressed * toast entries. */ -#define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header)) -#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) ptr)->rawsize) +#define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header)) +#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize) #define TOAST_COMPRESS_RAWDATA(ptr) \ - (((char *) ptr) + TOAST_COMPRESS_HDRSZ) + (((char *) (ptr)) + TOAST_COMPRESS_HDRSZ) #define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \ - (((toast_compress_header *) ptr)->rawsize = len) + (((toast_compress_header *) (ptr))->rawsize = (len)) static void toast_delete_datum(Relation rel, Datum value); static Datum toast_save_datum(Relation rel, Datum value, @@ -90,8 +90,9 @@ static void toast_close_indexes(Relation *toastidxs, int num_indexes, * * This will return a datum that contains all the data internally, ie, not * relying on external storage or memory, but it can still be compressed or - * have a short header. - ---------- + * have a short header. Note some callers assume that if the input is an + * EXTERNAL datum, the result will be a pfree'able chunk. + * ---------- */ struct varlena * heap_tuple_fetch_attr(struct varlena * attr) @@ -108,9 +109,7 @@ heap_tuple_fetch_attr(struct varlena * attr) else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) { /* - * copy into the caller's memory context. That's not required in all - * cases but sufficient for now since this is mainly used when we need - * to persist a Datum for unusually long time, like in a HOLD cursor. + * This is an indirect pointer --- dereference it */ struct varatt_indirect redirect; @@ -120,11 +119,14 @@ heap_tuple_fetch_attr(struct varlena * attr) /* nested indirect Datums aren't allowed */ Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); - /* doesn't make much sense, but better handle it */ - if (VARATT_IS_EXTERNAL_ONDISK(attr)) + /* recurse if value is still external in some other way */ + if (VARATT_IS_EXTERNAL(attr)) return heap_tuple_fetch_attr(attr); - /* copy datum verbatim */ + /* + * Copy into the caller's memory context, in case caller tries to + * pfree the result. + */ result = (struct varlena *) palloc(VARSIZE_ANY(attr)); memcpy(result, attr, VARSIZE_ANY(attr)); } @@ -144,7 +146,10 @@ heap_tuple_fetch_attr(struct varlena * attr) * heap_tuple_untoast_attr - * * Public entry point to get back a toasted value from compression - * or external storage. + * or external storage. The result is always non-extended varlena form. + * + * Note some callers assume that if the input is an EXTERNAL or COMPRESSED + * datum, the result will be a pfree'able chunk. * ---------- */ struct varlena * @@ -160,12 +165,16 @@ heap_tuple_untoast_attr(struct varlena * attr) if (VARATT_IS_COMPRESSED(attr)) { struct varlena *tmp = attr; + attr = toast_decompress_datum(tmp); pfree(tmp); } } else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) { + /* + * This is an indirect pointer --- dereference it + */ struct varatt_indirect redirect; VARATT_EXTERNAL_GET_POINTER(redirect, attr); @@ -174,7 +183,18 @@ heap_tuple_untoast_attr(struct varlena * attr) /* nested indirect Datums aren't allowed */ Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); + /* recurse in case value is still extended in some other way */ attr = heap_tuple_untoast_attr(attr); + + /* if it isn't, we'd better copy it */ + if (attr == (struct varlena *) redirect.pointer) + { + struct varlena *result; + + result = (struct varlena *) palloc(VARSIZE_ANY(attr)); + memcpy(result, attr, VARSIZE_ANY(attr)); + attr = result; + } } else if (VARATT_IS_COMPRESSED(attr)) { @@ -246,9 +266,12 @@ heap_tuple_untoast_attr_slice(struct varlena * attr, else preslice = attr; + Assert(!VARATT_IS_EXTERNAL(preslice)); + if (VARATT_IS_COMPRESSED(preslice)) { struct varlena *tmp = preslice; + preslice = toast_decompress_datum(tmp); if (tmp != attr) @@ -448,8 +471,6 @@ toast_delete(Relation rel, HeapTuple oldtup) continue; else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value))) toast_delete_datum(rel, value); - else if (VARATT_IS_EXTERNAL_INDIRECT(PointerGetDatum(value))) - elog(ERROR, "attempt to delete tuple containing indirect datums"); } } } @@ -611,7 +632,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, /* * We took care of UPDATE above, so any external value we find - * still in the tuple must be someone else's we cannot reuse. + * still in the tuple must be someone else's that we cannot reuse + * (this includes the case of an out-of-line in-memory datum). * Fetch it back (without decompression, unless we are forcing * PLAIN storage). If necessary, we'll push it out as a new * external value below. @@ -1043,7 +1065,7 @@ toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc) new_value = (struct varlena *) DatumGetPointer(toast_values[i]); if (VARATT_IS_EXTERNAL(new_value)) { - new_value = toast_fetch_datum(new_value); + new_value = heap_tuple_fetch_attr(new_value); toast_values[i] = PointerGetDatum(new_value); toast_free[i] = true; } @@ -1746,8 +1768,8 @@ toast_fetch_datum(struct varlena * attr) int num_indexes; int validIndex; - if (VARATT_IS_EXTERNAL_INDIRECT(attr)) - elog(ERROR, "shouldn't be called for indirect tuples"); + if (!VARATT_IS_EXTERNAL_ONDISK(attr)) + elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums"); /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); @@ -1923,7 +1945,8 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length) int num_indexes; int validIndex; - Assert(VARATT_IS_EXTERNAL_ONDISK(attr)); + if (!VARATT_IS_EXTERNAL_ONDISK(attr)) + elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums"); /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h index 1c4e96ea51..331dd259cc 100644 --- a/src/include/access/tuptoaster.h +++ b/src/include/access/tuptoaster.h @@ -96,10 +96,10 @@ VARHDRSZ) /* Size of an EXTERNAL datum that contains a standard TOAST pointer */ -#define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_external)) +#define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_external)) -/* Size of an indirect datum that contains a standard TOAST pointer */ -#define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_indirect)) +/* Size of an EXTERNAL datum that contains an indirection pointer */ +#define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_indirect)) /* * Testing whether an externally-stored value is compressed now requires diff --git a/src/include/postgres.h b/src/include/postgres.h index dfce01bf4b..082c75b093 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -54,11 +54,11 @@ */ /* - * struct varatt_external is a "TOAST pointer", that is, the information needed - * to fetch a Datum stored in an out-of-line on-disk Datum. The data is - * compressed if and only if va_extsize < va_rawsize - VARHDRSZ. This struct - * must not contain any padding, because we sometimes compare pointers using - * memcmp. + * struct varatt_external is a traditional "TOAST pointer", that is, the + * information needed to fetch a Datum stored out-of-line in a TOAST table. + * The data is compressed if and only if va_extsize < va_rawsize - VARHDRSZ. + * This struct must not contain any padding, because we sometimes compare + * these pointers using memcmp. * * Note that this information is stored unaligned within actual tuples, so * you need to memcpy from the tuple into a local struct variable before @@ -74,22 +74,23 @@ typedef struct varatt_external } varatt_external; /* - * Out-of-line Datum thats stored in memory in contrast to varatt_external - * pointers which points to data in an external toast relation. + * struct varatt_indirect is a "TOAST pointer" representing an out-of-line + * Datum that's stored in memory, not in an external toast relation. + * The creator of such a Datum is entirely responsible that the referenced + * storage survives for as long as referencing pointer Datums can exist. * - * Note that just as varatt_external's this is stored unaligned within the - * tuple. + * Note that just as for struct varatt_external, this struct is stored + * unaligned within any containing tuple. */ typedef struct varatt_indirect { struct varlena *pointer; /* Pointer to in-memory varlena */ } varatt_indirect; - /* - * Type of external toast datum stored. The peculiar value for VARTAG_ONDISK - * comes from the requirement for on-disk compatibility with the older - * definitions of varattrib_1b_e where v_tag was named va_len_1be... + * Type tag for the various sorts of "TOAST pointer" datums. The peculiar + * value for VARTAG_ONDISK comes from a requirement for on-disk compatibility + * with a previous notion that the tag field was the pointer datum's length. */ typedef enum vartag_external { @@ -98,9 +99,9 @@ typedef enum vartag_external } vartag_external; #define VARTAG_SIZE(tag) \ - ((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \ + ((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \ (tag) == VARTAG_ONDISK ? sizeof(varatt_external) : \ - TrapMacro(true, "unknown vartag")) + TrapMacro(true, "unrecognized TOAST vartag")) /* * These structs describe the header of a varlena object that may have been @@ -132,7 +133,7 @@ typedef struct char va_data[1]; /* Data begins here */ } varattrib_1b; -/* inline portion of a short varlena pointing to an external resource */ +/* TOAST pointers are a subset of varattrib_1b with an identifying tag byte */ typedef struct { uint8 va_header; /* Always 0x80 or 0x01 */ @@ -162,8 +163,8 @@ typedef struct * this lets us disambiguate alignment padding bytes from the start of an * unaligned datum. (We now *require* pad bytes to be filled with zero!) * - * In TOAST datums the tag field in varattrib_1b_e is used to discern whether - * its an indirection pointer or more commonly an on-disk tuple. + * In TOAST pointers the va_tag field (see varattrib_1b_e) is used to discern + * the specific type and length of the pointer datum. */ /*