postgresql/src/include/access/toast_helper.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

117 lines
3.7 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* toast_helper.h
* Helper functions for table AMs implementing compressed or
* out-of-line storage of varlena attributes.
*
* Copyright (c) 2000-2023, PostgreSQL Global Development Group
*
* src/include/access/toast_helper.h
*
*-------------------------------------------------------------------------
*/
#ifndef TOAST_HELPER_H
#define TOAST_HELPER_H
#include "utils/rel.h"
/*
* Information about one column of a tuple being toasted.
*
* NOTE: toast_action[i] can have these values:
* ' ' default handling
* TYPSTORAGE_PLAIN already processed --- don't touch it
* TYPSTORAGE_EXTENDED incompressible, but OK to move off
*
* NOTE: toast_attr[i].tai_size is only made valid for varlena attributes with
* toast_action[i] different from TYPSTORAGE_PLAIN.
*/
typedef struct
{
	/* NOTE(review): presumably the previous external datum, if any -- confirm */
	struct varlena *tai_oldexternal;

	/* only made valid for some varlena attributes; see the note above */
	int32		tai_size;

	/* NOTE(review): presumably a mask of the TOASTCOL_* bits -- confirm */
	uint8		tai_colflags;

	/*
	 * Field attributed by version-control history to the change "Allow
	 * configurable LZ4 TOAST compression" (2021-03-19), which introduced a
	 * per-column COMPRESSION option that can be pglz (the default) or lz4.
	 *
	 * NOTE(review): presumably this carries the column's configured
	 * compression method -- confirm against the code that fills it in.
	 */
	char		tai_compression;
} ToastAttrInfo;
/*
* Information about one tuple being toasted.
*/
typedef struct
{
	/*
	 * Inputs: the caller fills these in before toast_tuple_init is called.
	 * Every array has ttc_rel->rd_att->natts entries.  For an insert,
	 * ttc_oldvalues and ttc_oldisnull should both be NULL.
	 */

	/* the relation that contains the tuple */
	Relation	ttc_rel;

	/* values from the tuple columns */
	Datum	   *ttc_values;

	/* null flags for the tuple columns */
	bool	   *ttc_isnull;

	/* values from previous tuple */
	Datum	   *ttc_oldvalues;

	/* null flags from previous tuple */
	bool	   *ttc_oldisnull;

	/*
	 * Working state: before toast_tuple_init the caller should point
	 * ttc_attr at an array of ttc_rel->rd_att->natts ToastAttrInfo
	 * structures; the array contents need not be initialized, and neither
	 * does ttc_flags.
	 */
	uint8		ttc_flags;
	ToastAttrInfo *ttc_attr;
} ToastTupleContext;
/*
* Flags indicating the overall state of a TOAST operation.
*
* TOAST_NEEDS_DELETE_OLD indicates that one or more old TOAST datums need
* to be deleted.
*
* TOAST_NEEDS_FREE indicates that one or more TOAST values need to be freed.
*
* TOAST_HAS_NULLS indicates that nulls were found in the tuple being toasted.
*
* TOAST_NEEDS_CHANGE indicates that a new tuple needs to be built; in other
* words, the toaster did something.
*/
#define TOAST_NEEDS_DELETE_OLD	(1 << 0)	/* 0x0001 */
#define TOAST_NEEDS_FREE		(1 << 1)	/* 0x0002 */
#define TOAST_HAS_NULLS			(1 << 2)	/* 0x0004 */
#define TOAST_NEEDS_CHANGE		(1 << 3)	/* 0x0008 */
/*
* Flags indicating the status of a TOAST operation with respect to a
* particular column.
*
* TOASTCOL_NEEDS_DELETE_OLD indicates that the old TOAST datums for this
* column need to be deleted.
*
* TOASTCOL_NEEDS_FREE indicates that the value for this column needs to
* be freed.
*
* TOASTCOL_IGNORE indicates that the toaster should not further process
* this column.
*
* TOASTCOL_INCOMPRESSIBLE indicates that this column has been found to
* be incompressible, but could be moved out-of-line.
*/
/* The first two per-column flags reuse the tuple-level flag values. */
#define TOASTCOL_NEEDS_DELETE_OLD	TOAST_NEEDS_DELETE_OLD
#define TOASTCOL_NEEDS_FREE			TOAST_NEEDS_FREE
/* Column-only flags. */
#define TOASTCOL_IGNORE				(1 << 4)	/* 0x0010 */
#define TOASTCOL_INCOMPRESSIBLE		(1 << 5)	/* 0x0020 */
/*
 * Helper entry points operating on a ToastTupleContext.  The header comment
 * on that struct describes what must be set up before toast_tuple_init.
 *
 * NOTE(review): the definitions are not visible from this header; the
 * per-function behavior should be confirmed against the implementation.
 */
extern void toast_tuple_init(ToastTupleContext *ttc);

extern int	toast_tuple_find_biggest_attribute(ToastTupleContext *ttc,
											   bool for_compression,
											   bool check_main);

extern void toast_tuple_try_compression(ToastTupleContext *ttc,
										int attribute);

extern void toast_tuple_externalize(ToastTupleContext *ttc,
									int attribute,
									int options);

extern void toast_tuple_cleanup(ToastTupleContext *ttc);

/* Takes the relation and per-column arrays directly, not a context. */
extern void toast_delete_external(Relation rel,
								  Datum *values,
								  bool *isnull,
								  bool is_speculative);
#endif