postgresql/src/include/access/brin_xlog.h

110 lines
2.8 KiB
C
Raw Normal View History

BRIN: Block Range Indexes BRIN is a new index access method intended to accelerate scans of very large tables, without the maintenance overhead of btrees or other traditional indexes. They work by maintaining "summary" data about block ranges. Bitmap index scans work by reading each summary tuple and comparing them with the query quals; all pages in the range are returned in a lossy TID bitmap if the quals are consistent with the values in the summary tuple, otherwise not. Normal index scans are not supported because these indexes do not store TIDs. As new tuples are added into the index, the summary information is updated (if the block range in which the tuple is added is already summarized) or not; in the latter case, a subsequent pass of VACUUM or the brin_summarize_new_values() function will create the summary information. For data types with natural 1-D sort orders, the summary info consists of the maximum and the minimum values of each indexed column within each page range. This type of operator class we call "Minmax", and we supply a bunch of them for most data types with B-tree opclasses. Since the BRIN code is generalized, other approaches are possible for things such as arrays, geometric types, ranges, etc; even for things such as enum types we could do something different than minmax with better results. In this commit I only include minmax. Catalog version bumped due to new builtin catalog entries. There's more that could be done here, but this is a good step forwards. Loosely based on ideas from Simon Riggs; code mostly by Álvaro Herrera, with contribution by Heikki Linnakangas. Patch reviewed by: Amit Kapila, Heikki Linnakangas, Robert Haas. Testing help from Jeff Janes, Erik Rijkers, Emanuel Calvo. PS: The research leading to these results has received funding from the European Union's Seventh Framework Programme (FP7/2007-2013) under grant agreement n° 318633.
2014-11-07 20:38:14 +01:00
/*-------------------------------------------------------------------------
*
* brin_xlog.h
* POSTGRES BRIN access XLOG definitions.
*
*
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/access/brin_xlog.h
*
*-------------------------------------------------------------------------
*/
#ifndef BRIN_XLOG_H
#define BRIN_XLOG_H
#include "access/xlogrecord.h"
#include "lib/stringinfo.h"
#include "storage/bufpage.h"
#include "storage/itemptr.h"
#include "storage/relfilenode.h"
#include "utils/relcache.h"
/*
 * WAL record definitions for BRIN's WAL operations
 *
 * XLOG allows us to store some information in the high 4 bits of a log
 * record's xl_info field.  The operation codes below all fall within
 * bits 4-6 of that field; XLOG_BRIN_OPMASK selects exactly those bits.
 */
#define XLOG_BRIN_CREATE_INDEX      0x00
#define XLOG_BRIN_INSERT            0x10
#define XLOG_BRIN_UPDATE            0x20
#define XLOG_BRIN_SAMEPAGE_UPDATE   0x30
#define XLOG_BRIN_REVMAP_EXTEND     0x40
#define XLOG_BRIN_REVMAP_VACUUM     0x50

/* Mask covering every operation code listed above (bits 4-6) */
#define XLOG_BRIN_OPMASK            0x70
/*
 * Inserting the first item on a new page means redo restores the entire
 * page.  The value 0x80 does not overlap XLOG_BRIN_OPMASK (0x70).
 */
#define XLOG_BRIN_INIT_PAGE         0x80
/*
 * xl_brin_createidx
 *		What we need to know about a BRIN index create.
 */
typedef struct xl_brin_createidx
{
	BlockNumber pagesPerRange;	/* NOTE(review): presumably pages covered by
								 * one summarized block range -- confirm */
	RelFileNode node;			/* relation the record refers to */
	uint16		version;		/* last field; ends the logged portion */
} xl_brin_createidx;

/*
 * Number of bytes from the start of the struct through the end of its last
 * field (version), i.e. without any trailing padding.
 */
#define SizeOfBrinCreateIdx \
	(offsetof(xl_brin_createidx, version) + sizeof(uint16))
/*
 * xl_brin_insert
 *		What we need to know about a BRIN tuple insert.
 *
 * The tuple data itself is not a member of the struct; it follows
 * immediately after the fixed-size part.
 */
typedef struct xl_brin_insert
{
	RelFileNode node;
	BlockNumber heapBlk;

	/* extra information needed to update the revmap */
	BlockNumber revmapBlk;
	BlockNumber pagesPerRange;

	uint16		tuplen;			/* NOTE(review): presumably the length of the
								 * tuple data that follows -- confirm */
	ItemPointerData tid;

	/* tuple data follows at end of struct */
} xl_brin_insert;

/*
 * Number of bytes from the start of the struct through the end of its last
 * field (tid), i.e. without any trailing padding.
 */
#define SizeOfBrinInsert \
	(offsetof(xl_brin_insert, tid) + sizeof(ItemPointerData))
/*
 * xl_brin_update
 *		A cross-page update is the same as an insert, but also stores the
 *		old tid.
 *
 * The insert record is embedded as the last member, so whatever follows an
 * xl_brin_insert also follows this struct.
 */
typedef struct xl_brin_update
{
	ItemPointerData oldtid;		/* the old tid */
	xl_brin_insert insert;		/* same contents as a plain insert record */
} xl_brin_update;

/*
 * Offset of the embedded insert record plus that record's own size as
 * given by SizeOfBrinInsert.
 */
#define SizeOfBrinUpdate \
	(offsetof(xl_brin_update, insert) + SizeOfBrinInsert)
BRIN: Block Range Indexes BRIN is a new index access method intended to accelerate scans of very large tables, without the maintenance overhead of btrees or other traditional indexes. They work by maintaining "summary" data about block ranges. Bitmap index scans work by reading each summary tuple and comparing them with the query quals; all pages in the range are returned in a lossy TID bitmap if the quals are consistent with the values in the summary tuple, otherwise not. Normal index scans are not supported because these indexes do not store TIDs. As new tuples are added into the index, the summary information is updated (if the block range in which the tuple is added is already summarized) or not; in the latter case, a subsequent pass of VACUUM or the brin_summarize_new_values() function will create the summary information. For data types with natural 1-D sort orders, the summary info consists of the maximum and the minimum values of each indexed column within each page range. This type of operator class we call "Minmax", and we supply a bunch of them for most data types with B-tree opclasses. Since the BRIN code is generalized, other approaches are possible for things such as arrays, geometric types, ranges, etc; even for things such as enum types we could do something different than minmax with better results. In this commit I only include minmax. Catalog version bumped due to new builtin catalog entries. There's more that could be done here, but this is a good step forwards. Loosely based on ideas from Simon Riggs; code mostly by Álvaro Herrera, with contribution by Heikki Linnakangas. Patch reviewed by: Amit Kapila, Heikki Linnakangas, Robert Haas. Testing help from Jeff Janes, Erik Rijkers, Emanuel Calvo. PS: The research leading to these results has received funding from the European Union's Seventh Framework Programme (FP7/2007-2013) under grant agreement n° 318633.
2014-11-07 20:38:14 +01:00
/*
 * xl_brin_samepage_update
 *		What we need to know about a BRIN tuple samepage update.
 *
 * The tuple data itself is not a member of the struct; it follows
 * immediately after the fixed-size part.
 */
typedef struct xl_brin_samepage_update
{
	RelFileNode node;
	ItemPointerData tid;

	/* tuple data follows at end of struct */
} xl_brin_samepage_update;

/*
 * Number of bytes from the start of the struct through the end of its last
 * field (tid), i.e. without any trailing padding.
 */
#define SizeOfBrinSamepageUpdate \
	(offsetof(xl_brin_samepage_update, tid) + sizeof(ItemPointerData))
/*
 * xl_brin_revmap_extend
 *		What we need to know about a revmap extension.
 */
typedef struct xl_brin_revmap_extend
{
	RelFileNode node;
	BlockNumber targetBlk;		/* NOTE(review): presumably the block the
								 * revmap is extended onto -- confirm */
} xl_brin_revmap_extend;

/*
 * Number of bytes from the start of the struct through the end of its last
 * field (targetBlk), i.e. without any trailing padding.
 */
#define SizeOfBrinRevmapExtend \
	(offsetof(xl_brin_revmap_extend, targetBlk) + sizeof(BlockNumber))
/*
 * Functions declared for BRIN WAL records.  Only the prototypes are given
 * in this header; the definitions are not visible here.
 *
 * NOTE(review): going by the names, brin_desc presumably writes a
 * description of "record" into "buf", brin_redo presumably replays "record"
 * logged at "lsn", and brin_identify presumably returns a name for the
 * given xl_info value -- confirm against the implementations.
 */
extern void brin_desc(StringInfo buf, XLogRecord *record);
extern void brin_redo(XLogRecPtr lsn, XLogRecord *record);
extern const char *brin_identify(uint8 info);
#endif /* BRIN_XLOG_H */