1996-08-27 23:50:29 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* itup.h
|
1997-09-07 07:04:48 +02:00
|
|
|
* POSTGRES index tuple definitions.
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
|
|
|
*
|
2017-01-03 19:48:53 +01:00
|
|
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/include/access/itup.h
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#ifndef ITUP_H
|
|
|
|
#define ITUP_H
|
|
|
|
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "access/tupdesc.h"
|
1999-07-16 19:07:40 +02:00
|
|
|
#include "access/tupmacs.h"
|
2006-07-13 18:49:20 +02:00
|
|
|
#include "storage/bufpage.h"
|
1999-07-16 01:04:24 +02:00
|
|
|
#include "storage/itemptr.h"
|
1996-08-27 23:50:29 +02:00
|
|
|
|
2005-03-27 20:38:27 +02:00
|
|
|
/*
 * Index tuple header structure
 *
 * All index tuples start with IndexTupleData.  If the HasNulls bit is set,
 * this is followed by an IndexAttributeBitMapData.  The index attribute
 * values follow, beginning at a MAXALIGN boundary.
 *
 * Note that the space allocated for the bitmap does not vary with the number
 * of attributes; that is because we don't have room to store the number of
 * attributes in the header.  Given the MAXALIGN constraint there's no space
 * savings to be had anyway, for usual values of INDEX_MAX_KEYS.
 */

typedef struct IndexTupleData
{
	ItemPointerData t_tid;		/* reference TID to heap tuple */

	/* ---------------
	 * t_info is laid out in the following fashion:
	 *
	 * 15th (high) bit: has nulls
	 * 14th bit: has var-width attributes
	 * 13th bit: unused (bit 0x2000 is reserved for index-AM specific usage)
	 * 12-0 bit: size of tuple
	 * ---------------
	 */

	unsigned short t_info;		/* various info about tuple */

} IndexTupleData;				/* MORE DATA FOLLOWS AT END OF STRUCT */

/* Index tuples are addressed by pointer in most of the access-method code */
typedef IndexTupleData *IndexTuple;
|
1996-08-27 23:50:29 +02:00
|
|
|
|
2005-03-27 20:38:27 +02:00
|
|
|
/*
 * Null bitmap that follows IndexTupleData when the HasNulls bit of t_info
 * is set: one bit per index attribute.  The array is a fixed size (enough
 * for INDEX_MAX_KEYS attributes) regardless of the actual number of
 * attributes in the tuple.
 */
typedef struct IndexAttributeBitMapData
{
	bits8		bits[(INDEX_MAX_KEYS + 8 - 1) / 8];
} IndexAttributeBitMapData;

typedef IndexAttributeBitMapData * IndexAttributeBitMap;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2005-03-27 20:38:27 +02:00
|
|
|
/*
 * t_info manipulation macros
 */
#define INDEX_SIZE_MASK 0x1FFF
/* bit 0x2000 is reserved for index-AM specific usage */
#define INDEX_VAR_MASK 0x4000
#define INDEX_NULL_MASK 0x8000

/* Total tuple size (header + data), given an IndexTuple pointer */
#define IndexTupleSize(itup)		((Size) (((IndexTuple) (itup))->t_info & INDEX_SIZE_MASK))
/* Total tuple size, given an IndexTupleData struct (not a pointer) */
#define IndexTupleDSize(itup)		((Size) ((itup).t_info & INDEX_SIZE_MASK))
/* Nonzero if the tuple carries a null bitmap */
#define IndexTupleHasNulls(itup)	((((IndexTuple) (itup))->t_info & INDEX_NULL_MASK))
/* Nonzero if the tuple contains any var-width attributes */
#define IndexTupleHasVarwidths(itup) ((((IndexTuple) (itup))->t_info & INDEX_VAR_MASK))
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
|
1998-01-31 05:39:26 +01:00
|
|
|
/*
 * IndexInfoFindDataOffset
 *
 * Returns the offset, from the start of the tuple, at which the attribute
 * data begins: just the MAXALIGNed header when there are no nulls, else the
 * header plus the null bitmap, MAXALIGNed as a whole.
 *
 * Takes an infomask as argument (primarily because this needs to be usable
 * at index_form_tuple time so enough space is allocated).
 */
#define IndexInfoFindDataOffset(t_info) \
( \
	(!((t_info) & INDEX_NULL_MASK)) ? \
	( \
		(Size)MAXALIGN(sizeof(IndexTupleData)) \
	) \
	: \
	( \
		(Size)MAXALIGN(sizeof(IndexTupleData) + sizeof(IndexAttributeBitMapData)) \
	) \
)
|
1996-08-27 23:50:29 +02:00
|
|
|
|
1998-01-31 05:39:26 +01:00
|
|
|
/* ----------------
 * index_getattr
 *
 * Fetch attribute number "attnum" (1-based) of index tuple "tup" as a
 * Datum, setting *isnull to indicate whether the attribute is NULL.
 *
 * This gets called many times, so we macro the cacheable and NULL
 * lookups, and call nocache_index_getattr() for the rest.
 *
 * Evaluation order matters: *(isnull) is preset to false up front, and
 * only flipped to true when the null bitmap says the attribute is NULL.
 * ----------------
 */
#define index_getattr(tup, attnum, tupleDesc, isnull) \
( \
	AssertMacro(PointerIsValid(isnull) && (attnum) > 0), \
	*(isnull) = false, \
	!IndexTupleHasNulls(tup) ? \
	( \
		(tupleDesc)->attrs[(attnum)-1]->attcacheoff >= 0 ? \
		( \
			fetchatt((tupleDesc)->attrs[(attnum)-1], \
			(char *) (tup) + IndexInfoFindDataOffset((tup)->t_info) \
			+ (tupleDesc)->attrs[(attnum)-1]->attcacheoff) \
		) \
		: \
			nocache_index_getattr((tup), (attnum), (tupleDesc)) \
	) \
	: \
	( \
		(att_isnull((attnum)-1, (char *)(tup) + sizeof(IndexTupleData))) ? \
		( \
			*(isnull) = true, \
			(Datum)NULL \
		) \
		: \
		( \
			nocache_index_getattr((tup), (attnum), (tupleDesc)) \
		) \
	) \
)
|
|
|
|
|
2006-05-07 03:21:30 +02:00
|
|
|
/*
 * MaxIndexTuplesPerPage is an upper bound on the number of tuples that can
 * fit on one index page.  An index tuple must have either data or a null
 * bitmap, so we can safely assume it's at least 1 byte bigger than a bare
 * IndexTupleData struct.  We arrive at the divisor because each tuple
 * must be maxaligned, and it must have an associated item pointer.
 */
#define MinIndexTupleSize	MAXALIGN(sizeof(IndexTupleData) + 1)
/*
 * Use MinIndexTupleSize in the divisor rather than repeating its expansion,
 * so the two definitions cannot silently diverge.
 */
#define MaxIndexTuplesPerPage \
	((int) ((BLCKSZ - SizeOfPageHeaderData) / \
			(MinIndexTupleSize + sizeof(ItemIdData))))
|
|
|
|
|
1998-02-26 05:46:47 +01:00
|
|
|
|
2003-02-23 07:17:13 +01:00
|
|
|
/* routines in indextuple.c */

/* Build an index tuple from the given values/isnull arrays (palloc'd result) */
extern IndexTuple index_form_tuple(TupleDesc tupleDescriptor,
				 Datum *values, bool *isnull);
/* Slow-path attribute fetch; called by index_getattr for non-cached cases */
extern Datum nocache_index_getattr(IndexTuple tup, int attnum,
					  TupleDesc tupleDesc);
/* Extract all attributes of an index tuple into values/isnull arrays */
extern void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor,
				   Datum *values, bool *isnull);
/* Return a palloc'd copy of the given index tuple */
extern IndexTuple CopyIndexTuple(IndexTuple source);
|
1996-08-27 23:50:29 +02:00
|
|
|
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
#endif /* ITUP_H */
|