2009-03-24 21:17:18 +01:00
|
|
|
/*-------------------------------------------------------------------------
 *
 * ginfast.c
 *	  Fast insert routines for the Postgres inverted index access method.
 *	  Pending entries are stored in a linear list of pages.  Later on
 *	  (typically during VACUUM), ginInsertCleanup() will be invoked to
 *	  transfer pending entries into the regular index structure.  This
 *	  wins because bulk insertion is much more efficient than retail.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/gin/ginfast.c
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
#include "access/gin_private.h"
|
2017-02-14 21:37:59 +01:00
|
|
|
#include "access/ginxlog.h"
|
2014-11-06 12:52:08 +01:00
|
|
|
#include "access/xloginsert.h"
|
2016-01-28 04:57:52 +01:00
|
|
|
#include "access/xlog.h"
|
2009-03-24 21:17:18 +01:00
|
|
|
#include "commands/vacuum.h"
|
2016-01-28 04:57:52 +01:00
|
|
|
#include "catalog/pg_am.h"
|
2009-03-24 21:17:18 +01:00
|
|
|
#include "miscadmin.h"
|
|
|
|
#include "utils/memutils.h"
|
2011-02-23 18:18:09 +01:00
|
|
|
#include "utils/rel.h"
|
2016-01-28 04:57:52 +01:00
|
|
|
#include "utils/acl.h"
|
2016-04-28 15:21:42 +02:00
|
|
|
#include "postmaster/autovacuum.h"
|
2015-09-07 15:24:01 +02:00
|
|
|
#include "storage/indexfsm.h"
|
2016-04-28 15:21:42 +02:00
|
|
|
#include "storage/lmgr.h"
|
2016-12-28 18:00:00 +01:00
|
|
|
#include "utils/builtins.h"
|
2009-03-24 21:17:18 +01:00
|
|
|
|
2014-11-11 13:08:21 +01:00
|
|
|
/* GUC parameter */
int			gin_pending_list_limit = 0;

/*
 * Bytes available on a GIN page once the (MAXALIGN'd) standard page header
 * and the (MAXALIGN'd) GIN opaque area have been subtracted from the block
 * size.
 */
#define GIN_PAGE_FREESIZE \
	( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) )
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
typedef struct KeyArray
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
Datum *keys; /* expansible array */
|
|
|
|
GinNullCategory *categories; /* another expansible array */
|
2009-06-11 16:49:15 +02:00
|
|
|
int32 nvalues; /* current number of valid entries */
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
int32 maxvalues; /* allocated size of arrays */
|
|
|
|
} KeyArray;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Build a pending-list page from the given array of tuples, and write it out.
|
2009-09-15 22:31:30 +02:00
|
|
|
*
|
|
|
|
* Returns amount of free space left on the page.
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
|
|
|
static int32
|
|
|
|
writeListPage(Relation index, Buffer buffer,
|
|
|
|
IndexTuple *tuples, int32 ntuples, BlockNumber rightlink)
|
|
|
|
{
|
2016-04-20 15:31:19 +02:00
|
|
|
Page page = BufferGetPage(buffer);
|
2009-09-15 22:31:30 +02:00
|
|
|
int32 i,
|
2009-06-11 16:49:15 +02:00
|
|
|
freesize,
|
|
|
|
size = 0;
|
|
|
|
OffsetNumber l,
|
|
|
|
off;
|
|
|
|
char *workspace;
|
|
|
|
char *ptr;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
/* workspace could be a local array; we use palloc for alignment */
|
|
|
|
workspace = palloc(BLCKSZ);
|
|
|
|
|
|
|
|
START_CRIT_SECTION();
|
|
|
|
|
|
|
|
GinInitBuffer(buffer, GIN_LIST);
|
|
|
|
|
|
|
|
off = FirstOffsetNumber;
|
|
|
|
ptr = workspace;
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
for (i = 0; i < ntuples; i++)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
int this_size = IndexTupleSize(tuples[i]);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
memcpy(ptr, tuples[i], this_size);
|
|
|
|
ptr += this_size;
|
|
|
|
size += this_size;
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
l = PageAddItem(page, (Item) tuples[i], this_size, off, false, false);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
if (l == InvalidOffsetNumber)
|
|
|
|
elog(ERROR, "failed to add item to index page in \"%s\"",
|
|
|
|
RelationGetRelationName(index));
|
|
|
|
|
|
|
|
off++;
|
|
|
|
}
|
|
|
|
|
|
|
|
Assert(size <= BLCKSZ); /* else we overran workspace */
|
|
|
|
|
|
|
|
GinPageGetOpaque(page)->rightlink = rightlink;
|
|
|
|
|
|
|
|
/*
|
2011-04-10 17:42:00 +02:00
|
|
|
* tail page may contain only whole row(s) or final part of row placed on
|
|
|
|
* previous pages (a "row" here meaning all the index tuples generated for
|
|
|
|
* one heap tuple)
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
2009-06-11 16:49:15 +02:00
|
|
|
if (rightlink == InvalidBlockNumber)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
GinPageSetFullRow(page);
|
|
|
|
GinPageGetOpaque(page)->maxoff = 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
GinPageGetOpaque(page)->maxoff = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
|
2010-12-13 18:34:26 +01:00
|
|
|
if (RelationNeedsWAL(index))
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
ginxlogInsertListPage data;
|
|
|
|
XLogRecPtr recptr;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
2009-09-15 22:31:30 +02:00
|
|
|
data.rightlink = rightlink;
|
|
|
|
data.ntuples = ntuples;
|
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now disects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-disected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compansates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
XLogBeginInsert();
|
|
|
|
XLogRegisterData((char *) &data, sizeof(ginxlogInsertListPage));
|
2009-03-24 21:17:18 +01:00
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now disects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-disected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compansates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
|
|
|
|
XLogRegisterBufData(0, workspace, size);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now disects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-disected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compansates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE);
|
2009-03-24 21:17:18 +01:00
|
|
|
PageSetLSN(page, recptr);
|
|
|
|
}
|
|
|
|
|
2009-09-15 22:31:30 +02:00
|
|
|
/* get free space before releasing buffer */
|
|
|
|
freesize = PageGetExactFreeSpace(page);
|
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
UnlockReleaseBuffer(buffer);
|
|
|
|
|
|
|
|
END_CRIT_SECTION();
|
|
|
|
|
|
|
|
pfree(workspace);
|
|
|
|
|
|
|
|
return freesize;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
makeSublist(Relation index, IndexTuple *tuples, int32 ntuples,
|
|
|
|
GinMetaPageData *res)
|
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
Buffer curBuffer = InvalidBuffer;
|
|
|
|
Buffer prevBuffer = InvalidBuffer;
|
|
|
|
int i,
|
|
|
|
size = 0,
|
|
|
|
tupsize;
|
|
|
|
int startTuple = 0;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
Assert(ntuples > 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Split tuples into pages
|
|
|
|
*/
|
2009-06-11 16:49:15 +02:00
|
|
|
for (i = 0; i < ntuples; i++)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
if (curBuffer == InvalidBuffer)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
curBuffer = GinNewBuffer(index);
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (prevBuffer != InvalidBuffer)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
res->nPendingPages++;
|
|
|
|
writeListPage(index, prevBuffer,
|
2009-09-15 22:31:30 +02:00
|
|
|
tuples + startTuple,
|
|
|
|
i - startTuple,
|
2009-03-24 21:17:18 +01:00
|
|
|
BufferGetBlockNumber(curBuffer));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
res->head = BufferGetBlockNumber(curBuffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
prevBuffer = curBuffer;
|
|
|
|
startTuple = i;
|
|
|
|
size = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
tupsize = MAXALIGN(IndexTupleSize(tuples[i])) + sizeof(ItemIdData);
|
|
|
|
|
2009-09-15 22:31:30 +02:00
|
|
|
if (size + tupsize > GinListPageSize)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
/* won't fit, force a new page and reprocess */
|
|
|
|
i--;
|
|
|
|
curBuffer = InvalidBuffer;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
size += tupsize;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Write last page
|
|
|
|
*/
|
|
|
|
res->tail = BufferGetBlockNumber(curBuffer);
|
|
|
|
res->tailFreeSize = writeListPage(index, curBuffer,
|
2009-09-15 22:31:30 +02:00
|
|
|
tuples + startTuple,
|
|
|
|
ntuples - startTuple,
|
2009-03-24 21:17:18 +01:00
|
|
|
InvalidBlockNumber);
|
|
|
|
res->nPendingPages++;
|
|
|
|
/* that was only one heap tuple */
|
|
|
|
res->nPendingHeapTuples = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
* Write the index tuples contained in *collector into the index's
|
|
|
|
* pending list.
|
|
|
|
*
|
|
|
|
* Function guarantees that all these tuples will be inserted consecutively,
|
|
|
|
* preserving order
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
|
|
|
void
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
Relation index = ginstate->index;
|
2009-06-11 16:49:15 +02:00
|
|
|
Buffer metabuffer;
|
|
|
|
Page metapage;
|
|
|
|
GinMetaPageData *metadata = NULL;
|
|
|
|
Buffer buffer = InvalidBuffer;
|
|
|
|
Page page = NULL;
|
|
|
|
ginxlogUpdateMeta data;
|
|
|
|
bool separateList = false;
|
|
|
|
bool needCleanup = false;
|
2014-11-11 13:08:21 +01:00
|
|
|
int cleanupSize;
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now disects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-disected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compansates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
bool needWal;
|
2009-06-11 16:49:15 +02:00
|
|
|
|
|
|
|
if (collector->ntuples == 0)
|
2009-03-24 21:17:18 +01:00
|
|
|
return;
|
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
needWal = RelationNeedsWAL(index);
|
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
data.node = index->rd_node;
|
|
|
|
data.ntuples = 0;
|
|
|
|
data.newRightlink = data.prevTail = InvalidBlockNumber;
|
|
|
|
|
|
|
|
metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
|
2016-04-20 15:31:19 +02:00
|
|
|
metapage = BufferGetPage(metabuffer);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
2009-09-15 22:31:30 +02:00
|
|
|
if (collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GinListPageSize)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Total size is greater than one page => make sublist
|
|
|
|
*/
|
|
|
|
separateList = true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
LockBuffer(metabuffer, GIN_EXCLUSIVE);
|
|
|
|
metadata = GinPageGetMeta(metapage);
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (metadata->head == InvalidBlockNumber ||
|
|
|
|
collector->sumsize + collector->ntuples * sizeof(ItemIdData) > metadata->tailFreeSize)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Pending list is empty or total size is greater than freespace
|
|
|
|
* on tail page => make sublist
|
|
|
|
*
|
|
|
|
* We unlock metabuffer to keep high concurrency
|
|
|
|
*/
|
|
|
|
separateList = true;
|
|
|
|
LockBuffer(metabuffer, GIN_UNLOCK);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (separateList)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We should make sublist separately and append it to the tail
|
|
|
|
*/
|
2009-09-15 22:31:30 +02:00
|
|
|
GinMetaPageData sublist;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
2009-09-15 22:31:30 +02:00
|
|
|
memset(&sublist, 0, sizeof(GinMetaPageData));
|
2009-03-24 21:17:18 +01:00
|
|
|
makeSublist(index, collector->tuples, collector->ntuples, &sublist);
|
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
if (needWal)
|
|
|
|
XLogBeginInsert();
|
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
/*
|
|
|
|
* metapage was unlocked, see above
|
|
|
|
*/
|
|
|
|
LockBuffer(metabuffer, GIN_EXCLUSIVE);
|
|
|
|
metadata = GinPageGetMeta(metapage);
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (metadata->head == InvalidBlockNumber)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
/*
|
Fix multiple problems in WAL replay.
Most of the replay functions for WAL record types that modify more than
one page failed to ensure that those pages were locked correctly to ensure
that concurrent queries could not see inconsistent page states. This is
a hangover from coding decisions made long before Hot Standby was added,
when it was hardly necessary to acquire buffer locks during WAL replay
at all, let alone hold them for carefully-chosen periods.
The key problem was that RestoreBkpBlocks was written to hold lock on each
page restored from a full-page image for only as long as it took to update
that page. This was guaranteed to break any WAL replay function in which
there was any update-ordering constraint between pages, because even if the
nominal order of the pages is the right one, any mixture of full-page and
non-full-page updates in the same record would result in out-of-order
updates. Moreover, it wouldn't work for situations where there's a
requirement to maintain lock on one page while updating another. Failure
to honor an update ordering constraint in this way is thought to be the
cause of bug #7648 from Daniel Farina: what seems to have happened there
is that a btree page being split was rewritten from a full-page image
before the new right sibling page was written, and because lock on the
original page was not maintained it was possible for hot standby queries to
try to traverse the page's right-link to the not-yet-existing sibling page.
To fix, get rid of RestoreBkpBlocks as such, and instead create a new
function RestoreBackupBlock that restores just one full-page image at a
time. This function can be invoked by WAL replay functions at the points
where they would otherwise perform non-full-page updates; in this way, the
physical order of page updates remains the same no matter which pages are
replaced by full-page images. We can then further adjust the logic in
individual replay functions if it is necessary to hold buffer locks
for overlapping periods. A side benefit is that we can simplify the
handling of concurrency conflict resolution by moving that code into the
record-type-specific functions; there's no more need to contort the code
layout to keep conflict resolution in front of the RestoreBkpBlocks call.
In connection with that, standardize on zero-based numbering rather than
one-based numbering for referencing the full-page images. In HEAD, I
removed the macros XLR_BKP_BLOCK_1 through XLR_BKP_BLOCK_4. They are
still there in the header files in previous branches, but are no longer
used by the code.
In addition, fix some other bugs identified in the course of making these
changes:
spgRedoAddNode could fail to update the parent downlink at all, if the
parent tuple is in the same page as either the old or new split tuple and
we're not doing a full-page image: it would get fooled by the LSN having
been advanced already. This would result in permanent index corruption,
not just transient failure of concurrent queries.
Also, ginHeapTupleFastInsert's "merge lists" case failed to mark the old
tail page as a candidate for a full-page image; in the worst case this
could result in torn-page corruption.
heap_xlog_freeze() was inconsistent about using a cleanup lock or plain
exclusive lock: it did the former in the normal path but the latter for a
full-page image. A plain exclusive lock seems sufficient, so change to
that.
Also, remove gistRedoPageDeleteRecord(), which has been dead code since
VACUUM FULL was rewritten.
Back-patch to 9.0, where hot standby was introduced. Note however that 9.0
had a significantly different WAL-logging scheme for GIST index updates,
and it doesn't appear possible to make that scheme safe for concurrent hot
standby queries, because it can leave inconsistent states in the index even
between WAL records. Given the lack of complaints from the field, we won't
work too hard on fixing that branch.
2012-11-13 04:05:08 +01:00
|
|
|
* Main list is empty, so just insert sublist as main list
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
|
|
|
START_CRIT_SECTION();
|
2009-09-15 22:31:30 +02:00
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
metadata->head = sublist.head;
|
|
|
|
metadata->tail = sublist.tail;
|
|
|
|
metadata->tailFreeSize = sublist.tailFreeSize;
|
|
|
|
|
|
|
|
metadata->nPendingPages = sublist.nPendingPages;
|
|
|
|
metadata->nPendingHeapTuples = sublist.nPendingHeapTuples;
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
2009-09-15 22:31:30 +02:00
|
|
|
* Merge lists
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
|
|
|
data.prevTail = metadata->tail;
|
2009-09-15 22:31:30 +02:00
|
|
|
data.newRightlink = sublist.head;
|
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
buffer = ReadBuffer(index, metadata->tail);
|
|
|
|
LockBuffer(buffer, GIN_EXCLUSIVE);
|
2016-04-20 15:31:19 +02:00
|
|
|
page = BufferGetPage(buffer);
|
2009-09-15 22:31:30 +02:00
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
|
|
|
|
|
|
|
|
START_CRIT_SECTION();
|
|
|
|
|
|
|
|
GinPageGetOpaque(page)->rightlink = sublist.head;
|
2009-09-15 22:31:30 +02:00
|
|
|
|
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
metadata->tail = sublist.tail;
|
|
|
|
metadata->tailFreeSize = sublist.tailFreeSize;
|
|
|
|
|
|
|
|
metadata->nPendingPages += sublist.nPendingPages;
|
|
|
|
metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
|
|
|
|
if (needWal)
|
|
|
|
XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
2009-09-15 22:31:30 +02:00
|
|
|
* Insert into tail page. Metapage is already locked
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
2009-06-11 16:49:15 +02:00
|
|
|
OffsetNumber l,
|
|
|
|
off;
|
|
|
|
int i,
|
|
|
|
tupsize;
|
|
|
|
char *ptr;
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
char *collectordata;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
buffer = ReadBuffer(index, metadata->tail);
|
|
|
|
LockBuffer(buffer, GIN_EXCLUSIVE);
|
2016-04-20 15:31:19 +02:00
|
|
|
page = BufferGetPage(buffer);
|
2009-09-15 22:31:30 +02:00
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
off = (PageIsEmpty(page)) ? FirstOffsetNumber :
|
2009-06-11 16:49:15 +02:00
|
|
|
OffsetNumberNext(PageGetMaxOffsetNumber(page));
|
2009-03-24 21:17:18 +01:00
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
collectordata = ptr = (char *) palloc(collector->sumsize);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
data.ntuples = collector->ntuples;
|
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
if (needWal)
|
|
|
|
XLogBeginInsert();
|
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
START_CRIT_SECTION();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Increase counter of heap tuples
|
|
|
|
*/
|
2009-06-11 16:49:15 +02:00
|
|
|
Assert(GinPageGetOpaque(page)->maxoff <= metadata->nPendingHeapTuples);
|
2009-03-24 21:17:18 +01:00
|
|
|
GinPageGetOpaque(page)->maxoff++;
|
|
|
|
metadata->nPendingHeapTuples++;
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
for (i = 0; i < collector->ntuples; i++)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
tupsize = IndexTupleSize(collector->tuples[i]);
|
2009-06-11 16:49:15 +02:00
|
|
|
l = PageAddItem(page, (Item) collector->tuples[i], tupsize, off, false, false);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
if (l == InvalidOffsetNumber)
|
|
|
|
elog(ERROR, "failed to add item to index page in \"%s\"",
|
2009-06-11 16:49:15 +02:00
|
|
|
RelationGetRelationName(index));
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
memcpy(ptr, collector->tuples[i], tupsize);
|
2009-06-11 16:49:15 +02:00
|
|
|
ptr += tupsize;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
off++;
|
|
|
|
}
|
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
Assert((ptr - collectordata) <= collector->sumsize);
|
|
|
|
if (needWal)
|
|
|
|
{
|
|
|
|
XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
|
|
|
|
XLogRegisterBufData(1, collectordata, collector->sumsize);
|
|
|
|
}
|
2009-09-15 22:31:30 +02:00
|
|
|
|
|
|
|
metadata->tailFreeSize = PageGetExactFreeSpace(page);
|
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
MarkBufferDirty(buffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2009-09-15 22:31:30 +02:00
|
|
|
* Write metabuffer, make xlog entry
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
|
|
|
MarkBufferDirty(metabuffer);
|
2009-09-15 22:31:30 +02:00
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
if (needWal)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
XLogRecPtr recptr;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
2009-09-15 22:31:30 +02:00
|
|
|
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
|
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
|
|
|
|
XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
|
|
|
|
|
|
|
|
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
|
2009-03-24 21:17:18 +01:00
|
|
|
PageSetLSN(metapage, recptr);
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (buffer != InvalidBuffer)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
PageSetLSN(page, recptr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (buffer != InvalidBuffer)
|
|
|
|
UnlockReleaseBuffer(buffer);
|
|
|
|
|
|
|
|
/*
|
2009-06-11 16:49:15 +02:00
|
|
|
* Force pending list cleanup when it becomes too long. And,
|
|
|
|
* ginInsertCleanup could take significant amount of time, so we prefer to
|
|
|
|
* call it when it can do all the work in a single collection cycle. In
|
|
|
|
* non-vacuum mode, it shouldn't require maintenance_work_mem, so fire it
|
2014-11-11 13:08:21 +01:00
|
|
|
* while pending list is still small enough to fit into
|
2014-11-13 04:14:48 +01:00
|
|
|
* gin_pending_list_limit.
|
2009-03-24 21:17:18 +01:00
|
|
|
*
|
|
|
|
* ginInsertCleanup() should not be called inside our CRIT_SECTION.
|
|
|
|
*/
|
2014-11-11 13:08:21 +01:00
|
|
|
cleanupSize = GinGetPendingListCleanupSize(index);
|
|
|
|
if (metadata->nPendingPages * GIN_PAGE_FREESIZE > cleanupSize * 1024L)
|
2009-03-24 21:17:18 +01:00
|
|
|
needCleanup = true;
|
|
|
|
|
|
|
|
UnlockReleaseBuffer(metabuffer);
|
|
|
|
|
|
|
|
END_CRIT_SECTION();
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (needCleanup)
|
2016-04-28 15:21:42 +02:00
|
|
|
ginInsertCleanup(ginstate, false, true, NULL);
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
* Create temporary index tuples for a single indexable item (one index column
|
|
|
|
* for the heap tuple specified by ht_ctid), and append them to the array
|
|
|
|
* in *collector. They will subsequently be written out using
|
2014-05-06 18:12:18 +02:00
|
|
|
* ginHeapTupleFastInsert. Note that to guarantee consistent state, all
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
* temp tuples for a given heap tuple must be written in one call to
|
|
|
|
* ginHeapTupleFastInsert.
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
void
|
|
|
|
ginHeapTupleFastCollect(GinState *ginstate,
|
2009-03-24 21:17:18 +01:00
|
|
|
GinTupleCollector *collector,
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
OffsetNumber attnum, Datum value, bool isNull,
|
|
|
|
ItemPointer ht_ctid)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
Datum *entries;
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
GinNullCategory *categories;
|
2009-03-24 21:17:18 +01:00
|
|
|
int32 i,
|
|
|
|
nentries;
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
/*
|
|
|
|
* Extract the key values that need to be inserted in the index
|
|
|
|
*/
|
|
|
|
entries = ginExtractEntries(ginstate, attnum, value, isNull,
|
|
|
|
&nentries, &categories);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Allocate/reallocate memory for storing collected tuples
|
|
|
|
*/
|
2009-06-11 16:49:15 +02:00
|
|
|
if (collector->tuples == NULL)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
collector->lentuples = nentries * ginstate->origTupdesc->natts;
|
2009-06-11 16:49:15 +02:00
|
|
|
collector->tuples = (IndexTuple *) palloc(sizeof(IndexTuple) * collector->lentuples);
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
while (collector->ntuples + nentries > collector->lentuples)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
collector->lentuples *= 2;
|
2009-06-11 16:49:15 +02:00
|
|
|
collector->tuples = (IndexTuple *) repalloc(collector->tuples,
|
|
|
|
sizeof(IndexTuple) * collector->lentuples);
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2011-04-10 17:42:00 +02:00
|
|
|
* Build an index tuple for each key value, and add to array. In pending
|
|
|
|
* tuples we just stick the heap TID into t_tid.
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
|
|
|
for (i = 0; i < nentries; i++)
|
|
|
|
{
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
IndexTuple itup;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
itup = GinFormTuple(ginstate, attnum, entries[i], categories[i],
|
2014-01-22 17:51:48 +01:00
|
|
|
NULL, 0, 0, true);
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
itup->t_tid = *ht_ctid;
|
|
|
|
collector->tuples[collector->ntuples++] = itup;
|
|
|
|
collector->sumsize += IndexTupleSize(itup);
|
|
|
|
}
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Deletes pending list pages up to (not including) newHead page.
|
|
|
|
* If newHead == InvalidBlockNumber then function drops the whole list.
|
|
|
|
*
|
|
|
|
* metapage is pinned and exclusive-locked throughout this function.
|
|
|
|
*/
|
2016-04-28 15:21:42 +02:00
|
|
|
static void
|
2009-03-24 21:17:18 +01:00
|
|
|
shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
|
2015-09-23 14:33:51 +02:00
|
|
|
bool fill_fsm, IndexBulkDeleteResult *stats)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
Page metapage;
|
|
|
|
GinMetaPageData *metadata;
|
|
|
|
BlockNumber blknoToDelete;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
2016-04-20 15:31:19 +02:00
|
|
|
metapage = BufferGetPage(metabuffer);
|
2009-03-24 21:17:18 +01:00
|
|
|
metadata = GinPageGetMeta(metapage);
|
|
|
|
blknoToDelete = metadata->head;
|
|
|
|
|
|
|
|
do
|
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
Page page;
|
|
|
|
int i;
|
|
|
|
int64 nDeletedHeapTuples = 0;
|
|
|
|
ginxlogDeleteListPages data;
|
|
|
|
Buffer buffers[GIN_NDELETE_AT_ONCE];
|
2016-06-10 00:02:36 +02:00
|
|
|
BlockNumber freespace[GIN_NDELETE_AT_ONCE];
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
data.ndeleted = 0;
|
|
|
|
while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead)
|
|
|
|
{
|
2015-09-07 15:24:01 +02:00
|
|
|
freespace[data.ndeleted] = blknoToDelete;
|
2009-06-11 16:49:15 +02:00
|
|
|
buffers[data.ndeleted] = ReadBuffer(index, blknoToDelete);
|
|
|
|
LockBuffer(buffers[data.ndeleted], GIN_EXCLUSIVE);
|
2016-04-20 15:31:19 +02:00
|
|
|
page = BufferGetPage(buffers[data.ndeleted]);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
data.ndeleted++;
|
|
|
|
|
2016-04-28 15:21:42 +02:00
|
|
|
Assert(!GinPageIsDeleted(page));
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
nDeletedHeapTuples += GinPageGetOpaque(page)->maxoff;
|
2009-06-11 16:49:15 +02:00
|
|
|
blknoToDelete = GinPageGetOpaque(page)->rightlink;
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (stats)
|
|
|
|
stats->pages_deleted += data.ndeleted;
|
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
/*
|
|
|
|
* This operation touches an unusually large number of pages, so
|
|
|
|
* prepare the XLogInsert machinery for that before entering the
|
|
|
|
* critical section.
|
|
|
|
*/
|
2014-11-21 14:13:15 +01:00
|
|
|
if (RelationNeedsWAL(index))
|
|
|
|
XLogEnsureRecordSpace(data.ndeleted, 0);
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
START_CRIT_SECTION();
|
|
|
|
|
|
|
|
metadata->head = blknoToDelete;
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
Assert(metadata->nPendingPages >= data.ndeleted);
|
2009-03-24 21:17:18 +01:00
|
|
|
metadata->nPendingPages -= data.ndeleted;
|
2009-06-11 16:49:15 +02:00
|
|
|
Assert(metadata->nPendingHeapTuples >= nDeletedHeapTuples);
|
2009-03-24 21:17:18 +01:00
|
|
|
metadata->nPendingHeapTuples -= nDeletedHeapTuples;
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (blknoToDelete == InvalidBlockNumber)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
metadata->tail = InvalidBlockNumber;
|
|
|
|
metadata->tailFreeSize = 0;
|
|
|
|
metadata->nPendingPages = 0;
|
|
|
|
metadata->nPendingHeapTuples = 0;
|
|
|
|
}
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
MarkBufferDirty(metabuffer);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
for (i = 0; i < data.ndeleted; i++)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
2016-04-20 15:31:19 +02:00
|
|
|
page = BufferGetPage(buffers[i]);
|
2009-06-11 16:49:15 +02:00
|
|
|
GinPageGetOpaque(page)->flags = GIN_DELETED;
|
|
|
|
MarkBufferDirty(buffers[i]);
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
2010-12-13 18:34:26 +01:00
|
|
|
if (RelationNeedsWAL(index))
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
XLogRecPtr recptr;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
XLogBeginInsert();
|
|
|
|
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
|
|
|
|
for (i = 0; i < data.ndeleted; i++)
|
|
|
|
XLogRegisterBuffer(i + 1, buffers[i], REGBUF_WILL_INIT);
|
|
|
|
|
2009-09-15 22:31:30 +02:00
|
|
|
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
|
|
|
|
|
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and
block(s) in a standardized format. That makes it easier to write tools that
need that information, like pg_rewind, prefetching the blocks to speed up
recovery, etc.
There's a whole new API for building WAL records, replacing the XLogRecData
chains used previously. The new API consists of XLogRegister* functions,
which are called for each buffer and chunk of data that is added to the
record. The new API also gives more control over when a full-page image is
written, by passing flags to the XLogRegisterBuffer function.
This also simplifies the XLogReadBufferForRedo() calls. The function can dig
the relation and block number from the WAL record, so they no longer need to
be passed as arguments.
For the convenience of redo routines, XLogReader now dissects each WAL record
after reading it, copying the main data part and the per-block data into
MAXALIGNed buffers. The data chunks are not aligned within the WAL record,
but the redo routines can assume that the pointers returned by XLogRecGet*
functions are. Redo routines are now passed the XLogReaderState, which
contains the record in the already-dissected format, instead of the plain
XLogRecord.
The new record format also makes the fixed size XLogRecord header smaller,
by removing the xl_len field. The length of the "main data" portion is now
stored at the end of the WAL record, and there's a separate header after
XLogRecord for it. The alignment padding at the end of XLogRecord is also
removed. This compensates for the fact that the new format would otherwise
be more bulky than the old format.
Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera,
Fujii Masao.
2014-11-20 16:56:26 +01:00
|
|
|
XLogRegisterData((char *) &data,
|
|
|
|
sizeof(ginxlogDeleteListPages));
|
|
|
|
|
|
|
|
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE);
|
2009-03-24 21:17:18 +01:00
|
|
|
PageSetLSN(metapage, recptr);
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
for (i = 0; i < data.ndeleted; i++)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
2016-04-20 15:31:19 +02:00
|
|
|
page = BufferGetPage(buffers[i]);
|
2009-03-24 21:17:18 +01:00
|
|
|
PageSetLSN(page, recptr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
for (i = 0; i < data.ndeleted; i++)
|
|
|
|
UnlockReleaseBuffer(buffers[i]);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
END_CRIT_SECTION();
|
2015-09-07 15:24:01 +02:00
|
|
|
|
2015-09-23 14:33:51 +02:00
|
|
|
for (i = 0; fill_fsm && i < data.ndeleted; i++)
|
2015-09-07 15:24:01 +02:00
|
|
|
RecordFreeIndexPage(index, freespace[i]);
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
} while (blknoToDelete != newHead);
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
/* Initialize empty KeyArray */
|
2009-03-24 21:17:18 +01:00
|
|
|
static void
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
initKeyArray(KeyArray *keys, int32 maxvalues)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
keys->keys = (Datum *) palloc(sizeof(Datum) * maxvalues);
|
|
|
|
keys->categories = (GinNullCategory *)
|
|
|
|
palloc(sizeof(GinNullCategory) * maxvalues);
|
|
|
|
keys->nvalues = 0;
|
|
|
|
keys->maxvalues = maxvalues;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Add datum to KeyArray, resizing if needed */
|
|
|
|
static void
|
|
|
|
addDatum(KeyArray *keys, Datum datum, GinNullCategory category)
|
|
|
|
{
|
|
|
|
if (keys->nvalues >= keys->maxvalues)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
keys->maxvalues *= 2;
|
|
|
|
keys->keys = (Datum *)
|
|
|
|
repalloc(keys->keys, sizeof(Datum) * keys->maxvalues);
|
|
|
|
keys->categories = (GinNullCategory *)
|
|
|
|
repalloc(keys->categories, sizeof(GinNullCategory) * keys->maxvalues);
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
keys->keys[keys->nvalues] = datum;
|
|
|
|
keys->categories[keys->nvalues] = category;
|
|
|
|
keys->nvalues++;
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
* Collect data from a pending-list page in preparation for insertion into
|
|
|
|
* the main index.
|
|
|
|
*
|
|
|
|
* Go through all tuples >= startoff on page and collect values in accum
|
2009-03-24 21:17:18 +01:00
|
|
|
*
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
* Note that ka is just workspace --- it does not carry any state across
|
2009-03-24 21:17:18 +01:00
|
|
|
* calls.
|
|
|
|
*/
|
|
|
|
static void
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
processPendingPage(BuildAccumulator *accum, KeyArray *ka,
|
2009-03-24 21:17:18 +01:00
|
|
|
Page page, OffsetNumber startoff)
|
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
ItemPointerData heapptr;
|
|
|
|
OffsetNumber i,
|
|
|
|
maxoff;
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
OffsetNumber attrnum;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
/* reset *ka to empty */
|
|
|
|
ka->nvalues = 0;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
maxoff = PageGetMaxOffsetNumber(page);
|
2009-06-11 16:49:15 +02:00
|
|
|
Assert(maxoff >= FirstOffsetNumber);
|
2009-03-24 21:17:18 +01:00
|
|
|
ItemPointerSetInvalid(&heapptr);
|
|
|
|
attrnum = 0;
|
|
|
|
|
|
|
|
for (i = startoff; i <= maxoff; i = OffsetNumberNext(i))
|
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
OffsetNumber curattnum;
|
2011-04-10 17:42:00 +02:00
|
|
|
Datum curkey;
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
GinNullCategory curcategory;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
/* Check for change of heap TID or attnum */
|
2009-03-24 21:17:18 +01:00
|
|
|
curattnum = gintuple_get_attrnum(accum->ginstate, itup);
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (!ItemPointerIsValid(&heapptr))
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
heapptr = itup->t_tid;
|
|
|
|
attrnum = curattnum;
|
|
|
|
}
|
2009-06-11 16:49:15 +02:00
|
|
|
else if (!(ItemPointerEquals(&heapptr, &itup->t_tid) &&
|
|
|
|
curattnum == attrnum))
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
/*
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
* ginInsertBAEntries can insert several datums per call, but only
|
|
|
|
* for one heap tuple and one column. So call it at a boundary,
|
|
|
|
* and reset ka.
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
ginInsertBAEntries(accum, &heapptr, attrnum,
|
|
|
|
ka->keys, ka->categories, ka->nvalues);
|
|
|
|
ka->nvalues = 0;
|
2009-03-24 21:17:18 +01:00
|
|
|
heapptr = itup->t_tid;
|
|
|
|
attrnum = curattnum;
|
|
|
|
}
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
|
|
|
|
/* Add key to KeyArray */
|
|
|
|
curkey = gintuple_get_key(accum->ginstate, itup, &curcategory);
|
|
|
|
addDatum(ka, curkey, curcategory);
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
/* Dump out all remaining keys */
|
|
|
|
ginInsertBAEntries(accum, &heapptr, attrnum,
|
|
|
|
ka->keys, ka->categories, ka->nvalues);
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Move tuples from pending pages into regular GIN structure.
|
|
|
|
*
|
2016-04-28 15:21:42 +02:00
|
|
|
* At first glance this does not look crash-safe at all. But if we crash
|
|
|
|
* after posting entries to the main index and before removing them from the
|
2009-03-24 21:17:18 +01:00
|
|
|
* pending list, it's okay because when we redo the posting later on, nothing
|
2016-04-28 15:21:42 +02:00
|
|
|
* bad will happen.
|
2009-03-24 21:17:18 +01:00
|
|
|
*
|
2015-09-23 14:33:51 +02:00
|
|
|
* fill_fsm indicates that ginInsertCleanup should add deleted pages
|
|
|
|
* to the FSM; otherwise the caller is responsible for putting deleted
|
|
|
|
* pages into the FSM.
|
|
|
|
*
|
2009-03-24 21:17:18 +01:00
|
|
|
* If stats isn't null, we count deleted pending pages into the counts.
|
|
|
|
*/
|
|
|
|
void
|
2016-04-28 15:21:42 +02:00
|
|
|
ginInsertCleanup(GinState *ginstate, bool full_clean,
|
2016-01-21 17:22:56 +01:00
|
|
|
bool fill_fsm, IndexBulkDeleteResult *stats)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
Relation index = ginstate->index;
|
2009-06-11 16:49:15 +02:00
|
|
|
Buffer metabuffer,
|
|
|
|
buffer;
|
|
|
|
Page metapage,
|
|
|
|
page;
|
|
|
|
GinMetaPageData *metadata;
|
|
|
|
MemoryContext opCtx,
|
|
|
|
oldCtx;
|
|
|
|
BuildAccumulator accum;
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
KeyArray datums;
|
2016-04-28 15:21:42 +02:00
|
|
|
BlockNumber blkno,
|
|
|
|
blknoFinish;
|
|
|
|
bool cleanupFinish = false;
|
2015-09-07 15:24:01 +02:00
|
|
|
bool fsm_vac = false;
|
2016-04-28 15:21:42 +02:00
|
|
|
Size workMemory;
|
|
|
|
bool inVacuum = (stats == NULL);
|
|
|
|
|
|
|
|
/*
|
2016-06-10 00:02:36 +02:00
|
|
|
* We would like to prevent concurrent cleanup processes. For that we will
|
|
|
|
* lock metapage in exclusive mode using LockPage() call. Nobody else
|
|
|
|
* will use that lock for metapage, so we keep possibility of concurrent
|
|
|
|
* insertion into pending list
|
2016-04-28 15:21:42 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
if (inVacuum)
|
|
|
|
{
|
|
|
|
/*
|
2016-06-10 00:02:36 +02:00
|
|
|
* We are called from [auto]vacuum/analyze or gin_clean_pending_list()
|
|
|
|
* and we would like to wait for the concurrent cleanup to finish.
|
2016-04-28 15:21:42 +02:00
|
|
|
*/
|
|
|
|
LockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock);
|
|
|
|
workMemory =
|
|
|
|
(IsAutoVacuumWorkerProcess() && autovacuum_work_mem != -1) ?
|
2016-06-10 00:02:36 +02:00
|
|
|
autovacuum_work_mem : maintenance_work_mem;
|
2016-04-28 15:21:42 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
2016-06-10 00:02:36 +02:00
|
|
|
* We are called from a regular insert; if we see a concurrent cleanup,
|
|
|
|
* just exit in the hope that the concurrent process will clean up the pending
|
|
|
|
* list.
|
2016-04-28 15:21:42 +02:00
|
|
|
*/
|
|
|
|
if (!ConditionalLockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock))
|
|
|
|
return;
|
|
|
|
workMemory = work_mem;
|
|
|
|
}
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
|
|
|
|
LockBuffer(metabuffer, GIN_SHARE);
|
2016-04-20 15:31:19 +02:00
|
|
|
metapage = BufferGetPage(metabuffer);
|
2009-03-24 21:17:18 +01:00
|
|
|
metadata = GinPageGetMeta(metapage);
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
if (metadata->head == InvalidBlockNumber)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
/* Nothing to do */
|
|
|
|
UnlockReleaseBuffer(metabuffer);
|
2016-04-28 15:21:42 +02:00
|
|
|
UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock);
|
2009-03-24 21:17:18 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2016-04-28 15:21:42 +02:00
|
|
|
/*
|
|
|
|
* Remember a tail page to prevent infinite cleanup if other backends add
|
|
|
|
* new tuples faster than we can clean up.
|
|
|
|
*/
|
|
|
|
blknoFinish = metadata->tail;
|
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
/*
|
|
|
|
* Read and lock head of pending list
|
|
|
|
*/
|
|
|
|
blkno = metadata->head;
|
|
|
|
buffer = ReadBuffer(index, blkno);
|
|
|
|
LockBuffer(buffer, GIN_SHARE);
|
2016-04-20 15:31:19 +02:00
|
|
|
page = BufferGetPage(buffer);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
LockBuffer(metabuffer, GIN_UNLOCK);
|
|
|
|
|
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* Initialize. All temporary space will be in opCtx
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
|
|
|
opCtx = AllocSetContextCreate(CurrentMemoryContext,
|
|
|
|
"GIN insert cleanup temporary context",
|
Add macros to make AllocSetContextCreate() calls simpler and safer.
I found that half a dozen (nearly 5%) of our AllocSetContextCreate calls
had typos in the context-sizing parameters. While none of these led to
especially significant problems, they did create minor inefficiencies,
and it's now clear that expecting people to copy-and-paste those calls
accurately is not a great idea. Let's reduce the risk of future errors
by introducing single macros that encapsulate the common use-cases.
Three such macros are enough to cover all but two special-purpose contexts;
those two calls can be left as-is, I think.
While this patch doesn't in itself improve matters for third-party
extensions, it doesn't break anything for them either, and they can
gradually adopt the simplified notation over time.
In passing, change TopMemoryContext to use the default allocation
parameters. Formerly it could only be extended 8K at a time. That was
probably reasonable when this code was written; but nowadays we create
many more contexts than we did then, so that it's not unusual to have a
couple hundred K in TopMemoryContext, even without considering various
dubious code that sticks other things there. There seems no good reason
not to let it use growing blocks like most other contexts.
Back-patch to 9.6, mostly because that's still close enough to HEAD that
it's easy to do so, and keeping the branches in sync can be expected to
avoid some future back-patching pain. The bugs fixed by these changes
don't seem to be significant enough to justify fixing them further back.
Discussion: <21072.1472321324@sss.pgh.pa.us>
2016-08-27 23:50:38 +02:00
|
|
|
ALLOCSET_DEFAULT_SIZES);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
oldCtx = MemoryContextSwitchTo(opCtx);
|
|
|
|
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
initKeyArray(&datums, 128);
|
2009-03-24 21:17:18 +01:00
|
|
|
ginInitBA(&accum);
|
|
|
|
accum.ginstate = ginstate;
|
|
|
|
|
|
|
|
/*
|
2009-06-11 16:49:15 +02:00
|
|
|
* At the top of this loop, we have pin and lock on the current page of
|
|
|
|
* the pending list. However, we'll release that before exiting the loop.
|
|
|
|
* Note we also have pin but not lock on the metapage.
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
2009-06-11 16:49:15 +02:00
|
|
|
for (;;)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
2016-04-28 15:21:42 +02:00
|
|
|
Assert(!GinPageIsDeleted(page));
|
|
|
|
|
|
|
|
/*
|
2016-06-10 00:02:36 +02:00
|
|
|
* Are we walking through the page that, as we remember, was the tail when
|
|
|
|
* we started our cleanup? But if caller asks us to clean up whole
|
|
|
|
* pending list then ignore old tail, we will work until list becomes
|
|
|
|
* empty.
|
2016-04-28 15:21:42 +02:00
|
|
|
*/
|
|
|
|
if (blkno == blknoFinish && full_clean == false)
|
|
|
|
cleanupFinish = true;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
/*
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
* read page's datums into accum
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
|
|
|
processPendingPage(&accum, &datums, page, FirstOffsetNumber);
|
|
|
|
|
2015-09-07 16:16:29 +02:00
|
|
|
vacuum_delay_point();
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
/*
|
2009-06-11 16:49:15 +02:00
|
|
|
* Is it time to flush memory to disk? Flush if we are at the end of
|
|
|
|
* the pending list, or if we have a full row and memory is getting
|
|
|
|
* full.
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
2009-03-24 23:06:03 +01:00
|
|
|
if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
|
|
|
|
(GinPageHasFullRow(page) &&
|
2016-04-28 15:21:42 +02:00
|
|
|
(accum.allocatedMemory >= workMemory * 1024L)))
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
2009-06-11 16:49:15 +02:00
|
|
|
ItemPointerData *list;
|
|
|
|
uint32 nlist;
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
Datum key;
|
|
|
|
GinNullCategory category;
|
2009-06-11 16:49:15 +02:00
|
|
|
OffsetNumber maxoff,
|
|
|
|
attnum;
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
/*
|
2009-06-11 16:49:15 +02:00
|
|
|
* Unlock current page to increase performance. Changes of page
|
|
|
|
* will be checked later by comparing maxoff after completion of
|
|
|
|
* memory flush.
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
|
|
|
maxoff = PageGetMaxOffsetNumber(page);
|
|
|
|
LockBuffer(buffer, GIN_UNLOCK);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Moving collected data into regular structure can take
|
|
|
|
* significant amount of time - so, run it without locking pending
|
|
|
|
* list.
|
|
|
|
*/
|
2010-08-01 04:12:42 +02:00
|
|
|
ginBeginBAScan(&accum);
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
while ((list = ginGetBAEntry(&accum,
|
2011-04-10 17:42:00 +02:00
|
|
|
&attnum, &key, &category, &nlist)) != NULL)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
ginEntryInsert(ginstate, attnum, key, category,
|
|
|
|
list, nlist, NULL);
|
2015-09-07 16:16:29 +02:00
|
|
|
vacuum_delay_point();
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Lock the whole list to remove pages
|
|
|
|
*/
|
|
|
|
LockBuffer(metabuffer, GIN_EXCLUSIVE);
|
|
|
|
LockBuffer(buffer, GIN_SHARE);
|
|
|
|
|
2016-04-28 15:21:42 +02:00
|
|
|
Assert(!GinPageIsDeleted(page));
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* While we left the page unlocked, more stuff might have gotten
|
2014-05-06 18:12:18 +02:00
|
|
|
* added to it. If so, process those entries immediately. There
|
2009-03-24 21:17:18 +01:00
|
|
|
* shouldn't be very many, so we don't worry about the fact that
|
|
|
|
* we're doing this with exclusive lock. Insertion algorithm
|
2012-04-24 04:43:09 +02:00
|
|
|
* guarantees that inserted row(s) will not continue on next page.
|
2009-03-24 21:17:18 +01:00
|
|
|
* NOTE: intentionally no vacuum_delay_point in this loop.
|
|
|
|
*/
|
2009-06-11 16:49:15 +02:00
|
|
|
if (PageGetMaxOffsetNumber(page) != maxoff)
|
2009-03-24 21:17:18 +01:00
|
|
|
{
|
|
|
|
ginInitBA(&accum);
|
2009-06-11 16:49:15 +02:00
|
|
|
processPendingPage(&accum, &datums, page, maxoff + 1);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
2010-08-01 04:12:42 +02:00
|
|
|
ginBeginBAScan(&accum);
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
while ((list = ginGetBAEntry(&accum,
|
2011-04-10 17:42:00 +02:00
|
|
|
&attnum, &key, &category, &nlist)) != NULL)
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
ginEntryInsert(ginstate, attnum, key, category,
|
|
|
|
list, nlist, NULL);
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remember next page - it will become the new list head
|
|
|
|
*/
|
|
|
|
blkno = GinPageGetOpaque(page)->rightlink;
|
2009-06-11 16:49:15 +02:00
|
|
|
UnlockReleaseBuffer(buffer); /* shiftList will do exclusive
|
|
|
|
* locking */
|
2009-03-24 21:17:18 +01:00
|
|
|
|
|
|
|
/*
|
2016-06-10 00:02:36 +02:00
|
|
|
* remove read pages from pending list, at this point all content
|
|
|
|
* of read pages is in regular structure
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
2016-04-28 15:21:42 +02:00
|
|
|
shiftList(index, metabuffer, blkno, fill_fsm, stats);
|
2009-03-24 21:17:18 +01:00
|
|
|
|
2015-09-07 15:24:01 +02:00
|
|
|
/* At this point, some pending pages have been freed up */
|
|
|
|
fsm_vac = true;
|
|
|
|
|
2009-06-11 16:49:15 +02:00
|
|
|
Assert(blkno == metadata->head);
|
2009-03-24 21:17:18 +01:00
|
|
|
LockBuffer(metabuffer, GIN_UNLOCK);
|
|
|
|
|
|
|
|
/*
|
2016-04-28 15:21:42 +02:00
|
|
|
* if we removed the whole pending list, or we cleaned up the tail (which
|
|
|
|
* we remembered at the start of our cleanup process), then just exit
|
2009-03-24 21:17:18 +01:00
|
|
|
*/
|
2016-04-28 15:21:42 +02:00
|
|
|
if (blkno == InvalidBlockNumber || cleanupFinish)
|
2009-03-24 21:17:18 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* release memory used so far and reinit state
|
|
|
|
*/
|
|
|
|
MemoryContextReset(opCtx);
|
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by
extractValue and extractQuery functions, and will be stored in the index.
Also, placeholder entries are made for indexable items that are NULL or
contain no keys according to extractValue. This means that the index is
now always complete, having at least one entry for every indexed heap TID,
and so we can get rid of the prohibition on full-index scans. A full-index
scan is implemented much the same way as partial-match scans were already:
we build a bitmap representing all the TIDs found in the index, and then
drive the results off that.
Also, introduce a concept of a "search mode" that can be requested by
extractQuery when the operator requires matching to empty items (this is
just as cheap as matching to a single key) or requires a full index scan
(which is not so cheap, but it sure beats failing or giving wrong answers).
The behavior remains backward compatible for opclasses that don't return
any null keys or request a non-default search mode.
Using these features, we can now make the GIN index opclass for anyarray
behave in a way that matches the actual anyarray operators for &&, <@, @>,
and = ... which it failed to do before in assorted corner cases.
This commit fixes the core GIN code and ginarrayprocs.c, updates the
documentation, and adds some simple regression test cases for the new
behaviors using the array operators. The tsearch and contrib GIN opclass
support functions still need to be looked over and probably fixed.
Another thing I intend to fix separately is that this is pretty inefficient
for cases where more than one scan condition needs a full-index search:
we'll run duplicate GinScanEntrys, each one of which builds a large bitmap.
There is some existing logic to merge duplicate GinScanEntrys but it needs
refactoring to make it work for entries belonging to different scan keys.
Note that most of gin.h has been split out into a new file gin_private.h,
so that gin.h doesn't export anything that's not supposed to be used by GIN
opclasses or the rest of the backend. I did quite a bit of other code
beautification work as well, mostly fixing comments and choosing more
appropriate names for things.
2011-01-08 01:16:24 +01:00
|
|
|
initKeyArray(&datums, datums.maxvalues);
|
2009-03-24 21:17:18 +01:00
|
|
|
ginInitBA(&accum);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
blkno = GinPageGetOpaque(page)->rightlink;
|
|
|
|
UnlockReleaseBuffer(buffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Read next page in pending list
|
|
|
|
*/
|
2015-09-07 16:16:29 +02:00
|
|
|
vacuum_delay_point();
|
2009-03-24 21:17:18 +01:00
|
|
|
buffer = ReadBuffer(index, blkno);
|
|
|
|
LockBuffer(buffer, GIN_SHARE);
|
2016-04-20 15:31:19 +02:00
|
|
|
page = BufferGetPage(buffer);
|
2009-03-24 21:17:18 +01:00
|
|
|
}
|
|
|
|
|
2016-04-28 15:21:42 +02:00
|
|
|
UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock);
|
2009-03-24 21:17:18 +01:00
|
|
|
ReleaseBuffer(metabuffer);
|
|
|
|
|
2015-09-07 15:24:01 +02:00
|
|
|
/*
|
2016-06-10 00:02:36 +02:00
|
|
|
* As pending list pages can have a high churn rate, it is desirable to
|
|
|
|
* recycle them immediately to the FreeSpace Map when ordinary backends
|
|
|
|
* clean the list.
|
2015-09-07 15:24:01 +02:00
|
|
|
*/
|
2015-09-23 14:33:51 +02:00
|
|
|
if (fsm_vac && fill_fsm)
|
2015-09-07 15:24:01 +02:00
|
|
|
IndexFreeSpaceMapVacuum(index);
|
|
|
|
|
|
|
|
|
2009-03-24 21:17:18 +01:00
|
|
|
/* Clean up temporary space */
|
|
|
|
MemoryContextSwitchTo(oldCtx);
|
|
|
|
MemoryContextDelete(opCtx);
|
|
|
|
}
|
2016-01-28 04:57:52 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* SQL-callable function to clean the insert pending list
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
gin_clean_pending_list(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
Oid indexoid = PG_GETARG_OID(0);
|
|
|
|
Relation indexRel = index_open(indexoid, AccessShareLock);
|
|
|
|
IndexBulkDeleteResult stats;
|
|
|
|
GinState ginstate;
|
|
|
|
|
|
|
|
if (RecoveryInProgress())
|
2016-01-30 21:58:20 +01:00
|
|
|
ereport(ERROR,
|
2016-01-28 04:57:52 +01:00
|
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
|
|
|
errmsg("recovery is in progress"),
|
2016-06-10 00:02:36 +02:00
|
|
|
errhint("GIN pending list cannot be cleaned up during recovery.")));
|
2016-01-28 04:57:52 +01:00
|
|
|
|
|
|
|
/* Must be a GIN index */
|
|
|
|
if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
|
|
|
|
indexRel->rd_rel->relam != GIN_AM_OID)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
|
|
|
errmsg("\"%s\" is not a GIN index",
|
|
|
|
RelationGetRelationName(indexRel))));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reject attempts to read non-local temporary relations; we would be
|
|
|
|
* likely to get wrong data since we have no visibility into the owning
|
|
|
|
* session's local buffers.
|
|
|
|
*/
|
|
|
|
if (RELATION_IS_OTHER_TEMP(indexRel))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("cannot access temporary indexes of other sessions")));
|
|
|
|
|
|
|
|
/* User must own the index (comparable to privileges needed for VACUUM) */
|
|
|
|
if (!pg_class_ownercheck(indexoid, GetUserId()))
|
|
|
|
aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
|
|
|
|
RelationGetRelationName(indexRel));
|
|
|
|
|
|
|
|
memset(&stats, 0, sizeof(stats));
|
|
|
|
initGinState(&ginstate, indexRel);
|
2016-04-28 15:21:42 +02:00
|
|
|
ginInsertCleanup(&ginstate, true, true, &stats);
|
2016-01-28 04:57:52 +01:00
|
|
|
|
|
|
|
index_close(indexRel, AccessShareLock);
|
|
|
|
|
|
|
|
PG_RETURN_INT64((int64) stats.pages_deleted);
|
|
|
|
}
|