/*-------------------------------------------------------------------------
 *
 * nodeBitmapHeapscan.c
 *	  Routines to support bitmapped scans of relations
 *
 * NOTE: it is critical that this plan type only be used with MVCC-compliant
 * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
 * special snapshots).  The reason is that since index and heap scans are
 * decoupled, there can be no assurance that the index tuple prompting a
 * visit to a particular heap TID still exists when the visit is made.
 * Therefore the tuple might not exist anymore either (which is OK because
 * heap_fetch will cope) --- but worse, the tuple slot could have been
 * re-used for a newer tuple.  With an MVCC snapshot the newer tuple is
 * certain to fail the time qual and so it will not be mistakenly returned,
 * but with anything else we might return a tuple that doesn't meet the
 * required index qual conditions.
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeBitmapHeapscan.c
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
/*
 * INTERFACE ROUTINES
 *		ExecBitmapHeapScan			scans a relation using bitmap info
 *		ExecBitmapHeapNext			workhorse for above
 *		ExecInitBitmapHeapScan		creates and initializes state info.
 *		ExecReScanBitmapHeapScan	prepares to rescan the plan.
 *		ExecEndBitmapHeapScan		releases all storage.
 */
|
|
|
|
#include "postgres.h"
|
|
|
|
|
2017-02-23 21:57:08 +01:00
|
|
|
#include <math.h>
|
|
|
|
|
2008-06-19 02:46:06 +02:00
|
|
|
#include "access/relscan.h"
|
2008-09-11 16:01:10 +02:00
|
|
|
#include "access/transam.h"
|
2005-04-20 00:35:18 +02:00
|
|
|
#include "executor/execdebug.h"
|
|
|
|
#include "executor/nodeBitmapHeapscan.h"
|
2005-10-06 04:29:23 +02:00
|
|
|
#include "pgstat.h"
|
2008-05-12 02:00:54 +02:00
|
|
|
#include "storage/bufmgr.h"
|
2011-06-29 20:40:27 +02:00
|
|
|
#include "storage/predicate.h"
|
2005-05-06 19:24:55 +02:00
|
|
|
#include "utils/memutils.h"
|
2011-02-23 18:18:09 +01:00
|
|
|
#include "utils/rel.h"
|
2015-09-08 17:51:42 +02:00
|
|
|
#include "utils/spccache.h"
|
2008-03-26 19:48:59 +01:00
|
|
|
#include "utils/snapmgr.h"
|
2008-03-26 22:10:39 +01:00
|
|
|
#include "utils/tqual.h"
|
2005-04-20 00:35:18 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* Forward declarations for routines local to this file. */
static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
/* Parallel-scan helpers: coordinate shared TBM iterator setup across workers. */
static inline void BitmapDoneInitializingSharedState(
						 ParallelBitmapHeapState *pstate);
static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
								 TBMIterateResult *tbmres);
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
static inline void BitmapPrefetch(BitmapHeapScanState *node,
			   HeapScanDesc scan);
static bool BitmapShouldInitializeSharedState(
						  ParallelBitmapHeapState *pstate);
|
2005-04-20 00:35:18 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
 *		BitmapHeapNext
 *
 *		Retrieve next tuple from the BitmapHeapScan node's currentRelation.
 *
 *		Returns the node's scan tuple slot holding the next tuple that
 *		passes visibility (and, for lossy pages, the recheck qual), or an
 *		empty slot when the bitmap is exhausted.
 *
 *		Works in both serial mode (node->pstate == NULL, private TBM
 *		iterator) and parallel mode (shared iterator attached through the
 *		query's DSA area).
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
	ExprContext *econtext;
	HeapScanDesc scan;
	TIDBitmap  *tbm;
	TBMIterator *tbmiterator = NULL;
	TBMSharedIterator *shared_tbmiterator = NULL;
	TBMIterateResult *tbmres;
	OffsetNumber targoffset;
	TupleTableSlot *slot;
	ParallelBitmapHeapState *pstate = node->pstate;
	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

	/*
	 * extract necessary information from index scan node
	 */
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;
	scan = node->ss.ss_currentScanDesc;
	tbm = node->tbm;
	/* Exactly one of the two iterator kinds is in use, depending on mode. */
	if (pstate == NULL)
		tbmiterator = node->tbmiterator;
	else
		shared_tbmiterator = node->shared_tbmiterator;
	tbmres = node->tbmres;

	/*
	 * If we haven't yet performed the underlying index scan, do it, and begin
	 * the iteration over the bitmap.
	 *
	 * For prefetching, we use *two* iterators, one for the pages we are
	 * actually scanning and another that runs ahead of the first for
	 * prefetching.  node->prefetch_pages tracks exactly how many pages ahead
	 * the prefetch iterator is.  Also, node->prefetch_target tracks the
	 * desired prefetch distance, which starts small and increases up to the
	 * node->prefetch_maximum.  This is to avoid doing a lot of prefetching in
	 * a scan that stops after a few tuples because of a LIMIT.
	 */
	if (!node->initialized)
	{
		if (!pstate)
		{
			/* Serial scan: run the bitmap subplan and iterate privately. */
			tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

			if (!tbm || !IsA(tbm, TIDBitmap))
				elog(ERROR, "unrecognized result from subplan");

			node->tbm = tbm;
			node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
			node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
			if (node->prefetch_maximum > 0)
			{
				node->prefetch_iterator = tbm_begin_iterate(tbm);
				node->prefetch_pages = 0;
				/* -1 flags "prefetching not started yet" */
				node->prefetch_target = -1;
			}
#endif							/* USE_PREFETCH */
		}
		else
		{
			/*
			 * The leader will immediately come out of the function, but
			 * others will be blocked until leader populates the TBM and wakes
			 * them up.
			 */
			if (BitmapShouldInitializeSharedState(pstate))
			{
				tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
				if (!tbm || !IsA(tbm, TIDBitmap))
					elog(ERROR, "unrecognized result from subplan");

				node->tbm = tbm;

				/*
				 * Prepare to iterate over the TBM. This will return the
				 * dsa_pointer of the iterator state which will be used by
				 * multiple processes to iterate jointly.
				 */
				pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
#ifdef USE_PREFETCH
				if (node->prefetch_maximum > 0)
				{
					pstate->prefetch_iterator =
						tbm_prepare_shared_iterate(tbm);

					/*
					 * We don't need the mutex here as we haven't yet woke up
					 * others.
					 */
					pstate->prefetch_pages = 0;
					pstate->prefetch_target = -1;
				}
#endif

				/* We have initialized the shared state so wake up others. */
				BitmapDoneInitializingSharedState(pstate);
			}

			/* Allocate a private iterator and attach the shared state to it */
			node->shared_tbmiterator = shared_tbmiterator =
				tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
			node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
			if (node->prefetch_maximum > 0)
			{
				node->shared_prefetch_iterator =
					tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
			}
#endif							/* USE_PREFETCH */
		}
		node->initialized = true;
	}

	/* Main loop: advance through bitmap pages and tuples until one passes. */
	for (;;)
	{
		Page		dp;
		ItemId		lp;

		/*
		 * Get next page of results if needed
		 */
		if (tbmres == NULL)
		{
			if (!pstate)
				node->tbmres = tbmres = tbm_iterate(tbmiterator);
			else
				node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
			if (tbmres == NULL)
			{
				/* no more entries in the bitmap */
				break;
			}

			/* Keep the prefetch iterator from falling behind the main one. */
			BitmapAdjustPrefetchIterator(node, tbmres);

			/*
			 * Ignore any claimed entries past what we think is the end of the
			 * relation.  (This is probably not necessary given that we got at
			 * least AccessShareLock on the table before performing any of the
			 * indexscans, but let's be safe.)
			 */
			if (tbmres->blockno >= scan->rs_nblocks)
			{
				node->tbmres = tbmres = NULL;
				continue;
			}

			/*
			 * Fetch the current heap page and identify candidate tuples.
			 */
			bitgetpage(scan, tbmres);

			/* ntuples < 0 means the bitmap page was lossy (offsets unknown). */
			if (tbmres->ntuples >= 0)
				node->exact_pages++;
			else
				node->lossy_pages++;

			/*
			 * Set rs_cindex to first slot to examine
			 */
			scan->rs_cindex = 0;

			/* Adjust the prefetch target */
			BitmapAdjustPrefetchTarget(node);
		}
		else
		{
			/*
			 * Continuing in previously obtained page; advance rs_cindex
			 */
			scan->rs_cindex++;

#ifdef USE_PREFETCH

			/*
			 * Try to prefetch at least a few pages even before we get to the
			 * second page if we don't stop reading after the first tuple.
			 */
			if (!pstate)
			{
				if (node->prefetch_target < node->prefetch_maximum)
					node->prefetch_target++;
			}
			else if (pstate->prefetch_target < node->prefetch_maximum)
			{
				/* take spinlock while updating shared state */
				SpinLockAcquire(&pstate->mutex);
				/* re-check under the lock; another worker may have bumped it */
				if (pstate->prefetch_target < node->prefetch_maximum)
					pstate->prefetch_target++;
				SpinLockRelease(&pstate->mutex);
			}
#endif							/* USE_PREFETCH */
		}

		/*
		 * Out of range?  If so, nothing more to look at on this page
		 */
		if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
		{
			node->tbmres = tbmres = NULL;
			continue;
		}

		/*
		 * We issue prefetch requests *after* fetching the current page to try
		 * to avoid having prefetching interfere with the main I/O. Also, this
		 * should happen only when we have determined there is still something
		 * to do on the current page, else we may uselessly prefetch the same
		 * page we are just about to request for real.
		 */
		BitmapPrefetch(node, scan);

		/*
		 * Okay to fetch the tuple
		 */
		targoffset = scan->rs_vistuples[scan->rs_cindex];
		dp = (Page) BufferGetPage(scan->rs_cbuf);
		lp = PageGetItemId(dp, targoffset);
		Assert(ItemIdIsNormal(lp));

		scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
		scan->rs_ctup.t_len = ItemIdGetLength(lp);
		scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
		ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);

		pgstat_count_heap_fetch(scan->rs_rd);

		/*
		 * Set up the result slot to point to this tuple. Note that the slot
		 * acquires a pin on the buffer.
		 */
		ExecStoreTuple(&scan->rs_ctup,
					   slot,
					   scan->rs_cbuf,
					   false);

		/*
		 * If we are using lossy info, we have to recheck the qual conditions
		 * at every tuple.
		 */
		if (tbmres->recheck)
		{
			econtext->ecxt_scantuple = slot;
			ResetExprContext(econtext);

			if (!ExecQual(node->bitmapqualorig, econtext))
			{
				/* Fails recheck, so drop it and loop back for another */
				InstrCountFiltered2(node, 1);
				ExecClearTuple(slot);
				continue;
			}
		}

		/* OK to return this tuple */
		return slot;
	}

	/*
	 * if we get here it means we are at the end of the scan..
	 */
	return ExecClearTuple(slot);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* bitgetpage - subroutine for BitmapHeapNext()
|
|
|
|
*
|
|
|
|
* This routine reads and pins the specified page of the relation, then
|
|
|
|
* builds an array indicating which tuples on the page are both potentially
|
|
|
|
* interesting according to the bitmap, and visible according to the snapshot.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
|
|
|
|
{
|
2006-10-04 02:30:14 +02:00
|
|
|
BlockNumber page = tbmres->blockno;
|
2005-11-26 04:03:07 +01:00
|
|
|
Buffer buffer;
|
|
|
|
Snapshot snapshot;
|
|
|
|
int ntup;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Acquire pin on the target heap page, trading in any pin we held before.
|
|
|
|
*/
|
|
|
|
Assert(page < scan->rs_nblocks);
|
|
|
|
|
|
|
|
scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
|
|
|
|
scan->rs_rd,
|
|
|
|
page);
|
|
|
|
buffer = scan->rs_cbuf;
|
|
|
|
snapshot = scan->rs_snapshot;
|
|
|
|
|
2007-09-20 19:56:33 +02:00
|
|
|
ntup = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prune and repair fragmentation for the whole page, if possible.
|
|
|
|
*/
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
heap_page_prune_opt(scan->rs_rd, buffer);
|
2007-09-20 19:56:33 +02:00
|
|
|
|
2005-11-26 04:03:07 +01:00
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* We must hold share lock on the buffer content while examining tuple
|
2014-05-06 18:12:18 +02:00
|
|
|
* visibility. Afterwards, however, the tuples we have found to be
|
2006-10-04 02:30:14 +02:00
|
|
|
* visible are guaranteed good as long as we hold the buffer pin.
|
2005-11-26 04:03:07 +01:00
|
|
|
*/
|
|
|
|
LockBuffer(buffer, BUFFER_LOCK_SHARE);
|
|
|
|
|
|
|
|
/*
|
2007-09-20 19:56:33 +02:00
|
|
|
* We need two separate strategies for lossy and non-lossy cases.
|
2005-11-26 04:03:07 +01:00
|
|
|
*/
|
|
|
|
if (tbmres->ntuples >= 0)
|
|
|
|
{
|
2005-04-20 00:35:18 +02:00
|
|
|
/*
|
2007-09-20 19:56:33 +02:00
|
|
|
* Bitmap is non-lossy, so we just look through the offsets listed in
|
|
|
|
* tbmres; but we have to follow any HOT chain starting at each such
|
|
|
|
* offset.
|
2005-04-20 00:35:18 +02:00
|
|
|
*/
|
2007-11-15 22:14:46 +01:00
|
|
|
int curslot;
|
2005-04-20 00:35:18 +02:00
|
|
|
|
2007-09-20 19:56:33 +02:00
|
|
|
for (curslot = 0; curslot < tbmres->ntuples; curslot++)
|
|
|
|
{
|
|
|
|
OffsetNumber offnum = tbmres->offsets[curslot];
|
|
|
|
ItemPointerData tid;
|
2012-06-10 21:20:04 +02:00
|
|
|
HeapTupleData heapTuple;
|
2005-04-20 00:35:18 +02:00
|
|
|
|
2007-09-20 19:56:33 +02:00
|
|
|
ItemPointerSet(&tid, page, offnum);
|
2011-06-27 16:27:17 +02:00
|
|
|
if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
|
|
|
|
&heapTuple, NULL, true))
|
2007-09-20 19:56:33 +02:00
|
|
|
scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2005-11-26 04:03:07 +01:00
|
|
|
/*
|
2007-09-20 19:56:33 +02:00
|
|
|
* Bitmap is lossy, so we must examine each item pointer on the page.
|
|
|
|
* But we can ignore HOT chains, since we'll check each tuple anyway.
|
2005-11-26 04:03:07 +01:00
|
|
|
*/
|
2016-04-20 15:31:19 +02:00
|
|
|
Page dp = (Page) BufferGetPage(buffer);
|
2007-09-20 19:56:33 +02:00
|
|
|
OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
|
|
|
|
OffsetNumber offnum;
|
2005-04-20 00:35:18 +02:00
|
|
|
|
2008-05-13 17:44:08 +02:00
|
|
|
for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
|
2007-09-20 19:56:33 +02:00
|
|
|
{
|
|
|
|
ItemId lp;
|
|
|
|
HeapTupleData loctup;
|
2011-06-29 20:40:27 +02:00
|
|
|
bool valid;
|
2005-11-26 04:03:07 +01:00
|
|
|
|
2007-09-20 19:56:33 +02:00
|
|
|
lp = PageGetItemId(dp, offnum);
|
|
|
|
if (!ItemIdIsNormal(lp))
|
|
|
|
continue;
|
|
|
|
loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
|
|
|
|
loctup.t_len = ItemIdGetLength(lp);
|
2011-06-29 20:40:27 +02:00
|
|
|
loctup.t_tableOid = scan->rs_rd->rd_id;
|
|
|
|
ItemPointerSet(&loctup.t_self, page, offnum);
|
|
|
|
valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
|
|
|
|
if (valid)
|
|
|
|
{
|
2007-09-20 19:56:33 +02:00
|
|
|
scan->rs_vistuples[ntup++] = offnum;
|
2011-06-29 20:40:27 +02:00
|
|
|
PredicateLockTuple(scan->rs_rd, &loctup, snapshot);
|
|
|
|
}
|
|
|
|
CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
|
|
|
|
buffer, snapshot);
|
2007-09-20 19:56:33 +02:00
|
|
|
}
|
2005-04-20 00:35:18 +02:00
|
|
|
}
|
|
|
|
|
2005-11-26 04:03:07 +01:00
|
|
|
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
|
|
|
|
|
|
|
|
Assert(ntup <= MaxHeapTuplesPerPage);
|
|
|
|
scan->rs_ntuples = ntup;
|
2005-04-20 00:35:18 +02:00
|
|
|
}
|
|
|
|
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
/*
|
|
|
|
* BitmapDoneInitializingSharedState - Shared state is initialized
|
|
|
|
*
|
|
|
|
* By this time the leader has already populated the TBM and initialized the
|
|
|
|
* shared state so wake up other processes.
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
|
|
|
|
{
|
|
|
|
SpinLockAcquire(&pstate->mutex);
|
|
|
|
pstate->state = BM_FINISHED;
|
|
|
|
SpinLockRelease(&pstate->mutex);
|
|
|
|
ConditionVariableBroadcast(&pstate->cv);
|
|
|
|
}
|
|
|
|
|
2017-03-02 14:17:40 +01:00
|
|
|
/*
|
|
|
|
* BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
|
|
|
|
TBMIterateResult *tbmres)
|
|
|
|
{
|
|
|
|
#ifdef USE_PREFETCH
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
ParallelBitmapHeapState *pstate = node->pstate;
|
2017-03-02 14:17:40 +01:00
|
|
|
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
if (pstate == NULL)
|
2017-03-02 14:17:40 +01:00
|
|
|
{
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
TBMIterator *prefetch_iterator = node->prefetch_iterator;
|
|
|
|
|
|
|
|
if (node->prefetch_pages > 0)
|
|
|
|
{
|
|
|
|
/* The main iterator has closed the distance by one page */
|
|
|
|
node->prefetch_pages--;
|
|
|
|
}
|
|
|
|
else if (prefetch_iterator)
|
|
|
|
{
|
|
|
|
/* Do not let the prefetch iterator get behind the main one */
|
|
|
|
TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
|
|
|
|
|
|
|
|
if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
|
|
|
|
elog(ERROR, "prefetch and main iterators are out of sync");
|
|
|
|
}
|
|
|
|
return;
|
2017-03-02 14:17:40 +01:00
|
|
|
}
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
|
|
|
|
if (node->prefetch_maximum > 0)
|
2017-03-02 14:17:40 +01:00
|
|
|
{
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
|
|
|
|
|
|
|
|
SpinLockAcquire(&pstate->mutex);
|
|
|
|
if (pstate->prefetch_pages > 0)
|
|
|
|
{
|
2017-04-04 15:03:41 +02:00
|
|
|
pstate->prefetch_pages--;
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
SpinLockRelease(&pstate->mutex);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Release the mutex before iterating */
|
|
|
|
SpinLockRelease(&pstate->mutex);
|
2017-03-02 14:17:40 +01:00
|
|
|
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
/*
|
|
|
|
* In case of shared mode, we can not ensure that the current
|
|
|
|
* blockno of the main iterator and that of the prefetch iterator
|
|
|
|
* are same. It's possible that whatever blockno we are
|
2017-05-17 22:31:56 +02:00
|
|
|
* prefetching will be processed by another process. Therefore,
|
|
|
|
* we don't validate the blockno here as we do in non-parallel
|
|
|
|
* case.
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
*/
|
|
|
|
if (prefetch_iterator)
|
|
|
|
tbm_shared_iterate(prefetch_iterator);
|
|
|
|
}
|
2017-03-02 14:17:40 +01:00
|
|
|
}
|
|
|
|
#endif /* USE_PREFETCH */
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* BitmapAdjustPrefetchTarget - Adjust the prefetch target
|
|
|
|
*
|
|
|
|
* Increase prefetch target if it's not yet at the max. Note that
|
|
|
|
* we will increase it to zero after fetching the very first
|
|
|
|
* page/tuple, then to one after the second tuple is fetched, then
|
|
|
|
* it doubles as later pages are fetched.
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
|
|
|
|
{
|
|
|
|
#ifdef USE_PREFETCH
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
ParallelBitmapHeapState *pstate = node->pstate;
|
|
|
|
|
|
|
|
if (pstate == NULL)
|
|
|
|
{
|
|
|
|
if (node->prefetch_target >= node->prefetch_maximum)
|
|
|
|
/* don't increase any further */ ;
|
|
|
|
else if (node->prefetch_target >= node->prefetch_maximum / 2)
|
|
|
|
node->prefetch_target = node->prefetch_maximum;
|
|
|
|
else if (node->prefetch_target > 0)
|
|
|
|
node->prefetch_target *= 2;
|
|
|
|
else
|
|
|
|
node->prefetch_target++;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Do an unlocked check first to save spinlock acquisitions. */
|
|
|
|
if (pstate->prefetch_target < node->prefetch_maximum)
|
|
|
|
{
|
|
|
|
SpinLockAcquire(&pstate->mutex);
|
|
|
|
if (pstate->prefetch_target >= node->prefetch_maximum)
|
|
|
|
/* don't increase any further */ ;
|
|
|
|
else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
|
|
|
|
pstate->prefetch_target = node->prefetch_maximum;
|
|
|
|
else if (pstate->prefetch_target > 0)
|
|
|
|
pstate->prefetch_target *= 2;
|
|
|
|
else
|
|
|
|
pstate->prefetch_target++;
|
|
|
|
SpinLockRelease(&pstate->mutex);
|
|
|
|
}
|
2017-03-02 14:17:40 +01:00
|
|
|
#endif /* USE_PREFETCH */
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
|
|
|
|
{
|
|
|
|
#ifdef USE_PREFETCH
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
ParallelBitmapHeapState *pstate = node->pstate;
|
2017-03-02 14:17:40 +01:00
|
|
|
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
if (pstate == NULL)
|
2017-03-02 14:17:40 +01:00
|
|
|
{
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
TBMIterator *prefetch_iterator = node->prefetch_iterator;
|
|
|
|
|
|
|
|
if (prefetch_iterator)
|
2017-03-02 14:17:40 +01:00
|
|
|
{
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
while (node->prefetch_pages < node->prefetch_target)
|
|
|
|
{
|
|
|
|
TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
|
|
|
|
|
|
|
|
if (tbmpre == NULL)
|
|
|
|
{
|
|
|
|
/* No more pages to prefetch */
|
|
|
|
tbm_end_iterate(prefetch_iterator);
|
|
|
|
node->prefetch_iterator = NULL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
node->prefetch_pages++;
|
|
|
|
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
2017-03-02 14:17:40 +01:00
|
|
|
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
if (pstate->prefetch_pages < pstate->prefetch_target)
|
|
|
|
{
|
|
|
|
TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
|
|
|
|
|
|
|
|
if (prefetch_iterator)
|
|
|
|
{
|
|
|
|
while (1)
|
2017-03-02 14:17:40 +01:00
|
|
|
{
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
TBMIterateResult *tbmpre;
|
|
|
|
bool do_prefetch = false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Recheck under the mutex. If some other process has already
|
|
|
|
* done enough prefetching then we need not to do anything.
|
|
|
|
*/
|
|
|
|
SpinLockAcquire(&pstate->mutex);
|
|
|
|
if (pstate->prefetch_pages < pstate->prefetch_target)
|
|
|
|
{
|
|
|
|
pstate->prefetch_pages++;
|
|
|
|
do_prefetch = true;
|
|
|
|
}
|
|
|
|
SpinLockRelease(&pstate->mutex);
|
|
|
|
|
|
|
|
if (!do_prefetch)
|
|
|
|
return;
|
|
|
|
|
|
|
|
tbmpre = tbm_shared_iterate(prefetch_iterator);
|
|
|
|
if (tbmpre == NULL)
|
|
|
|
{
|
|
|
|
/* No more pages to prefetch */
|
|
|
|
tbm_end_shared_iterate(prefetch_iterator);
|
|
|
|
node->shared_prefetch_iterator = NULL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
|
2017-03-02 14:17:40 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* USE_PREFETCH */
|
|
|
|
}
|
|
|
|
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
/*
|
|
|
|
* BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
|
|
|
|
{
|
|
|
|
ExprContext *econtext;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* extract necessary information from index scan node
|
|
|
|
*/
|
|
|
|
econtext = node->ss.ps.ps_ExprContext;
|
|
|
|
|
|
|
|
/* Does the tuple meet the original qual conditions? */
|
|
|
|
econtext->ecxt_scantuple = slot;
|
|
|
|
|
|
|
|
ResetExprContext(econtext);
|
|
|
|
|
Faster expression evaluation and targetlist projection.
This replaces the old, recursive tree-walk based evaluation, with
non-recursive, opcode dispatch based, expression evaluation.
Projection is now implemented as part of expression evaluation.
This both leads to significant performance improvements, and makes
future just-in-time compilation of expressions easier.
The speed gains primarily come from:
- non-recursive implementation reduces stack usage / overhead
- simple sub-expressions are implemented with a single jump, without
function calls
- sharing some state between different sub-expressions
- reduced amount of indirect/hard to predict memory accesses by laying
out operation metadata sequentially; including the avoidance of
nearly all of the previously used linked lists
- more code has been moved to expression initialization, avoiding
constant re-checks at evaluation time
Future just-in-time compilation (JIT) has become easier, as
demonstrated by released patches intended to be merged in a later
release, for primarily two reasons: Firstly, due to a stricter split
between expression initialization and evaluation, less code has to be
handled by the JIT. Secondly, due to the non-recursive nature of the
generated "instructions", less performance-critical code-paths can
easily be shared between interpreted and compiled evaluation.
The new framework allows for significant future optimizations. E.g.:
- basic infrastructure for to later reduce the per executor-startup
overhead of expression evaluation, by caching state in prepared
statements. That'd be helpful in OLTPish scenarios where
initialization overhead is measurable.
- optimizing the generated "code". A number of proposals for potential
work has already been made.
- optimizing the interpreter. Similarly a number of proposals have
been made here too.
The move of logic into the expression initialization step leads to some
backward-incompatible changes:
- Function permission checks are now done during expression
initialization, whereas previously they were done during
execution. In edge cases this can lead to errors being raised that
previously wouldn't have been, e.g. a NULL array being coerced to a
different array type previously didn't perform checks.
- The set of domain constraints to be checked, is now evaluated once
during expression initialization, previously it was re-built
every time a domain check was evaluated. For normal queries this
doesn't change much, but e.g. for plpgsql functions, which caches
ExprStates, the old set could stick around longer. The behavior
around might still change.
Author: Andres Freund, with significant changes by Tom Lane,
changes by Heikki Linnakangas
Reviewed-By: Tom Lane, Heikki Linnakangas
Discussion: https://postgr.es/m/20161206034955.bh33paeralxbtluv@alap3.anarazel.de
2017-03-14 23:45:36 +01:00
|
|
|
return ExecQual(node->bitmapqualorig, econtext);
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
}
|
|
|
|
|
2005-04-20 00:35:18 +02:00
|
|
|
/* ----------------------------------------------------------------
|
|
|
|
* ExecBitmapHeapScan(node)
|
|
|
|
* ----------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
TupleTableSlot *
|
|
|
|
ExecBitmapHeapScan(BitmapHeapScanState *node)
|
|
|
|
{
|
Re-implement EvalPlanQual processing to improve its performance and eliminate
a lot of strange behaviors that occurred in join cases. We now identify the
"current" row for every joined relation in UPDATE, DELETE, and SELECT FOR
UPDATE/SHARE queries. If an EvalPlanQual recheck is necessary, we jam the
appropriate row into each scan node in the rechecking plan, forcing it to emit
only that one row. The former behavior could rescan the whole of each joined
relation for each recheck, which was terrible for performance, and what's much
worse could result in duplicated output tuples.
Also, the original implementation of EvalPlanQual could not re-use the recheck
execution tree --- it had to go through a full executor init and shutdown for
every row to be tested. To avoid this overhead, I've associated a special
runtime Param with each LockRows or ModifyTable plan node, and arranged to
make every scan node below such a node depend on that Param. Thus, by
signaling a change in that Param, the EPQ machinery can just rescan the
already-built test plan.
This patch also adds a prohibition on set-returning functions in the
targetlist of SELECT FOR UPDATE/SHARE. This is needed to avoid the
duplicate-output-tuple problem. It seems fairly reasonable since the
other restrictions on SELECT FOR UPDATE are meant to ensure that there
is a unique correspondence between source tuples and result tuples,
which an output SRF destroys as much as anything else does.
2009-10-26 03:26:45 +01:00
|
|
|
return ExecScan(&node->ss,
|
|
|
|
(ExecScanAccessMtd) BitmapHeapNext,
|
|
|
|
(ExecScanRecheckMtd) BitmapHeapRecheck);
|
2005-04-20 00:35:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
 *		ExecReScanBitmapHeapScan(node)
 *
 *		Release all bitmap-related resources and reset the node so the
 *		next ExecProcNode call rebuilds the bitmap from scratch.
 * ----------------------------------------------------------------
 */
void
ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
{
	PlanState  *outerPlan = outerPlanState(node);

	/* rescan to release any page pin */
	heap_rescan(node->ss.ss_currentScanDesc, NULL);

	/* End any active local or shared bitmap iteration. */
	if (node->tbmiterator)
		tbm_end_iterate(node->tbmiterator);
	if (node->prefetch_iterator)
		tbm_end_iterate(node->prefetch_iterator);
	if (node->shared_tbmiterator)
		tbm_end_shared_iterate(node->shared_tbmiterator);
	if (node->shared_prefetch_iterator)
		tbm_end_shared_iterate(node->shared_prefetch_iterator);
	/* Iterators must be ended before the underlying bitmap is freed. */
	if (node->tbm)
		tbm_free(node->tbm);
	node->tbm = NULL;
	node->tbmiterator = NULL;
	node->tbmres = NULL;
	node->prefetch_iterator = NULL;
	/* Force BitmapHeapNext to rebuild the bitmap on the next call. */
	node->initialized = false;
	node->shared_tbmiterator = NULL;
	node->shared_prefetch_iterator = NULL;

	/* Reset parallel bitmap state, if present */
	if (node->pstate)
	{
		dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

		node->pstate->state = BM_INITIAL;

		/* Release DSA-resident shared iterator state, if any was built. */
		if (DsaPointerIsValid(node->pstate->tbmiterator))
			tbm_free_shared_area(dsa, node->pstate->tbmiterator);

		if (DsaPointerIsValid(node->pstate->prefetch_iterator))
			tbm_free_shared_area(dsa, node->pstate->prefetch_iterator);

		node->pstate->tbmiterator = InvalidDsaPointer;
		node->pstate->prefetch_iterator = InvalidDsaPointer;
	}

	ExecScanReScan(&node->ss);

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (outerPlan->chgParam == NULL)
		ExecReScan(outerPlan);
}
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
 *		ExecEndBitmapHeapScan
 *
 *		Shut down the node: free expression state, clear tuple slots,
 *		shut down the subplan, release the bitmap, and close the heap
 *		scan and relation.
 * ----------------------------------------------------------------
 */
void
ExecEndBitmapHeapScan(BitmapHeapScanState *node)
{
	Relation	relation;
	HeapScanDesc scanDesc;

	/*
	 * extract information from the node
	 */
	relation = node->ss.ss_currentRelation;
	scanDesc = node->ss.ss_currentScanDesc;

	/*
	 * Free the exprcontext
	 */
	ExecFreeExprContext(&node->ss.ps);

	/*
	 * clear out tuple table slots
	 */
	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
	ExecClearTuple(node->ss.ss_ScanTupleSlot);

	/*
	 * close down subplans
	 */
	ExecEndNode(outerPlanState(node));

	/*
	 * release bitmap if any; iterators must be ended before freeing the
	 * bitmap itself
	 */
	if (node->tbmiterator)
		tbm_end_iterate(node->tbmiterator);
	if (node->prefetch_iterator)
		tbm_end_iterate(node->prefetch_iterator);
	if (node->tbm)
		tbm_free(node->tbm);
	if (node->shared_tbmiterator)
		tbm_end_shared_iterate(node->shared_tbmiterator);
	if (node->shared_prefetch_iterator)
		tbm_end_shared_iterate(node->shared_prefetch_iterator);

	/*
	 * close heap scan
	 */
	heap_endscan(scanDesc);

	/*
	 * close the heap relation.
	 */
	ExecCloseScanRelation(relation);
}
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
 *		ExecInitBitmapHeapScan
 *
 *		Initializes the scan's state information.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
	BitmapHeapScanState *scanstate;
	Relation	currentRelation;
	int			io_concurrency;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * Assert caller didn't ask for an unsafe snapshot --- see comments at
	 * head of file.
	 */
	Assert(IsMVCCSnapshot(estate->es_snapshot));

	/*
	 * create state structure
	 */
	scanstate = makeNode(BitmapHeapScanState);
	scanstate->ss.ps.plan = (Plan *) node;
	scanstate->ss.ps.state = estate;

	/* makeNode zeroed the struct; these explicit inits document intent */
	scanstate->tbm = NULL;
	scanstate->tbmiterator = NULL;
	scanstate->tbmres = NULL;
	scanstate->exact_pages = 0;
	scanstate->lossy_pages = 0;
	scanstate->prefetch_iterator = NULL;
	scanstate->prefetch_pages = 0;
	scanstate->prefetch_target = 0;
	/* may be updated below */
	scanstate->prefetch_maximum = target_prefetch_pages;
	scanstate->pscan_len = 0;
	scanstate->initialized = false;
	scanstate->shared_tbmiterator = NULL;
	scanstate->pstate = NULL;

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &scanstate->ss.ps);

	/*
	 * initialize child expressions
	 */
	scanstate->ss.ps.qual =
		ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
	scanstate->bitmapqualorig =
		ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);

	/*
	 * tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
	ExecInitScanTupleSlot(estate, &scanstate->ss);

	/*
	 * open the base relation and acquire appropriate lock on it.
	 */
	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

	/*
	 * Determine the maximum for prefetch_target.  If the tablespace has a
	 * specific IO concurrency set, use that to compute the corresponding
	 * maximum value; otherwise, we already initialized to the value computed
	 * by the GUC machinery.
	 */
	io_concurrency =
		get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
	if (io_concurrency != effective_io_concurrency)
	{
		double		maximum;

		if (ComputeIoConcurrency(io_concurrency, &maximum))
			scanstate->prefetch_maximum = rint(maximum);
	}

	scanstate->ss.ss_currentRelation = currentRelation;

	/*
	 * Even though we aren't going to do a conventional seqscan, it is useful
	 * to create a HeapScanDesc --- most of the fields in it are usable.
	 */
	scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
														 estate->es_snapshot,
														 0,
														 NULL);

	/*
	 * get the scan type from the relation descriptor.
	 */
	ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));

	/*
	 * Initialize result tuple type and projection info.
	 */
	ExecAssignResultTypeFromTL(&scanstate->ss.ps);
	ExecAssignScanProjectionInfo(&scanstate->ss);

	/*
	 * initialize child nodes
	 *
	 * We do this last because the child nodes will open indexscans on our
	 * relation's indexes, and we want to be sure we have acquired a lock on
	 * the relation first.
	 */
	outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * all done.
	 */
	return scanstate;
}
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
|
|
|
|
/*----------------
|
|
|
|
* BitmapShouldInitializeSharedState
|
|
|
|
*
|
|
|
|
* The first process to come here and see the state to the BM_INITIAL
|
|
|
|
* will become the leader for the parallel bitmap scan and will be
|
|
|
|
* responsible for populating the TIDBitmap. The other processes will
|
|
|
|
* be blocked by the condition variable until the leader wakes them up.
|
|
|
|
* ---------------
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
|
|
|
|
{
|
|
|
|
SharedBitmapState state;
|
|
|
|
|
|
|
|
while (1)
|
|
|
|
{
|
|
|
|
SpinLockAcquire(&pstate->mutex);
|
|
|
|
state = pstate->state;
|
|
|
|
if (pstate->state == BM_INITIAL)
|
|
|
|
pstate->state = BM_INPROGRESS;
|
|
|
|
SpinLockRelease(&pstate->mutex);
|
|
|
|
|
|
|
|
/* Exit if bitmap is done, or if we're the leader. */
|
|
|
|
if (state != BM_INPROGRESS)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* Wait for the leader to wake us up. */
|
|
|
|
ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
|
|
|
|
}
|
|
|
|
|
|
|
|
ConditionVariableCancelSleep();
|
|
|
|
|
|
|
|
return (state == BM_INITIAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
|
|
|
|
* ExecBitmapHeapEstimate
|
|
|
|
*
|
|
|
|
* estimates the space required to serialize bitmap scan node.
|
|
|
|
* ----------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ExecBitmapHeapEstimate(BitmapHeapScanState *node,
|
|
|
|
ParallelContext *pcxt)
|
|
|
|
{
|
|
|
|
EState *estate = node->ss.ps.state;
|
|
|
|
|
|
|
|
node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
|
|
|
|
phs_snapshot_data),
|
|
|
|
EstimateSnapshotSpace(estate->es_snapshot));
|
|
|
|
|
|
|
|
shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
|
|
|
|
shm_toc_estimate_keys(&pcxt->estimator, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
 *		ExecBitmapHeapInitializeDSM
 *
 *		Set up a parallel bitmap heap scan descriptor.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
							ParallelContext *pcxt)
{
	ParallelBitmapHeapState *pstate;
	EState	   *estate = node->ss.ps.state;

	/* Allocate the shared chunk sized earlier by ExecBitmapHeapEstimate. */
	pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);

	/* No shared iterators exist yet (0 == InvalidDsaPointer). */
	pstate->tbmiterator = 0;
	pstate->prefetch_iterator = 0;

	/* Initialize the mutex */
	SpinLockInit(&pstate->mutex);
	pstate->prefetch_pages = 0;
	pstate->prefetch_target = 0;
	/* First worker to observe BM_INITIAL becomes the bitmap-building leader. */
	pstate->state = BM_INITIAL;

	ConditionVariableInit(&pstate->cv);
	/* Serialize our snapshot into the trailing flexible-array area. */
	SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);

	/* Publish the shared state under this plan node's id. */
	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
	node->pstate = pstate;
}
|
|
|
|
|
|
|
|
/* ----------------------------------------------------------------
|
|
|
|
* ExecBitmapHeapInitializeWorker
|
|
|
|
*
|
|
|
|
* Copy relevant information from TOC into planstate.
|
|
|
|
* ----------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, shm_toc *toc)
|
|
|
|
{
|
|
|
|
ParallelBitmapHeapState *pstate;
|
|
|
|
Snapshot snapshot;
|
|
|
|
|
2017-06-05 18:05:42 +02:00
|
|
|
pstate = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
|
Support parallel bitmap heap scans.
The index is scanned by a single process, but then all cooperating
processes can iterate jointly over the resulting set of heap blocks.
In the future, we might also want to support using a parallel bitmap
index scan to set up for a parallel bitmap heap scan, but that's a
job for another day.
Dilip Kumar, with some corrections and cosmetic changes by me. The
larger patch set of which this is a part has been reviewed and tested
by (at least) Andres Freund, Amit Khandekar, Tushar Ahuja, Rafia
Sabih, Haribabu Kommi, Thomas Munro, and me.
Discussion: http://postgr.es/m/CAFiTN-uc4=0WxRGfCzs-xfkMYcSEWUC-Fon6thkJGjkh9i=13A@mail.gmail.com
2017-03-08 18:05:43 +01:00
|
|
|
node->pstate = pstate;
|
|
|
|
|
|
|
|
snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
|
|
|
|
heap_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
|
|
|
|
}
|