Add optimizer and executor support for parallel index scans.

In combination with 569174f1be, which
taught the btree AM how to perform parallel index scans, this allows
parallel index scan plans on btree indexes.  This infrastructure
should be general enough to support parallel index scans for other
index AMs as well, if someone updates them to support parallel
scans.

Amit Kapila, reviewed and tested by Anastasia Lubennikova, Tushar
Ahuja, and Haribabu Kommi, and me.
This commit is contained in:
Robert Haas 2017-02-15 13:53:24 -05:00
parent 51ee6f3160
commit 5262f7a4fc
29 changed files with 366 additions and 55 deletions

View File

@ -24,7 +24,8 @@
void void
blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost, Cost *indexStartupCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, double *indexCorrelation) Selectivity *indexSelectivity, double *indexCorrelation,
double *indexPages)
{ {
IndexOptInfo *index = path->indexinfo; IndexOptInfo *index = path->indexinfo;
List *qinfos; List *qinfos;
@ -45,4 +46,5 @@ blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexTotalCost = costs.indexTotalCost; *indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity; *indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation; *indexCorrelation = costs.indexCorrelation;
*indexPages = costs.numIndexPages;
} }

View File

@ -208,6 +208,6 @@ extern bytea *bloptions(Datum reloptions, bool validate);
extern void blcostestimate(PlannerInfo *root, IndexPath *path, extern void blcostestimate(PlannerInfo *root, IndexPath *path,
double loop_count, Cost *indexStartupCost, double loop_count, Cost *indexStartupCost,
Cost *indexTotalCost, Selectivity *indexSelectivity, Cost *indexTotalCost, Selectivity *indexSelectivity,
double *indexCorrelation); double *indexCorrelation, double *indexPages);
#endif #endif

View File

@ -119,6 +119,7 @@ blhandler(PG_FUNCTION_ARGS)
amroutine->amstorage = false; amroutine->amstorage = false;
amroutine->amclusterable = false; amroutine->amclusterable = false;
amroutine->ampredlocks = false; amroutine->ampredlocks = false;
amroutine->amcanparallel = false;
amroutine->amkeytype = InvalidOid; amroutine->amkeytype = InvalidOid;
amroutine->ambuild = blbuild; amroutine->ambuild = blbuild;

View File

@ -110,6 +110,8 @@ typedef struct IndexAmRoutine
bool amclusterable; bool amclusterable;
/* does AM handle predicate locks? */ /* does AM handle predicate locks? */
bool ampredlocks; bool ampredlocks;
/* does AM support parallel scan? */
bool amcanparallel;
/* type of data stored in index, or InvalidOid if variable */ /* type of data stored in index, or InvalidOid if variable */
Oid amkeytype; Oid amkeytype;

View File

@ -93,6 +93,7 @@ brinhandler(PG_FUNCTION_ARGS)
amroutine->amstorage = true; amroutine->amstorage = true;
amroutine->amclusterable = false; amroutine->amclusterable = false;
amroutine->ampredlocks = false; amroutine->ampredlocks = false;
amroutine->amcanparallel = false;
amroutine->amkeytype = InvalidOid; amroutine->amkeytype = InvalidOid;
amroutine->ambuild = brinbuild; amroutine->ambuild = brinbuild;

View File

@ -50,6 +50,7 @@ ginhandler(PG_FUNCTION_ARGS)
amroutine->amstorage = true; amroutine->amstorage = true;
amroutine->amclusterable = false; amroutine->amclusterable = false;
amroutine->ampredlocks = false; amroutine->ampredlocks = false;
amroutine->amcanparallel = false;
amroutine->amkeytype = InvalidOid; amroutine->amkeytype = InvalidOid;
amroutine->ambuild = ginbuild; amroutine->ambuild = ginbuild;

View File

@ -71,6 +71,7 @@ gisthandler(PG_FUNCTION_ARGS)
amroutine->amstorage = true; amroutine->amstorage = true;
amroutine->amclusterable = true; amroutine->amclusterable = true;
amroutine->ampredlocks = false; amroutine->ampredlocks = false;
amroutine->amcanparallel = false;
amroutine->amkeytype = InvalidOid; amroutine->amkeytype = InvalidOid;
amroutine->ambuild = gistbuild; amroutine->ambuild = gistbuild;

View File

@ -67,6 +67,7 @@ hashhandler(PG_FUNCTION_ARGS)
amroutine->amstorage = false; amroutine->amstorage = false;
amroutine->amclusterable = false; amroutine->amclusterable = false;
amroutine->ampredlocks = false; amroutine->ampredlocks = false;
amroutine->amcanparallel = false;
amroutine->amkeytype = INT4OID; amroutine->amkeytype = INT4OID;
amroutine->ambuild = hashbuild; amroutine->ambuild = hashbuild;

View File

@ -140,6 +140,7 @@ bthandler(PG_FUNCTION_ARGS)
amroutine->amstorage = false; amroutine->amstorage = false;
amroutine->amclusterable = true; amroutine->amclusterable = true;
amroutine->ampredlocks = true; amroutine->ampredlocks = true;
amroutine->amcanparallel = true;
amroutine->amkeytype = InvalidOid; amroutine->amkeytype = InvalidOid;
amroutine->ambuild = btbuild; amroutine->ambuild = btbuild;

View File

@ -49,6 +49,7 @@ spghandler(PG_FUNCTION_ARGS)
amroutine->amstorage = false; amroutine->amstorage = false;
amroutine->amclusterable = false; amroutine->amclusterable = false;
amroutine->ampredlocks = false; amroutine->ampredlocks = false;
amroutine->amcanparallel = false;
amroutine->amkeytype = InvalidOid; amroutine->amkeytype = InvalidOid;
amroutine->ambuild = spgbuild; amroutine->ambuild = spgbuild;

View File

@ -28,6 +28,7 @@
#include "executor/nodeCustom.h" #include "executor/nodeCustom.h"
#include "executor/nodeForeignscan.h" #include "executor/nodeForeignscan.h"
#include "executor/nodeSeqscan.h" #include "executor/nodeSeqscan.h"
#include "executor/nodeIndexscan.h"
#include "executor/tqueue.h" #include "executor/tqueue.h"
#include "nodes/nodeFuncs.h" #include "nodes/nodeFuncs.h"
#include "optimizer/planmain.h" #include "optimizer/planmain.h"
@ -197,6 +198,10 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e)
ExecSeqScanEstimate((SeqScanState *) planstate, ExecSeqScanEstimate((SeqScanState *) planstate,
e->pcxt); e->pcxt);
break; break;
case T_IndexScanState:
ExecIndexScanEstimate((IndexScanState *) planstate,
e->pcxt);
break;
case T_ForeignScanState: case T_ForeignScanState:
ExecForeignScanEstimate((ForeignScanState *) planstate, ExecForeignScanEstimate((ForeignScanState *) planstate,
e->pcxt); e->pcxt);
@ -249,6 +254,10 @@ ExecParallelInitializeDSM(PlanState *planstate,
ExecSeqScanInitializeDSM((SeqScanState *) planstate, ExecSeqScanInitializeDSM((SeqScanState *) planstate,
d->pcxt); d->pcxt);
break; break;
case T_IndexScanState:
ExecIndexScanInitializeDSM((IndexScanState *) planstate,
d->pcxt);
break;
case T_ForeignScanState: case T_ForeignScanState:
ExecForeignScanInitializeDSM((ForeignScanState *) planstate, ExecForeignScanInitializeDSM((ForeignScanState *) planstate,
d->pcxt); d->pcxt);
@ -725,6 +734,9 @@ ExecParallelInitializeWorker(PlanState *planstate, shm_toc *toc)
case T_SeqScanState: case T_SeqScanState:
ExecSeqScanInitializeWorker((SeqScanState *) planstate, toc); ExecSeqScanInitializeWorker((SeqScanState *) planstate, toc);
break; break;
case T_IndexScanState:
ExecIndexScanInitializeWorker((IndexScanState *) planstate, toc);
break;
case T_ForeignScanState: case T_ForeignScanState:
ExecForeignScanInitializeWorker((ForeignScanState *) planstate, ExecForeignScanInitializeWorker((ForeignScanState *) planstate,
toc); toc);

View File

@ -22,6 +22,9 @@
* ExecEndIndexScan releases all storage. * ExecEndIndexScan releases all storage.
* ExecIndexMarkPos marks scan position. * ExecIndexMarkPos marks scan position.
* ExecIndexRestrPos restores scan position. * ExecIndexRestrPos restores scan position.
* ExecIndexScanEstimate estimates DSM space needed for parallel index scan
* ExecIndexScanInitializeDSM initialize DSM for parallel indexscan
* ExecIndexScanInitializeWorker attach to DSM info in parallel worker
*/ */
#include "postgres.h" #include "postgres.h"
@ -514,6 +517,18 @@ ExecIndexScan(IndexScanState *node)
void void
ExecReScanIndexScan(IndexScanState *node) ExecReScanIndexScan(IndexScanState *node)
{ {
bool reset_parallel_scan = true;
/*
* If we are here to just update the scan keys, then don't reset parallel
* scan. We don't want each of the participating process in the parallel
* scan to update the shared parallel scan state at the start of the scan.
* It is quite possible that one of the participants has already begun
* scanning the index when another has yet to start it.
*/
if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady)
reset_parallel_scan = false;
/* /*
* If we are doing runtime key calculations (ie, any of the index key * If we are doing runtime key calculations (ie, any of the index key
* values weren't simple Consts), compute the new key values. But first, * values weren't simple Consts), compute the new key values. But first,
@ -539,10 +554,21 @@ ExecReScanIndexScan(IndexScanState *node)
reorderqueue_pop(node); reorderqueue_pop(node);
} }
/* reset index scan */ /*
index_rescan(node->iss_ScanDesc, * Reset (parallel) index scan. For parallel-aware nodes, the scan
node->iss_ScanKeys, node->iss_NumScanKeys, * descriptor is initialized during actual execution of node and we can
node->iss_OrderByKeys, node->iss_NumOrderByKeys); * reach here before that (ex. during execution of nest loop join). So,
* avoid updating the scan descriptor at that time.
*/
if (node->iss_ScanDesc)
{
index_rescan(node->iss_ScanDesc,
node->iss_ScanKeys, node->iss_NumScanKeys,
node->iss_OrderByKeys, node->iss_NumOrderByKeys);
if (reset_parallel_scan && node->iss_ScanDesc->parallel_scan)
index_parallelrescan(node->iss_ScanDesc);
}
node->iss_ReachedEnd = false; node->iss_ReachedEnd = false;
ExecScanReScan(&node->ss); ExecScanReScan(&node->ss);
@ -1013,22 +1039,29 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
} }
/* /*
* Initialize scan descriptor. * for parallel-aware node, we initialize the scan descriptor after
* initializing the shared memory for parallel execution.
*/ */
indexstate->iss_ScanDesc = index_beginscan(currentRelation, if (!node->scan.plan.parallel_aware)
indexstate->iss_RelationDesc, {
estate->es_snapshot, /*
indexstate->iss_NumScanKeys, * Initialize scan descriptor.
*/
indexstate->iss_ScanDesc = index_beginscan(currentRelation,
indexstate->iss_RelationDesc,
estate->es_snapshot,
indexstate->iss_NumScanKeys,
indexstate->iss_NumOrderByKeys); indexstate->iss_NumOrderByKeys);
/* /*
* If no run-time keys to calculate, go ahead and pass the scankeys to the * If no run-time keys to calculate, go ahead and pass the scankeys to
* index AM. * the index AM.
*/ */
if (indexstate->iss_NumRuntimeKeys == 0) if (indexstate->iss_NumRuntimeKeys == 0)
index_rescan(indexstate->iss_ScanDesc, index_rescan(indexstate->iss_ScanDesc,
indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys, indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys,
indexstate->iss_OrderByKeys, indexstate->iss_NumOrderByKeys); indexstate->iss_OrderByKeys, indexstate->iss_NumOrderByKeys);
}
/* /*
* all done. * all done.
@ -1590,3 +1623,91 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
else if (n_array_keys != 0) else if (n_array_keys != 0)
elog(ERROR, "ScalarArrayOpExpr index qual found where not allowed"); elog(ERROR, "ScalarArrayOpExpr index qual found where not allowed");
} }
/* ----------------------------------------------------------------
* Parallel Scan Support
* ----------------------------------------------------------------
*/
/* ----------------------------------------------------------------
 * ExecIndexScanEstimate
 *
 * estimates the space required to serialize indexscan node.
 *
 * Called during the DSM-sizing phase of parallel query startup; the
 * space reserved here is later allocated by ExecIndexScanInitializeDSM.
 * ----------------------------------------------------------------
 */
void
ExecIndexScanEstimate(IndexScanState *node,
ParallelContext *pcxt)
{
EState *estate = node->ss.ps.state;
/* Ask the index AM how much shared memory its parallel scan state needs. */
node->iss_PscanLen = index_parallelscan_estimate(node->iss_RelationDesc,
estate->es_snapshot);
/* Reserve one chunk of that size, plus one table-of-contents key for it. */
shm_toc_estimate_chunk(&pcxt->estimator, node->iss_PscanLen);
shm_toc_estimate_keys(&pcxt->estimator, 1);
}
/* ----------------------------------------------------------------
 * ExecIndexScanInitializeDSM
 *
 * Set up a parallel index scan descriptor.
 *
 * Runs in the leader after DSM creation: carves out the chunk sized by
 * ExecIndexScanEstimate, lets the index AM initialize it, publishes it
 * under this plan node's id so workers can find it, and then begins the
 * leader's own participation in the parallel scan.
 * ----------------------------------------------------------------
 */
void
ExecIndexScanInitializeDSM(IndexScanState *node,
ParallelContext *pcxt)
{
EState *estate = node->ss.ps.state;
ParallelIndexScanDesc piscan;
/* Allocate the shared-memory chunk reserved during the estimate phase. */
piscan = shm_toc_allocate(pcxt->toc, node->iss_PscanLen);
index_parallelscan_initialize(node->ss.ss_currentRelation,
node->iss_RelationDesc,
estate->es_snapshot,
piscan);
/* Key the chunk by plan_node_id; workers look it up by the same id. */
shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
node->iss_ScanDesc =
index_beginscan_parallel(node->ss.ss_currentRelation,
node->iss_RelationDesc,
node->iss_NumScanKeys,
node->iss_NumOrderByKeys,
piscan);
/*
 * If no run-time keys to calculate, go ahead and pass the scankeys to the
 * index AM.
 */
if (node->iss_NumRuntimeKeys == 0)
index_rescan(node->iss_ScanDesc,
node->iss_ScanKeys, node->iss_NumScanKeys,
node->iss_OrderByKeys, node->iss_NumOrderByKeys);
}
/* ----------------------------------------------------------------
 * ExecIndexScanInitializeWorker
 *
 * Copy relevant information from TOC into planstate.
 *
 * Runs in each parallel worker: attaches to the shared parallel scan
 * state the leader published in ExecIndexScanInitializeDSM, then joins
 * the scan.
 * ----------------------------------------------------------------
 */
void
ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc)
{
ParallelIndexScanDesc piscan;
/* Locate the shared scan state by this node's plan_node_id. */
piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id);
node->iss_ScanDesc =
index_beginscan_parallel(node->ss.ss_currentRelation,
node->iss_RelationDesc,
node->iss_NumScanKeys,
node->iss_NumOrderByKeys,
piscan);
/*
 * If no run-time keys to calculate, go ahead and pass the scankeys to the
 * index AM.
 */
if (node->iss_NumRuntimeKeys == 0)
index_rescan(node->iss_ScanDesc,
node->iss_ScanKeys, node->iss_NumScanKeys,
node->iss_OrderByKeys, node->iss_NumOrderByKeys);
}

View File

@ -127,8 +127,6 @@ static void subquery_push_qual(Query *subquery,
static void recurse_push_qual(Node *setOp, Query *topquery, static void recurse_push_qual(Node *setOp, Query *topquery,
RangeTblEntry *rte, Index rti, Node *qual); RangeTblEntry *rte, Index rti, Node *qual);
static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
static int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
BlockNumber index_pages);
/* /*
@ -2885,7 +2883,7 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
* "heap_pages" is the number of pages from the table that we expect to scan. * "heap_pages" is the number of pages from the table that we expect to scan.
* "index_pages" is the number of pages from the index that we expect to scan. * "index_pages" is the number of pages from the index that we expect to scan.
*/ */
static int int
compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages, compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
BlockNumber index_pages) BlockNumber index_pages)
{ {

View File

@ -391,7 +391,8 @@ cost_gather(GatherPath *path, PlannerInfo *root,
* we have to fetch from the table, so they don't reduce the scan cost. * we have to fetch from the table, so they don't reduce the scan cost.
*/ */
void void
cost_index(IndexPath *path, PlannerInfo *root, double loop_count) cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
bool partial_path)
{ {
IndexOptInfo *index = path->indexinfo; IndexOptInfo *index = path->indexinfo;
RelOptInfo *baserel = index->rel; RelOptInfo *baserel = index->rel;
@ -400,6 +401,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
List *qpquals; List *qpquals;
Cost startup_cost = 0; Cost startup_cost = 0;
Cost run_cost = 0; Cost run_cost = 0;
Cost cpu_run_cost = 0;
Cost indexStartupCost; Cost indexStartupCost;
Cost indexTotalCost; Cost indexTotalCost;
Selectivity indexSelectivity; Selectivity indexSelectivity;
@ -413,6 +415,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
Cost cpu_per_tuple; Cost cpu_per_tuple;
double tuples_fetched; double tuples_fetched;
double pages_fetched; double pages_fetched;
double rand_heap_pages;
double index_pages;
/* Should only be applied to base relations */ /* Should only be applied to base relations */
Assert(IsA(baserel, RelOptInfo) && Assert(IsA(baserel, RelOptInfo) &&
@ -459,7 +463,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
amcostestimate = (amcostestimate_function) index->amcostestimate; amcostestimate = (amcostestimate_function) index->amcostestimate;
amcostestimate(root, path, loop_count, amcostestimate(root, path, loop_count,
&indexStartupCost, &indexTotalCost, &indexStartupCost, &indexTotalCost,
&indexSelectivity, &indexCorrelation); &indexSelectivity, &indexCorrelation,
&index_pages);
/* /*
* Save amcostestimate's results for possible use in bitmap scan planning. * Save amcostestimate's results for possible use in bitmap scan planning.
@ -526,6 +531,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
if (indexonly) if (indexonly)
pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
rand_heap_pages = pages_fetched;
max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count; max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
/* /*
@ -564,6 +571,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
if (indexonly) if (indexonly)
pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
rand_heap_pages = pages_fetched;
/* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */ /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
max_IO_cost = pages_fetched * spc_random_page_cost; max_IO_cost = pages_fetched * spc_random_page_cost;
@ -583,6 +592,29 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
min_IO_cost = 0; min_IO_cost = 0;
} }
if (partial_path)
{
/*
* Estimate the number of parallel workers required to scan index. Use
* the number of heap pages computed considering heap fetches won't be
* sequential as for parallel scans the pages are accessed in random
* order.
*/
path->path.parallel_workers = compute_parallel_worker(baserel,
(BlockNumber) rand_heap_pages,
(BlockNumber) index_pages);
/*
* Fall out if workers can't be assigned for parallel scan, because in
* such a case this path will be rejected. So there is no benefit in
* doing extra computation.
*/
if (path->path.parallel_workers <= 0)
return;
path->path.parallel_aware = true;
}
/* /*
* Now interpolate based on estimated index order correlation to get total * Now interpolate based on estimated index order correlation to get total
* disk I/O cost for main table accesses. * disk I/O cost for main table accesses.
@ -602,11 +634,24 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
startup_cost += qpqual_cost.startup; startup_cost += qpqual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
run_cost += cpu_per_tuple * tuples_fetched; cpu_run_cost += cpu_per_tuple * tuples_fetched;
/* tlist eval costs are paid per output row, not per tuple scanned */ /* tlist eval costs are paid per output row, not per tuple scanned */
startup_cost += path->path.pathtarget->cost.startup; startup_cost += path->path.pathtarget->cost.startup;
run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; cpu_run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
/* Adjust costing for parallelism, if used. */
if (path->path.parallel_workers > 0)
{
double parallel_divisor = get_parallel_divisor(&path->path);
path->path.rows = clamp_row_est(path->path.rows / parallel_divisor);
/* The CPU cost is divided among all the workers. */
cpu_run_cost /= parallel_divisor;
}
run_cost += cpu_run_cost;
path->path.startup_cost = startup_cost; path->path.startup_cost = startup_cost;
path->path.total_cost = startup_cost + run_cost; path->path.total_cost = startup_cost + run_cost;

View File

@ -813,7 +813,7 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel,
/* /*
* build_index_paths * build_index_paths
* Given an index and a set of index clauses for it, construct zero * Given an index and a set of index clauses for it, construct zero
* or more IndexPaths. * or more IndexPaths. It also constructs zero or more partial IndexPaths.
* *
* We return a list of paths because (1) this routine checks some cases * We return a list of paths because (1) this routine checks some cases
* that should cause us to not generate any IndexPath, and (2) in some * that should cause us to not generate any IndexPath, and (2) in some
@ -1042,8 +1042,41 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
NoMovementScanDirection, NoMovementScanDirection,
index_only_scan, index_only_scan,
outer_relids, outer_relids,
loop_count); loop_count,
false);
result = lappend(result, ipath); result = lappend(result, ipath);
/*
* If appropriate, consider parallel index scan. We don't allow
* parallel index scan for bitmap or index only scans.
*/
if (index->amcanparallel && !index_only_scan &&
rel->consider_parallel && outer_relids == NULL &&
scantype != ST_BITMAPSCAN)
{
ipath = create_index_path(root, index,
index_clauses,
clause_columns,
orderbyclauses,
orderbyclausecols,
useful_pathkeys,
index_is_ordered ?
ForwardScanDirection :
NoMovementScanDirection,
index_only_scan,
outer_relids,
loop_count,
true);
/*
* if, after costing the path, we find that it's not worth
* using parallel workers, just free it.
*/
if (ipath->path.parallel_workers > 0)
add_partial_path(rel, (Path *) ipath);
else
pfree(ipath);
}
} }
/* /*
@ -1066,8 +1099,36 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
BackwardScanDirection, BackwardScanDirection,
index_only_scan, index_only_scan,
outer_relids, outer_relids,
loop_count); loop_count,
false);
result = lappend(result, ipath); result = lappend(result, ipath);
/* If appropriate, consider parallel index scan */
if (index->amcanparallel && !index_only_scan &&
rel->consider_parallel && outer_relids == NULL &&
scantype != ST_BITMAPSCAN)
{
ipath = create_index_path(root, index,
index_clauses,
clause_columns,
NIL,
NIL,
useful_pathkeys,
BackwardScanDirection,
index_only_scan,
outer_relids,
loop_count,
true);
/*
* if, after costing the path, we find that it's not worth
* using parallel workers, just free it.
*/
if (ipath->path.parallel_workers > 0)
add_partial_path(rel, (Path *) ipath);
else
pfree(ipath);
}
} }
} }

View File

@ -5333,7 +5333,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
indexScanPath = create_index_path(root, indexInfo, indexScanPath = create_index_path(root, indexInfo,
NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL, NIL,
ForwardScanDirection, false, ForwardScanDirection, false,
NULL, 1.0); NULL, 1.0, false);
return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost); return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
} }

View File

@ -744,10 +744,9 @@ add_path_precheck(RelOptInfo *parent_rel,
* As with add_path, we pfree paths that are found to be dominated by * As with add_path, we pfree paths that are found to be dominated by
* another partial path; this requires that there be no other references to * another partial path; this requires that there be no other references to
* such paths yet. Hence, GatherPaths must not be created for a rel until * such paths yet. Hence, GatherPaths must not be created for a rel until
* we're done creating all partial paths for it. We do not currently build * we're done creating all partial paths for it. Unlike add_path, we don't
* partial indexscan paths, so there is no need for an exception for * take an exception for IndexPaths as partial index paths won't be
* IndexPaths here; for safety, we instead Assert that a path to be freed * referenced by partial BitmapHeapPaths.
* isn't an IndexPath.
*/ */
void void
add_partial_path(RelOptInfo *parent_rel, Path *new_path) add_partial_path(RelOptInfo *parent_rel, Path *new_path)
@ -826,8 +825,6 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
{ {
parent_rel->partial_pathlist = parent_rel->partial_pathlist =
list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev); list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev);
/* we should not see IndexPaths here, so always safe to delete */
Assert(!IsA(old_path, IndexPath));
pfree(old_path); pfree(old_path);
/* p1_prev does not advance */ /* p1_prev does not advance */
} }
@ -860,8 +857,6 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
} }
else else
{ {
/* we should not see IndexPaths here, so always safe to delete */
Assert(!IsA(new_path, IndexPath));
/* Reject and recycle the new path */ /* Reject and recycle the new path */
pfree(new_path); pfree(new_path);
} }
@ -1005,6 +1000,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer
* 'required_outer' is the set of outer relids for a parameterized path. * 'required_outer' is the set of outer relids for a parameterized path.
* 'loop_count' is the number of repetitions of the indexscan to factor into * 'loop_count' is the number of repetitions of the indexscan to factor into
* estimates of caching behavior. * estimates of caching behavior.
* 'partial_path' is true if constructing a parallel index scan path.
* *
* Returns the new path node. * Returns the new path node.
*/ */
@ -1019,7 +1015,8 @@ create_index_path(PlannerInfo *root,
ScanDirection indexscandir, ScanDirection indexscandir,
bool indexonly, bool indexonly,
Relids required_outer, Relids required_outer,
double loop_count) double loop_count,
bool partial_path)
{ {
IndexPath *pathnode = makeNode(IndexPath); IndexPath *pathnode = makeNode(IndexPath);
RelOptInfo *rel = index->rel; RelOptInfo *rel = index->rel;
@ -1049,7 +1046,7 @@ create_index_path(PlannerInfo *root,
pathnode->indexorderbycols = indexorderbycols; pathnode->indexorderbycols = indexorderbycols;
pathnode->indexscandir = indexscandir; pathnode->indexscandir = indexscandir;
cost_index(pathnode, root, loop_count); cost_index(pathnode, root, loop_count, partial_path);
return pathnode; return pathnode;
} }
@ -3247,7 +3244,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
memcpy(newpath, ipath, sizeof(IndexPath)); memcpy(newpath, ipath, sizeof(IndexPath));
newpath->path.param_info = newpath->path.param_info =
get_baserel_parampathinfo(root, rel, required_outer); get_baserel_parampathinfo(root, rel, required_outer);
cost_index(newpath, root, loop_count); cost_index(newpath, root, loop_count, false);
return (Path *) newpath; return (Path *) newpath;
} }
case T_BitmapHeapScan: case T_BitmapHeapScan:

View File

@ -241,6 +241,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
info->amoptionalkey = amroutine->amoptionalkey; info->amoptionalkey = amroutine->amoptionalkey;
info->amsearcharray = amroutine->amsearcharray; info->amsearcharray = amroutine->amsearcharray;
info->amsearchnulls = amroutine->amsearchnulls; info->amsearchnulls = amroutine->amsearchnulls;
info->amcanparallel = amroutine->amcanparallel;
info->amhasgettuple = (amroutine->amgettuple != NULL); info->amhasgettuple = (amroutine->amgettuple != NULL);
info->amhasgetbitmap = (amroutine->amgetbitmap != NULL); info->amhasgetbitmap = (amroutine->amgetbitmap != NULL);
info->amcostestimate = amroutine->amcostestimate; info->amcostestimate = amroutine->amcostestimate;

View File

@ -6471,7 +6471,8 @@ add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
void void
btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost, Cost *indexStartupCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, double *indexCorrelation) Selectivity *indexSelectivity, double *indexCorrelation,
double *indexPages)
{ {
IndexOptInfo *index = path->indexinfo; IndexOptInfo *index = path->indexinfo;
List *qinfos; List *qinfos;
@ -6761,12 +6762,14 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexTotalCost = costs.indexTotalCost; *indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity; *indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation; *indexCorrelation = costs.indexCorrelation;
*indexPages = costs.numIndexPages;
} }
void void
hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost, Cost *indexStartupCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, double *indexCorrelation) Selectivity *indexSelectivity, double *indexCorrelation,
double *indexPages)
{ {
List *qinfos; List *qinfos;
GenericCosts costs; GenericCosts costs;
@ -6807,12 +6810,14 @@ hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexTotalCost = costs.indexTotalCost; *indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity; *indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation; *indexCorrelation = costs.indexCorrelation;
*indexPages = costs.numIndexPages;
} }
void void
gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost, Cost *indexStartupCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, double *indexCorrelation) Selectivity *indexSelectivity, double *indexCorrelation,
double *indexPages)
{ {
IndexOptInfo *index = path->indexinfo; IndexOptInfo *index = path->indexinfo;
List *qinfos; List *qinfos;
@ -6866,12 +6871,14 @@ gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexTotalCost = costs.indexTotalCost; *indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity; *indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation; *indexCorrelation = costs.indexCorrelation;
*indexPages = costs.numIndexPages;
} }
void void
spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost, Cost *indexStartupCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, double *indexCorrelation) Selectivity *indexSelectivity, double *indexCorrelation,
double *indexPages)
{ {
IndexOptInfo *index = path->indexinfo; IndexOptInfo *index = path->indexinfo;
List *qinfos; List *qinfos;
@ -6925,6 +6932,7 @@ spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexTotalCost = costs.indexTotalCost; *indexTotalCost = costs.indexTotalCost;
*indexSelectivity = costs.indexSelectivity; *indexSelectivity = costs.indexSelectivity;
*indexCorrelation = costs.indexCorrelation; *indexCorrelation = costs.indexCorrelation;
*indexPages = costs.numIndexPages;
} }
@ -7222,7 +7230,8 @@ gincost_scalararrayopexpr(PlannerInfo *root,
void void
gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost, Cost *indexStartupCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, double *indexCorrelation) Selectivity *indexSelectivity, double *indexCorrelation,
double *indexPages)
{ {
IndexOptInfo *index = path->indexinfo; IndexOptInfo *index = path->indexinfo;
List *indexQuals = path->indexquals; List *indexQuals = path->indexquals;
@ -7537,6 +7546,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexStartupCost += qual_arg_cost; *indexStartupCost += qual_arg_cost;
*indexTotalCost += qual_arg_cost; *indexTotalCost += qual_arg_cost;
*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost); *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
*indexPages = dataPagesFetched;
} }
/* /*
@ -7545,7 +7555,8 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
void void
brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost, Cost *indexStartupCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, double *indexCorrelation) Selectivity *indexSelectivity, double *indexCorrelation,
double *indexPages)
{ {
IndexOptInfo *index = path->indexinfo; IndexOptInfo *index = path->indexinfo;
List *indexQuals = path->indexquals; List *indexQuals = path->indexquals;
@ -7597,6 +7608,7 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexStartupCost += qual_arg_cost; *indexStartupCost += qual_arg_cost;
*indexTotalCost += qual_arg_cost; *indexTotalCost += qual_arg_cost;
*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost); *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
*indexPages = index->pages;
/* XXX what about pages_per_range? */ /* XXX what about pages_per_range? */
} }

View File

@ -95,7 +95,8 @@ typedef void (*amcostestimate_function) (struct PlannerInfo *root,
Cost *indexStartupCost, Cost *indexStartupCost,
Cost *indexTotalCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, Selectivity *indexSelectivity,
double *indexCorrelation); double *indexCorrelation,
double *indexPages);
/* parse index reloptions */ /* parse index reloptions */
typedef bytea *(*amoptions_function) (Datum reloptions, typedef bytea *(*amoptions_function) (Datum reloptions,
@ -188,6 +189,8 @@ typedef struct IndexAmRoutine
bool amclusterable; bool amclusterable;
/* does AM handle predicate locks? */ /* does AM handle predicate locks? */
bool ampredlocks; bool ampredlocks;
/* does AM support parallel scan? */
bool amcanparallel;
/* type of data stored in index, or InvalidOid if variable */ /* type of data stored in index, or InvalidOid if variable */
Oid amkeytype; Oid amkeytype;

View File

@ -14,6 +14,7 @@
#ifndef NODEINDEXSCAN_H #ifndef NODEINDEXSCAN_H
#define NODEINDEXSCAN_H #define NODEINDEXSCAN_H
#include "access/parallel.h"
#include "nodes/execnodes.h" #include "nodes/execnodes.h"
extern IndexScanState *ExecInitIndexScan(IndexScan *node, EState *estate, int eflags); extern IndexScanState *ExecInitIndexScan(IndexScan *node, EState *estate, int eflags);
@ -22,6 +23,9 @@ extern void ExecEndIndexScan(IndexScanState *node);
extern void ExecIndexMarkPos(IndexScanState *node); extern void ExecIndexMarkPos(IndexScanState *node);
extern void ExecIndexRestrPos(IndexScanState *node); extern void ExecIndexRestrPos(IndexScanState *node);
extern void ExecReScanIndexScan(IndexScanState *node); extern void ExecReScanIndexScan(IndexScanState *node);
extern void ExecIndexScanEstimate(IndexScanState *node, ParallelContext *pcxt);
extern void ExecIndexScanInitializeDSM(IndexScanState *node, ParallelContext *pcxt);
extern void ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc);
/* /*
* These routines are exported to share code with nodeIndexonlyscan.c and * These routines are exported to share code with nodeIndexonlyscan.c and

View File

@ -1363,6 +1363,7 @@ typedef struct
* SortSupport for reordering ORDER BY exprs * SortSupport for reordering ORDER BY exprs
* OrderByTypByVals is the datatype of order by expression pass-by-value? * OrderByTypByVals is the datatype of order by expression pass-by-value?
* OrderByTypLens typlens of the datatypes of order by expressions * OrderByTypLens typlens of the datatypes of order by expressions
* pscan_len size of parallel index scan descriptor
* ---------------- * ----------------
*/ */
typedef struct IndexScanState typedef struct IndexScanState
@ -1389,6 +1390,7 @@ typedef struct IndexScanState
SortSupport iss_SortSupport; SortSupport iss_SortSupport;
bool *iss_OrderByTypByVals; bool *iss_OrderByTypByVals;
int16 *iss_OrderByTypLens; int16 *iss_OrderByTypLens;
Size iss_PscanLen;
} IndexScanState; } IndexScanState;
/* ---------------- /* ----------------

View File

@ -629,6 +629,7 @@ typedef struct IndexOptInfo
bool amsearchnulls; /* can AM search for NULL/NOT NULL entries? */ bool amsearchnulls; /* can AM search for NULL/NOT NULL entries? */
bool amhasgettuple; /* does AM have amgettuple interface? */ bool amhasgettuple; /* does AM have amgettuple interface? */
bool amhasgetbitmap; /* does AM have amgetbitmap interface? */ bool amhasgetbitmap; /* does AM have amgetbitmap interface? */
bool amcanparallel; /* does AM support parallel scan? */
/* Rather than include amapi.h here, we declare amcostestimate like this */ /* Rather than include amapi.h here, we declare amcostestimate like this */
void (*amcostestimate) (); /* AM's cost estimator */ void (*amcostestimate) (); /* AM's cost estimator */
} IndexOptInfo; } IndexOptInfo;

View File

@ -76,7 +76,7 @@ extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel, extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
ParamPathInfo *param_info); ParamPathInfo *param_info);
extern void cost_index(IndexPath *path, PlannerInfo *root, extern void cost_index(IndexPath *path, PlannerInfo *root,
double loop_count); double loop_count, bool partial_path);
extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
ParamPathInfo *param_info, ParamPathInfo *param_info,
Path *bitmapqual, double loop_count); Path *bitmapqual, double loop_count);

View File

@ -47,7 +47,8 @@ extern IndexPath *create_index_path(PlannerInfo *root,
ScanDirection indexscandir, ScanDirection indexscandir,
bool indexonly, bool indexonly,
Relids required_outer, Relids required_outer,
double loop_count); double loop_count,
bool partial_path);
extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root, extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root,
RelOptInfo *rel, RelOptInfo *rel,
Path *bitmapqual, Path *bitmapqual,

View File

@ -54,6 +54,8 @@ extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed,
List *initial_rels); List *initial_rels);
extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel); extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel);
extern int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
BlockNumber index_pages);
#ifdef OPTIMIZER_DEBUG #ifdef OPTIMIZER_DEBUG
extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel); extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);

View File

@ -28,41 +28,47 @@ extern void brincostestimate(struct PlannerInfo *root,
Cost *indexStartupCost, Cost *indexStartupCost,
Cost *indexTotalCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, Selectivity *indexSelectivity,
double *indexCorrelation); double *indexCorrelation,
double *indexPages);
extern void btcostestimate(struct PlannerInfo *root, extern void btcostestimate(struct PlannerInfo *root,
struct IndexPath *path, struct IndexPath *path,
double loop_count, double loop_count,
Cost *indexStartupCost, Cost *indexStartupCost,
Cost *indexTotalCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, Selectivity *indexSelectivity,
double *indexCorrelation); double *indexCorrelation,
double *indexPages);
extern void hashcostestimate(struct PlannerInfo *root, extern void hashcostestimate(struct PlannerInfo *root,
struct IndexPath *path, struct IndexPath *path,
double loop_count, double loop_count,
Cost *indexStartupCost, Cost *indexStartupCost,
Cost *indexTotalCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, Selectivity *indexSelectivity,
double *indexCorrelation); double *indexCorrelation,
double *indexPages);
extern void gistcostestimate(struct PlannerInfo *root, extern void gistcostestimate(struct PlannerInfo *root,
struct IndexPath *path, struct IndexPath *path,
double loop_count, double loop_count,
Cost *indexStartupCost, Cost *indexStartupCost,
Cost *indexTotalCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, Selectivity *indexSelectivity,
double *indexCorrelation); double *indexCorrelation,
double *indexPages);
extern void spgcostestimate(struct PlannerInfo *root, extern void spgcostestimate(struct PlannerInfo *root,
struct IndexPath *path, struct IndexPath *path,
double loop_count, double loop_count,
Cost *indexStartupCost, Cost *indexStartupCost,
Cost *indexTotalCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, Selectivity *indexSelectivity,
double *indexCorrelation); double *indexCorrelation,
double *indexPages);
extern void gincostestimate(struct PlannerInfo *root, extern void gincostestimate(struct PlannerInfo *root,
struct IndexPath *path, struct IndexPath *path,
double loop_count, double loop_count,
Cost *indexStartupCost, Cost *indexStartupCost,
Cost *indexTotalCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, Selectivity *indexSelectivity,
double *indexCorrelation); double *indexCorrelation,
double *indexPages);
#endif /* INDEX_SELFUNCS_H */ #endif /* INDEX_SELFUNCS_H */

View File

@ -125,6 +125,29 @@ select count(*) from tenk1 where (two, four) not in
(1 row) (1 row)
alter table tenk2 reset (parallel_workers); alter table tenk2 reset (parallel_workers);
-- test parallel index scans.
set enable_seqscan to off;
set enable_bitmapscan to off;
explain (costs off)
select count((unique1)) from tenk1 where hundred > 1;
QUERY PLAN
--------------------------------------------------------------------
Finalize Aggregate
-> Gather
Workers Planned: 4
-> Partial Aggregate
-> Parallel Index Scan using tenk1_hundred on tenk1
Index Cond: (hundred > 1)
(6 rows)
select count((unique1)) from tenk1 where hundred > 1;
count
-------
9800
(1 row)
reset enable_seqscan;
reset enable_bitmapscan;
set force_parallel_mode=1; set force_parallel_mode=1;
explain (costs off) explain (costs off)
select stringu1::int2 from tenk1 where unique1 = 1; select stringu1::int2 from tenk1 where unique1 = 1;

View File

@ -48,6 +48,17 @@ select count(*) from tenk1 where (two, four) not in
(select hundred, thousand from tenk2 where thousand > 100); (select hundred, thousand from tenk2 where thousand > 100);
alter table tenk2 reset (parallel_workers); alter table tenk2 reset (parallel_workers);
-- test parallel index scans.
set enable_seqscan to off;
set enable_bitmapscan to off;
explain (costs off)
select count((unique1)) from tenk1 where hundred > 1;
select count((unique1)) from tenk1 where hundred > 1;
reset enable_seqscan;
reset enable_bitmapscan;
set force_parallel_mode=1; set force_parallel_mode=1;
explain (costs off) explain (costs off)