diff --git a/contrib/bloom/blcost.c b/contrib/bloom/blcost.c index 98a2228edf..ba39f627fd 100644 --- a/contrib/bloom/blcost.c +++ b/contrib/bloom/blcost.c @@ -24,7 +24,8 @@ void blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, - Selectivity *indexSelectivity, double *indexCorrelation) + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) { IndexOptInfo *index = path->indexinfo; List *qinfos; @@ -45,4 +46,5 @@ blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; *indexCorrelation = costs.indexCorrelation; + *indexPages = costs.numIndexPages; } diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h index 39d8d05c5d..0cfe49aad8 100644 --- a/contrib/bloom/bloom.h +++ b/contrib/bloom/bloom.h @@ -208,6 +208,6 @@ extern bytea *bloptions(Datum reloptions, bool validate); extern void blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, - double *indexCorrelation); + double *indexCorrelation, double *indexPages); #endif diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index 858798db85..f2eda67e0a 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -119,6 +119,7 @@ blhandler(PG_FUNCTION_ARGS) amroutine->amstorage = false; amroutine->amclusterable = false; amroutine->ampredlocks = false; + amroutine->amcanparallel = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = blbuild; diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index 9afd7f6417..401b11598e 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -110,6 +110,8 @@ typedef struct IndexAmRoutine bool amclusterable; /* does AM handle predicate locks? */ bool ampredlocks; + /* does AM support parallel scan? */ + bool amcanparallel; /* type of data stored in index, or InvalidOid if variable */ Oid amkeytype; diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 4ff046b4b0..b22563bf7c 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -93,6 +93,7 @@ brinhandler(PG_FUNCTION_ARGS) amroutine->amstorage = true; amroutine->amclusterable = false; amroutine->ampredlocks = false; + amroutine->amcanparallel = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = brinbuild; diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index a98d4fc397..d03d59da6a 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -50,6 +50,7 @@ ginhandler(PG_FUNCTION_ARGS) amroutine->amstorage = true; amroutine->amclusterable = false; amroutine->ampredlocks = false; + amroutine->amcanparallel = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = ginbuild; diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 96ead531ea..6593771361 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -71,6 +71,7 @@ gisthandler(PG_FUNCTION_ARGS) amroutine->amstorage = true; amroutine->amclusterable = true; amroutine->ampredlocks = false; + amroutine->amcanparallel = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = gistbuild; diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index bca77a80c3..24510e78f5 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -67,6 +67,7 @@ hashhandler(PG_FUNCTION_ARGS) amroutine->amstorage = false; amroutine->amclusterable = false; amroutine->ampredlocks = false; + amroutine->amcanparallel = false; amroutine->amkeytype = INT4OID; amroutine->ambuild = hashbuild; diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index cbc575d5cf..775f2ff1f8 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -140,6 +140,7 @@ bthandler(PG_FUNCTION_ARGS) amroutine->amstorage = false; amroutine->amclusterable = true; amroutine->ampredlocks = true; + amroutine->amcanparallel = true; amroutine->amkeytype = InvalidOid; amroutine->ambuild = btbuild; diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index 78846bec66..e57ac49c6b 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -49,6 +49,7 @@ spghandler(PG_FUNCTION_ARGS) amroutine->amstorage = false; amroutine->amclusterable = false; amroutine->ampredlocks = false; + amroutine->amcanparallel = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = spgbuild; diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 784dbaf590..98d4f1eca7 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -28,6 +28,7 @@ #include "executor/nodeCustom.h" #include "executor/nodeForeignscan.h" #include "executor/nodeSeqscan.h" +#include "executor/nodeIndexscan.h" #include "executor/tqueue.h" #include "nodes/nodeFuncs.h" #include "optimizer/planmain.h" @@ -197,6 +198,10 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e) ExecSeqScanEstimate((SeqScanState *) planstate, e->pcxt); break; + case T_IndexScanState: + ExecIndexScanEstimate((IndexScanState *) planstate, + e->pcxt); + break; case T_ForeignScanState: ExecForeignScanEstimate((ForeignScanState *) planstate, e->pcxt); @@ -249,6 +254,10 @@ ExecParallelInitializeDSM(PlanState *planstate, ExecSeqScanInitializeDSM((SeqScanState *) planstate, d->pcxt); break; + case T_IndexScanState: + ExecIndexScanInitializeDSM((IndexScanState *) planstate, + d->pcxt); + break; case T_ForeignScanState: ExecForeignScanInitializeDSM((ForeignScanState *) planstate, d->pcxt); @@ -725,6 +734,9 @@ ExecParallelInitializeWorker(PlanState *planstate, shm_toc *toc) case T_SeqScanState: ExecSeqScanInitializeWorker((SeqScanState *) planstate, toc); break; + case T_IndexScanState: + ExecIndexScanInitializeWorker((IndexScanState *) planstate, toc); + break; case T_ForeignScanState: ExecForeignScanInitializeWorker((ForeignScanState *) planstate, toc); diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 5734550d2c..0a9dfdbaf3 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -22,6 +22,9 @@ * ExecEndIndexScan releases all storage. * ExecIndexMarkPos marks scan position. * ExecIndexRestrPos restores scan position. + * ExecIndexScanEstimate estimates DSM space needed for parallel index scan + * ExecIndexScanInitializeDSM initialize DSM for parallel indexscan + * ExecIndexScanInitializeWorker attach to DSM info in parallel worker */ #include "postgres.h" @@ -514,6 +517,18 @@ ExecIndexScan(IndexScanState *node) void ExecReScanIndexScan(IndexScanState *node) { + bool reset_parallel_scan = true; + + /* + * If we are here to just update the scan keys, then don't reset parallel + * scan. We don't want each of the participating process in the parallel + * scan to update the shared parallel scan state at the start of the scan. + * It is quite possible that one of the participants has already begun + * scanning the index when another has yet to start it. + */ + if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady) + reset_parallel_scan = false; + /* * If we are doing runtime key calculations (ie, any of the index key * values weren't simple Consts), compute the new key values. But first, @@ -539,10 +554,21 @@ ExecReScanIndexScan(IndexScanState *node) reorderqueue_pop(node); } - /* reset index scan */ - index_rescan(node->iss_ScanDesc, - node->iss_ScanKeys, node->iss_NumScanKeys, - node->iss_OrderByKeys, node->iss_NumOrderByKeys); + /* + * Reset (parallel) index scan. For parallel-aware nodes, the scan + * descriptor is initialized during actual execution of node and we can + * reach here before that (ex. during execution of nest loop join). So, + * avoid updating the scan descriptor at that time. + */ + if (node->iss_ScanDesc) + { + index_rescan(node->iss_ScanDesc, + node->iss_ScanKeys, node->iss_NumScanKeys, + node->iss_OrderByKeys, node->iss_NumOrderByKeys); + + if (reset_parallel_scan && node->iss_ScanDesc->parallel_scan) + index_parallelrescan(node->iss_ScanDesc); + } node->iss_ReachedEnd = false; ExecScanReScan(&node->ss); @@ -1013,22 +1039,29 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) } /* - * Initialize scan descriptor. + * for parallel-aware node, we initialize the scan descriptor after + * initializing the shared memory for parallel execution. */ - indexstate->iss_ScanDesc = index_beginscan(currentRelation, - indexstate->iss_RelationDesc, - estate->es_snapshot, - indexstate->iss_NumScanKeys, + if (!node->scan.plan.parallel_aware) + { + /* + * Initialize scan descriptor. + */ + indexstate->iss_ScanDesc = index_beginscan(currentRelation, + indexstate->iss_RelationDesc, + estate->es_snapshot, + indexstate->iss_NumScanKeys, indexstate->iss_NumOrderByKeys); - /* - * If no run-time keys to calculate, go ahead and pass the scankeys to the - * index AM. - */ - if (indexstate->iss_NumRuntimeKeys == 0) - index_rescan(indexstate->iss_ScanDesc, - indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys, + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to + * the index AM. + */ + if (indexstate->iss_NumRuntimeKeys == 0) + index_rescan(indexstate->iss_ScanDesc, + indexstate->iss_ScanKeys, indexstate->iss_NumScanKeys, indexstate->iss_OrderByKeys, indexstate->iss_NumOrderByKeys); + } /* * all done. @@ -1590,3 +1623,91 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, else if (n_array_keys != 0) elog(ERROR, "ScalarArrayOpExpr index qual found where not allowed"); } + +/* ---------------------------------------------------------------- + * Parallel Scan Support + * ---------------------------------------------------------------- + */ + +/* ---------------------------------------------------------------- + * ExecIndexScanEstimate + * + * estimates the space required to serialize indexscan node. + * ---------------------------------------------------------------- + */ +void +ExecIndexScanEstimate(IndexScanState *node, + ParallelContext *pcxt) +{ + EState *estate = node->ss.ps.state; + + node->iss_PscanLen = index_parallelscan_estimate(node->iss_RelationDesc, + estate->es_snapshot); + shm_toc_estimate_chunk(&pcxt->estimator, node->iss_PscanLen); + shm_toc_estimate_keys(&pcxt->estimator, 1); +} + +/* ---------------------------------------------------------------- + * ExecIndexScanInitializeDSM + * + * Set up a parallel index scan descriptor. + * ---------------------------------------------------------------- + */ +void +ExecIndexScanInitializeDSM(IndexScanState *node, + ParallelContext *pcxt) +{ + EState *estate = node->ss.ps.state; + ParallelIndexScanDesc piscan; + + piscan = shm_toc_allocate(pcxt->toc, node->iss_PscanLen); + index_parallelscan_initialize(node->ss.ss_currentRelation, + node->iss_RelationDesc, + estate->es_snapshot, + piscan); + shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan); + node->iss_ScanDesc = + index_beginscan_parallel(node->ss.ss_currentRelation, + node->iss_RelationDesc, + node->iss_NumScanKeys, + node->iss_NumOrderByKeys, + piscan); + + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to the + * index AM. + */ + if (node->iss_NumRuntimeKeys == 0) + index_rescan(node->iss_ScanDesc, + node->iss_ScanKeys, node->iss_NumScanKeys, + node->iss_OrderByKeys, node->iss_NumOrderByKeys); +} + +/* ---------------------------------------------------------------- + * ExecIndexScanInitializeWorker + * + * Copy relevant information from TOC into planstate. + * ---------------------------------------------------------------- + */ +void +ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc) +{ + ParallelIndexScanDesc piscan; + + piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id); + node->iss_ScanDesc = + index_beginscan_parallel(node->ss.ss_currentRelation, + node->iss_RelationDesc, + node->iss_NumScanKeys, + node->iss_NumOrderByKeys, + piscan); + + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to the + * index AM. + */ + if (node->iss_NumRuntimeKeys == 0) + index_rescan(node->iss_ScanDesc, + node->iss_ScanKeys, node->iss_NumScanKeys, + node->iss_OrderByKeys, node->iss_NumOrderByKeys); +} diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 85505c57d3..eeacf815e3 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -127,8 +127,6 @@ static void subquery_push_qual(Query *subquery, static void recurse_push_qual(Node *setOp, Query *topquery, RangeTblEntry *rte, Index rti, Node *qual); static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); -static int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages, - BlockNumber index_pages); /* @@ -2885,7 +2883,7 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel) * "heap_pages" is the number of pages from the table that we expect to scan. * "index_pages" is the number of pages from the index that we expect to scan. */ -static int +int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages, BlockNumber index_pages) { diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index a43daa744c..d01630f8db 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -391,7 +391,8 @@ cost_gather(GatherPath *path, PlannerInfo *root, * we have to fetch from the table, so they don't reduce the scan cost. */ void -cost_index(IndexPath *path, PlannerInfo *root, double loop_count) +cost_index(IndexPath *path, PlannerInfo *root, double loop_count, + bool partial_path) { IndexOptInfo *index = path->indexinfo; RelOptInfo *baserel = index->rel; @@ -400,6 +401,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) List *qpquals; Cost startup_cost = 0; Cost run_cost = 0; + Cost cpu_run_cost = 0; Cost indexStartupCost; Cost indexTotalCost; Selectivity indexSelectivity; @@ -413,6 +415,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) Cost cpu_per_tuple; double tuples_fetched; double pages_fetched; + double rand_heap_pages; + double index_pages; /* Should only be applied to base relations */ Assert(IsA(baserel, RelOptInfo) && @@ -459,7 +463,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) amcostestimate = (amcostestimate_function) index->amcostestimate; amcostestimate(root, path, loop_count, &indexStartupCost, &indexTotalCost, - &indexSelectivity, &indexCorrelation); + &indexSelectivity, &indexCorrelation, + &index_pages); /* * Save amcostestimate's results for possible use in bitmap scan planning. @@ -526,6 +531,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) if (indexonly) pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); + rand_heap_pages = pages_fetched; + max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count; /* @@ -564,6 +571,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) if (indexonly) pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); + rand_heap_pages = pages_fetched; + /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */ max_IO_cost = pages_fetched * spc_random_page_cost; @@ -583,6 +592,29 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) min_IO_cost = 0; } + if (partial_path) + { + /* + * Estimate the number of parallel workers required to scan index. Use + * the number of heap pages computed considering heap fetches won't be + * sequential as for parallel scans the pages are accessed in random + * order. + */ + path->path.parallel_workers = compute_parallel_worker(baserel, + (BlockNumber) rand_heap_pages, + (BlockNumber) index_pages); + + /* + * Fall out if workers can't be assigned for parallel scan, because in + * such a case this path will be rejected. So there is no benefit in + * doing extra computation. + */ + if (path->path.parallel_workers <= 0) + return; + + path->path.parallel_aware = true; + } + /* * Now interpolate based on estimated index order correlation to get total * disk I/O cost for main table accesses. @@ -602,11 +634,24 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) startup_cost += qpqual_cost.startup; cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; - run_cost += cpu_per_tuple * tuples_fetched; + cpu_run_cost += cpu_per_tuple * tuples_fetched; /* tlist eval costs are paid per output row, not per tuple scanned */ startup_cost += path->path.pathtarget->cost.startup; - run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; + cpu_run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; + + /* Adjust costing for parallelism, if used. */ + if (path->path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->path); + + path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); + + /* The CPU cost is divided among all the workers. */ + cpu_run_cost /= parallel_divisor; + } + + run_cost += cpu_run_cost; path->path.startup_cost = startup_cost; path->path.total_cost = startup_cost + run_cost; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 5283468988..56eccafd7b 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -813,7 +813,7 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel, /* * build_index_paths * Given an index and a set of index clauses for it, construct zero - * or more IndexPaths. + * or more IndexPaths. It also constructs zero or more partial IndexPaths. * * We return a list of paths because (1) this routine checks some cases * that should cause us to not generate any IndexPath, and (2) in some @@ -1042,8 +1042,41 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, NoMovementScanDirection, index_only_scan, outer_relids, - loop_count); + loop_count, + false); result = lappend(result, ipath); + + /* + * If appropriate, consider parallel index scan. We don't allow + * parallel index scan for bitmap or index only scans. + */ + if (index->amcanparallel && !index_only_scan && + rel->consider_parallel && outer_relids == NULL && + scantype != ST_BITMAPSCAN) + { + ipath = create_index_path(root, index, + index_clauses, + clause_columns, + orderbyclauses, + orderbyclausecols, + useful_pathkeys, + index_is_ordered ? + ForwardScanDirection : + NoMovementScanDirection, + index_only_scan, + outer_relids, + loop_count, + true); + + /* + * if, after costing the path, we find that it's not worth + * using parallel workers, just free it. + */ + if (ipath->path.parallel_workers > 0) + add_partial_path(rel, (Path *) ipath); + else + pfree(ipath); + } } /* @@ -1066,8 +1099,36 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, BackwardScanDirection, index_only_scan, outer_relids, - loop_count); + loop_count, + false); result = lappend(result, ipath); + + /* If appropriate, consider parallel index scan */ + if (index->amcanparallel && !index_only_scan && + rel->consider_parallel && outer_relids == NULL && + scantype != ST_BITMAPSCAN) + { + ipath = create_index_path(root, index, + index_clauses, + clause_columns, + NIL, + NIL, + useful_pathkeys, + BackwardScanDirection, + index_only_scan, + outer_relids, + loop_count, + true); + + /* + * if, after costing the path, we find that it's not worth + * using parallel workers, just free it. + */ + if (ipath->path.parallel_workers > 0) + add_partial_path(rel, (Path *) ipath); + else + pfree(ipath); + } } } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index abb4f12cea..3d33d46971 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -5333,7 +5333,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) indexScanPath = create_index_path(root, indexInfo, NIL, NIL, NIL, NIL, NIL, ForwardScanDirection, false, - NULL, 1.0); + NULL, 1.0, false); return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost); } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index f440875ceb..324829690d 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -744,10 +744,9 @@ add_path_precheck(RelOptInfo *parent_rel, * As with add_path, we pfree paths that are found to be dominated by * another partial path; this requires that there be no other references to * such paths yet. Hence, GatherPaths must not be created for a rel until - * we're done creating all partial paths for it. We do not currently build - * partial indexscan paths, so there is no need for an exception for - * IndexPaths here; for safety, we instead Assert that a path to be freed - * isn't an IndexPath. + * we're done creating all partial paths for it. Unlike add_path, we don't + * take an exception for IndexPaths as partial index paths won't be + * referenced by partial BitmapHeapPaths. */ void add_partial_path(RelOptInfo *parent_rel, Path *new_path) @@ -826,8 +825,6 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path) { parent_rel->partial_pathlist = list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev); - /* we should not see IndexPaths here, so always safe to delete */ - Assert(!IsA(old_path, IndexPath)); pfree(old_path); /* p1_prev does not advance */ } @@ -860,8 +857,6 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path) } else { - /* we should not see IndexPaths here, so always safe to delete */ - Assert(!IsA(new_path, IndexPath)); /* Reject and recycle the new path */ pfree(new_path); } @@ -1005,6 +1000,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer * 'required_outer' is the set of outer relids for a parameterized path. * 'loop_count' is the number of repetitions of the indexscan to factor into * estimates of caching behavior. + * 'partial_path' is true if constructing a parallel index scan path. * * Returns the new path node. */ @@ -1019,7 +1015,8 @@ create_index_path(PlannerInfo *root, ScanDirection indexscandir, bool indexonly, Relids required_outer, - double loop_count) + double loop_count, + bool partial_path) { IndexPath *pathnode = makeNode(IndexPath); RelOptInfo *rel = index->rel; @@ -1049,7 +1046,7 @@ create_index_path(PlannerInfo *root, pathnode->indexorderbycols = indexorderbycols; pathnode->indexscandir = indexscandir; - cost_index(pathnode, root, loop_count); + cost_index(pathnode, root, loop_count, partial_path); return pathnode; } @@ -3247,7 +3244,7 @@ reparameterize_path(PlannerInfo *root, Path *path, memcpy(newpath, ipath, sizeof(IndexPath)); newpath->path.param_info = get_baserel_parampathinfo(root, rel, required_outer); - cost_index(newpath, root, loop_count); + cost_index(newpath, root, loop_count, false); return (Path *) newpath; } case T_BitmapHeapScan: diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 7836e6b3f8..4ed27054a1 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -241,6 +241,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, info->amoptionalkey = amroutine->amoptionalkey; info->amsearcharray = amroutine->amsearcharray; info->amsearchnulls = amroutine->amsearchnulls; + info->amcanparallel = amroutine->amcanparallel; info->amhasgettuple = (amroutine->amgettuple != NULL); info->amhasgetbitmap = (amroutine->amgetbitmap != NULL); info->amcostestimate = amroutine->amcostestimate; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index fa32e9eabe..d14f0f97a8 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -6471,7 +6471,8 @@ add_predicate_to_quals(IndexOptInfo *index, List *indexQuals) void btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, - Selectivity *indexSelectivity, double *indexCorrelation) + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) { IndexOptInfo *index = path->indexinfo; List *qinfos; @@ -6761,12 +6762,14 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; *indexCorrelation = costs.indexCorrelation; + *indexPages = costs.numIndexPages; } void hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, - Selectivity *indexSelectivity, double *indexCorrelation) + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) { List *qinfos; GenericCosts costs; @@ -6807,12 +6810,14 @@ hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; *indexCorrelation = costs.indexCorrelation; + *indexPages = costs.numIndexPages; } void gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, - Selectivity *indexSelectivity, double *indexCorrelation) + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) { IndexOptInfo *index = path->indexinfo; List *qinfos; @@ -6866,12 +6871,14 @@ gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; *indexCorrelation = costs.indexCorrelation; + *indexPages = costs.numIndexPages; } void spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, - Selectivity *indexSelectivity, double *indexCorrelation) + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) { IndexOptInfo *index = path->indexinfo; List *qinfos; @@ -6925,6 +6932,7 @@ spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; *indexCorrelation = costs.indexCorrelation; + *indexPages = costs.numIndexPages; } @@ -7222,7 +7230,8 @@ gincost_scalararrayopexpr(PlannerInfo *root, void gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, - Selectivity *indexSelectivity, double *indexCorrelation) + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) { IndexOptInfo *index = path->indexinfo; List *indexQuals = path->indexquals; @@ -7537,6 +7546,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, *indexStartupCost += qual_arg_cost; *indexTotalCost += qual_arg_cost; *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost); + *indexPages = dataPagesFetched; } /* @@ -7545,7 +7555,8 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, void brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, - Selectivity *indexSelectivity, double *indexCorrelation) + Selectivity *indexSelectivity, double *indexCorrelation, + double *indexPages) { IndexOptInfo *index = path->indexinfo; List *indexQuals = path->indexquals; @@ -7597,6 +7608,7 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, *indexStartupCost += qual_arg_cost; *indexTotalCost += qual_arg_cost; *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost); + *indexPages = index->pages; /* XXX what about pages_per_range? */ } diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index b0730bfefa..f919cf8b87 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -95,7 +95,8 @@ typedef void (*amcostestimate_function) (struct PlannerInfo *root, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, - double *indexCorrelation); + double *indexCorrelation, + double *indexPages); /* parse index reloptions */ typedef bytea *(*amoptions_function) (Datum reloptions, @@ -188,6 +189,8 @@ typedef struct IndexAmRoutine bool amclusterable; /* does AM handle predicate locks? */ bool ampredlocks; + /* does AM support parallel scan? */ + bool amcanparallel; /* type of data stored in index, or InvalidOid if variable */ Oid amkeytype; diff --git a/src/include/executor/nodeIndexscan.h b/src/include/executor/nodeIndexscan.h index 46d6f45e83..ea3f3a5cc4 100644 --- a/src/include/executor/nodeIndexscan.h +++ b/src/include/executor/nodeIndexscan.h @@ -14,6 +14,7 @@ #ifndef NODEINDEXSCAN_H #define NODEINDEXSCAN_H +#include "access/parallel.h" #include "nodes/execnodes.h" extern IndexScanState *ExecInitIndexScan(IndexScan *node, EState *estate, int eflags); @@ -22,6 +23,9 @@ extern void ExecEndIndexScan(IndexScanState *node); extern void ExecIndexMarkPos(IndexScanState *node); extern void ExecIndexRestrPos(IndexScanState *node); extern void ExecReScanIndexScan(IndexScanState *node); +extern void ExecIndexScanEstimate(IndexScanState *node, ParallelContext *pcxt); +extern void ExecIndexScanInitializeDSM(IndexScanState *node, ParallelContext *pcxt); +extern void ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc); /* * These routines are exported to share code with nodeIndexonlyscan.c and diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 42c6c58ff9..9f41babf35 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1363,6 +1363,7 @@ typedef struct * SortSupport for reordering ORDER BY exprs * OrderByTypByVals is the datatype of order by expression pass-by-value? * OrderByTypLens typlens of the datatypes of order by expressions + * pscan_len size of parallel index scan descriptor * ---------------- */ typedef struct IndexScanState @@ -1389,6 +1390,7 @@ typedef struct IndexScanState SortSupport iss_SortSupport; bool *iss_OrderByTypByVals; int16 *iss_OrderByTypLens; + Size iss_PscanLen; } IndexScanState; /* ---------------- diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 643be54d40..f7ac6f600f 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -629,6 +629,7 @@ typedef struct IndexOptInfo bool amsearchnulls; /* can AM search for NULL/NOT NULL entries? */ bool amhasgettuple; /* does AM have amgettuple interface? */ bool amhasgetbitmap; /* does AM have amgetbitmap interface? */ + bool amcanparallel; /* does AM support parallel scan? */ /* Rather than include amapi.h here, we declare amcostestimate like this */ void (*amcostestimate) (); /* AM's cost estimator */ } IndexOptInfo; diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 0e68264a41..72200fa531 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -76,7 +76,7 @@ extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel, extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info); extern void cost_index(IndexPath *path, PlannerInfo *root, - double loop_count); + double loop_count, bool partial_path); extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info, Path *bitmapqual, double loop_count); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 7b41317621..53cad247dc 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -47,7 +47,8 @@ extern IndexPath *create_index_path(PlannerInfo *root, ScanDirection indexscandir, bool indexonly, Relids required_outer, - double loop_count); + double loop_count, + bool partial_path); extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root, RelOptInfo *rel, Path *bitmapqual, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 81e7a4274d..ebda308c41 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -54,6 +54,8 @@ extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels); extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel); +extern int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages, + BlockNumber index_pages); #ifdef OPTIMIZER_DEBUG extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel); diff --git a/src/include/utils/index_selfuncs.h b/src/include/utils/index_selfuncs.h index d3172420f9..17d165ca65 100644 --- a/src/include/utils/index_selfuncs.h +++ b/src/include/utils/index_selfuncs.h @@ -28,41 +28,47 @@ extern void brincostestimate(struct PlannerInfo *root, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, - double *indexCorrelation); + double *indexCorrelation, + double *indexPages); extern void btcostestimate(struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, - double *indexCorrelation); + double *indexCorrelation, + double *indexPages); extern void hashcostestimate(struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, - double *indexCorrelation); + double *indexCorrelation, + double *indexPages); extern void gistcostestimate(struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, - double *indexCorrelation); + double *indexCorrelation, + double *indexPages); extern void spgcostestimate(struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, - double *indexCorrelation); + double *indexCorrelation, + double *indexPages); extern void gincostestimate(struct PlannerInfo *root, struct IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, - double *indexCorrelation); + double *indexCorrelation, + double *indexPages); #endif /* INDEX_SELFUNCS_H */ diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index 3692d4f1b8..48fb80e90c 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -125,6 +125,29 @@ select count(*) from tenk1 where (two, four) not in (1 row) alter table tenk2 reset (parallel_workers); +-- test parallel index scans. +set enable_seqscan to off; +set enable_bitmapscan to off; +explain (costs off) + select count((unique1)) from tenk1 where hundred > 1; + QUERY PLAN +-------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 4 + -> Partial Aggregate + -> Parallel Index Scan using tenk1_hundred on tenk1 + Index Cond: (hundred > 1) +(6 rows) + +select count((unique1)) from tenk1 where hundred > 1; + count +------- + 9800 +(1 row) + +reset enable_seqscan; +reset enable_bitmapscan; set force_parallel_mode=1; explain (costs off) select stringu1::int2 from tenk1 where unique1 = 1; diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql index f4f9dd5ab6..f5bc4d1873 100644 --- a/src/test/regress/sql/select_parallel.sql +++ b/src/test/regress/sql/select_parallel.sql @@ -48,6 +48,17 @@ select count(*) from tenk1 where (two, four) not in (select hundred, thousand from tenk2 where thousand > 100); alter table tenk2 reset (parallel_workers); +-- test parallel index scans. +set enable_seqscan to off; +set enable_bitmapscan to off; + +explain (costs off) + select count((unique1)) from tenk1 where hundred > 1; +select count((unique1)) from tenk1 where hundred > 1; + +reset enable_seqscan; +reset enable_bitmapscan; + set force_parallel_mode=1; explain (costs off)