Replace min_parallel_relation_size with two new GUCs.

When min_parallel_relation_size was added, the only supported type
of parallel scan was a parallel sequential scan, but there are
pending patches for parallel index scan, parallel index-only scan,
and parallel bitmap heap scan.  Those patches introduce two new
types of complications: first, what's relevant is not really the
total size of the relation but the portion of it that we will scan;
and second, index pages and heap pages shouldn't necessarily be
treated in exactly the same way.  Typically, the number of index
pages will be quite small, but that doesn't necessarily mean that
a parallel index scan can't pay off.

Therefore, we introduce min_parallel_table_scan_size, which is used
to work out a degree of parallelism for scans based on the number of
table pages that will be scanned (and which is therefore equivalent to
min_parallel_relation_size for parallel sequential scans), and also
min_parallel_index_scan_size, which can be used to work out a degree
of parallelism based on the number of index pages that will be
scanned.

Amit Kapila and Robert Haas

Discussion: http://postgr.es/m/CAA4eK1KowGSYYVpd2qPpaPPA5R90r++QwDFbrRECTE9H_HvpOg@mail.gmail.com
Discussion: http://postgr.es/m/CAA4eK1+TnM4pXQbvn7OXqam+k_HZqb0ROZUMxOiL6DWJYCyYow@mail.gmail.com
This commit is contained in:
Robert Haas 2017-02-15 13:37:24 -05:00
parent 5d40286985
commit 51ee6f3160
8 changed files with 105 additions and 38 deletions

View File

@ -3835,16 +3835,37 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
</listitem>
</varlistentry>
<varlistentry id="guc-min-parallel-relation-size" xreflabel="min_parallel_relation_size">
<term><varname>min_parallel_relation_size</varname> (<type>integer</type>)
<varlistentry id="guc-min-parallel-table-scan-size" xreflabel="min_parallel_table_scan_size">
<term><varname>min_parallel_table_scan_size</varname> (<type>integer</type>)
<indexterm>
<primary><varname>min_parallel_relation_size</> configuration parameter</primary>
<primary><varname>min_parallel_table_scan_size</> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Sets the minimum size of relations to be considered for parallel scan.
The default is 8 megabytes (<literal>8MB</>).
Sets the minimum amount of table data that must be scanned in order
for a parallel scan to be considered. For a parallel sequential scan,
the amount of table data scanned is always equal to the size of the
table, but when indexes are used the amount of table data
scanned will normally be less. The default is 8
megabytes (<literal>8MB</>).
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-min-parallel-index-scan-size" xreflabel="min_parallel_index_scan_size">
<term><varname>min_parallel_index_scan_size</varname> (<type>integer</type>)
<indexterm>
<primary><varname>min_parallel_index_scan_size</> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Sets the minimum amount of index data that must be scanned in order
for a parallel scan to be considered. Note that a parallel index scan
typically won't touch the entire index; what matters is the number of
pages that the planner believes the scan will actually touch.
The default is 512 kilobytes (<literal>512kB</>).
</para>
</listitem>
</varlistentry>

View File

@ -2407,8 +2407,8 @@ and many others in the same vein
is available through other new configuration parameters
<xref linkend="guc-force-parallel-mode">,
<xref linkend="guc-parallel-setup-cost">, <xref
linkend="guc-parallel-tuple-cost">, and <xref
linkend="guc-min-parallel-relation-size">.
linkend="guc-parallel-tuple-cost">, and
<literal>min_parallel_relation_size</literal>.
</para>
</listitem>

View File

@ -57,7 +57,8 @@ typedef struct pushdown_safety_info
/* These parameters are set by GUC */
bool enable_geqo = false; /* just in case GUC doesn't set it */
int geqo_threshold;
int min_parallel_relation_size;
int min_parallel_table_scan_size;
int min_parallel_index_scan_size;
/* Hook for plugins to get control in set_rel_pathlist() */
set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL;
@ -126,7 +127,8 @@ static void subquery_push_qual(Query *subquery,
static void recurse_push_qual(Node *setOp, Query *topquery,
RangeTblEntry *rte, Index rti, Node *qual);
static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
static int compute_parallel_worker(RelOptInfo *rel, BlockNumber pages);
static int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
BlockNumber index_pages);
/*
@ -679,7 +681,7 @@ create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
{
int parallel_workers;
parallel_workers = compute_parallel_worker(rel, rel->pages);
parallel_workers = compute_parallel_worker(rel, rel->pages, 0);
/* If any limit was set to zero, the user doesn't want a parallel scan. */
if (parallel_workers <= 0)
@ -2876,13 +2878,20 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel)
/*
* Compute the number of parallel workers that should be used to scan a
* relation. "pages" is the number of pages from the relation that we
* expect to scan.
* relation. We compute a number of parallel workers from the size of the
* heap to be scanned and another from the size of the index to be scanned,
* then choose the minimum of the two.
*
* "heap_pages" is the number of pages from the table that we expect to scan.
* "index_pages" is the number of pages from the index that we expect to scan.
*/
static int
compute_parallel_worker(RelOptInfo *rel, BlockNumber pages)
compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages,
BlockNumber index_pages)
{
int parallel_workers;
int parallel_workers = 0;
int heap_parallel_workers = 1;
int index_parallel_workers = 1;
/*
* If the user has set the parallel_workers reloption, use that; otherwise
@ -2892,7 +2901,8 @@ compute_parallel_worker(RelOptInfo *rel, BlockNumber pages)
parallel_workers = rel->rel_parallel_workers;
else
{
int parallel_threshold;
int heap_parallel_threshold;
int index_parallel_threshold;
/*
* If this relation is too small to be worth a parallel scan, just
@ -2901,25 +2911,48 @@ compute_parallel_worker(RelOptInfo *rel, BlockNumber pages)
* might not be worthwhile just for this relation, but when combined
* with all of its inheritance siblings it may well pay off.
*/
if (pages < (BlockNumber) min_parallel_relation_size &&
if (heap_pages < (BlockNumber) min_parallel_table_scan_size &&
index_pages < (BlockNumber) min_parallel_index_scan_size &&
rel->reloptkind == RELOPT_BASEREL)
return 0;
/*
* Select the number of workers based on the log of the size of the
* relation. This probably needs to be a good deal more
* sophisticated, but we need something here for now. Note that the
* upper limit of the min_parallel_relation_size GUC is chosen to
* prevent overflow here.
*/
parallel_workers = 1;
parallel_threshold = Max(min_parallel_relation_size, 1);
while (pages >= (BlockNumber) (parallel_threshold * 3))
if (heap_pages > 0)
{
parallel_workers++;
parallel_threshold *= 3;
if (parallel_threshold > INT_MAX / 3)
break; /* avoid overflow */
/*
* Select the number of workers based on the log of the size of
* the relation. This probably needs to be a good deal more
* sophisticated, but we need something here for now. Note that
* the upper limit of the min_parallel_table_scan_size GUC is
* chosen to prevent overflow here.
*/
heap_parallel_threshold = Max(min_parallel_table_scan_size, 1);
while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3))
{
heap_parallel_workers++;
heap_parallel_threshold *= 3;
if (heap_parallel_threshold > INT_MAX / 3)
break; /* avoid overflow */
}
parallel_workers = heap_parallel_workers;
}
if (index_pages > 0)
{
/* same calculation as for heap_pages above */
index_parallel_threshold = Max(min_parallel_index_scan_size, 1);
while (index_pages >= (BlockNumber) (index_parallel_threshold * 3))
{
index_parallel_workers++;
index_parallel_threshold *= 3;
if (index_parallel_threshold > INT_MAX / 3)
break; /* avoid overflow */
}
if (parallel_workers > 0)
parallel_workers = Min(parallel_workers, index_parallel_workers);
else
parallel_workers = index_parallel_workers;
}
}

View File

@ -2776,16 +2776,27 @@ static struct config_int ConfigureNamesInt[] =
},
{
{"min_parallel_relation_size", PGC_USERSET, QUERY_TUNING_COST,
gettext_noop("Sets the minimum size of relations to be considered for parallel scan."),
NULL,
{"min_parallel_table_scan_size", PGC_USERSET, QUERY_TUNING_COST,
gettext_noop("Sets the minimum amount of table data for a parallel scan."),
gettext_noop("If the planner estimates that it will read a number of table pages too small to reach this limit, a parallel scan will not be considered."),
GUC_UNIT_BLOCKS,
},
&min_parallel_relation_size,
&min_parallel_table_scan_size,
(8 * 1024 * 1024) / BLCKSZ, 0, INT_MAX / 3,
NULL, NULL, NULL
},
{
{"min_parallel_index_scan_size", PGC_USERSET, QUERY_TUNING_COST,
gettext_noop("Sets the minimum amount of index data for a parallel scan."),
gettext_noop("If the planner estimates that it will read a number of index pages too small to reach this limit, a parallel scan will not be considered."),
GUC_UNIT_BLOCKS,
},
&min_parallel_index_scan_size,
(512 * 1024) / BLCKSZ, 0, INT_MAX / 3,
NULL, NULL, NULL
},
{
/* Can't be set in postgresql.conf */
{"server_version_num", PGC_INTERNAL, PRESET_OPTIONS,

View File

@ -300,7 +300,8 @@
#cpu_operator_cost = 0.0025 # same scale as above
#parallel_tuple_cost = 0.1 # same scale as above
#parallel_setup_cost = 1000.0 # same scale as above
#min_parallel_relation_size = 8MB
#min_parallel_table_scan_size = 8MB
#min_parallel_index_scan_size = 512kB
#effective_cache_size = 4GB
# - Genetic Query Optimizer -

View File

@ -22,7 +22,8 @@
*/
extern bool enable_geqo;
extern int geqo_threshold;
extern int min_parallel_relation_size;
extern int min_parallel_table_scan_size;
extern int min_parallel_index_scan_size;
/* Hook for plugins to get control in set_rel_pathlist() */
typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,

View File

@ -9,7 +9,7 @@ begin isolation level repeatable read;
-- encourage use of parallel plans
set parallel_setup_cost=0;
set parallel_tuple_cost=0;
set min_parallel_relation_size=0;
set min_parallel_table_scan_size=0;
set max_parallel_workers_per_gather=4;
explain (costs off)
select count(*) from a_star;

View File

@ -12,7 +12,7 @@ begin isolation level repeatable read;
-- encourage use of parallel plans
set parallel_setup_cost=0;
set parallel_tuple_cost=0;
set min_parallel_relation_size=0;
set min_parallel_table_scan_size=0;
set max_parallel_workers_per_gather=4;
explain (costs off)