From 51ee6f3160d2e1515ed6197594bda67eb99dc2cc Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 15 Feb 2017 13:37:24 -0500 Subject: [PATCH] Replace min_parallel_relation_size with two new GUCs. When min_parallel_relation_size was added, the only supported type of parallel scan was a parallel sequential scan, but there are pending patches for parallel index scan, parallel index-only scan, and parallel bitmap heap scan. Those patches introduce two new types of complications: first, what's relevant is not really the total size of the relation but the portion of it that we will scan; and second, index pages and heap pages shouldn't necessarily be treated in exactly the same way. Typically, the number of index pages will be quite small, but that doesn't necessarily mean that a parallel index scan can't pay off. Therefore, we introduce min_parallel_table_scan_size, which works out a degree of parallelism for scans based on the number of table pages that will be scanned (and which is therefore equivalent to min_parallel_relation_size for parallel sequential scans) and also min_parallel_index_scan_size which can be used to work out a degree of parallelism based on the number of index pages that will be scanned. 
Amit Kapila and Robert Haas Discussion: http://postgr.es/m/CAA4eK1KowGSYYVpd2qPpaPPA5R90r++QwDFbrRECTE9H_HvpOg@mail.gmail.com Discussion: http://postgr.es/m/CAA4eK1+TnM4pXQbvn7OXqam+k_HZqb0ROZUMxOiL6DWJYCyYow@mail.gmail.com --- doc/src/sgml/config.sgml | 31 ++++++-- doc/src/sgml/release-9.6.sgml | 4 +- src/backend/optimizer/path/allpaths.c | 79 +++++++++++++------ src/backend/utils/misc/guc.c | 19 ++++- src/backend/utils/misc/postgresql.conf.sample | 3 +- src/include/optimizer/paths.h | 3 +- src/test/regress/expected/select_parallel.out | 2 +- src/test/regress/sql/select_parallel.sql | 2 +- 8 files changed, 105 insertions(+), 38 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index dc63d7d5e4..95afc2c483 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3835,16 +3835,37 @@ ANY num_sync ( - min_parallel_relation_size (integer) + + min_parallel_table_scan_size (integer) - min_parallel_relation_size configuration parameter + min_parallel_table_scan_size configuration parameter - Sets the minimum size of relations to be considered for parallel scan. - The default is 8 megabytes (8MB). + Sets the minimum amount of table data that must be scanned in order + for a parallel scan to be considered. For a parallel sequential scan, + the amount of table data scanned is always equal to the size of the + table, but when indexes are used the amount of table data + scanned will normally be less. The default is 8 + megabytes (8MB). + + + + + + min_parallel_index_scan_size (integer) + + min_parallel_index_scan_size configuration parameter + + + + + Sets the minimum amount of index data that must be scanned in order + for a parallel scan to be considered. Note that a parallel index scan + typically won't touch the entire index; it is the number of pages + which the planner believes will actually be touched by the scan which + is relevant. The default is 512 kilobytes (512kB). 
diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml index bffcaac46e..02cc8c9003 100644 --- a/doc/src/sgml/release-9.6.sgml +++ b/doc/src/sgml/release-9.6.sgml @@ -2407,8 +2407,8 @@ and many others in the same vein is available through other new configuration parameters , , , and . + linkend="guc-parallel-tuple-cost">, and + min_parallel_relation_size. diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 5c189874ef..85505c57d3 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -57,7 +57,8 @@ typedef struct pushdown_safety_info /* These parameters are set by GUC */ bool enable_geqo = false; /* just in case GUC doesn't set it */ int geqo_threshold; -int min_parallel_relation_size; +int min_parallel_table_scan_size; +int min_parallel_index_scan_size; /* Hook for plugins to get control in set_rel_pathlist() */ set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL; @@ -126,7 +127,8 @@ static void subquery_push_qual(Query *subquery, static void recurse_push_qual(Node *setOp, Query *topquery, RangeTblEntry *rte, Index rti, Node *qual); static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); -static int compute_parallel_worker(RelOptInfo *rel, BlockNumber pages); +static int compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages, + BlockNumber index_pages); /* @@ -679,7 +681,7 @@ create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel) { int parallel_workers; - parallel_workers = compute_parallel_worker(rel, rel->pages); + parallel_workers = compute_parallel_worker(rel, rel->pages, 0); /* If any limit was set to zero, the user doesn't want a parallel scan. */ if (parallel_workers <= 0) @@ -2876,13 +2878,20 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel) /* * Compute the number of parallel workers that should be used to scan a - * relation. 
"pages" is the number of pages from the relation that we - * expect to scan. + * relation. We compute the parallel workers based on the size of the heap to + * be scanned and the size of the index to be scanned, then choose the minimum + * of those. + * + * "heap_pages" is the number of pages from the table that we expect to scan. + * "index_pages" is the number of pages from the index that we expect to scan. */ static int -compute_parallel_worker(RelOptInfo *rel, BlockNumber pages) +compute_parallel_worker(RelOptInfo *rel, BlockNumber heap_pages, + BlockNumber index_pages) { - int parallel_workers; + int parallel_workers = 0; + int heap_parallel_workers = 1; + int index_parallel_workers = 1; /* * If the user has set the parallel_workers reloption, use that; otherwise @@ -2892,7 +2901,8 @@ compute_parallel_worker(RelOptInfo *rel, BlockNumber pages) parallel_workers = rel->rel_parallel_workers; else { - int parallel_threshold; + int heap_parallel_threshold; + int index_parallel_threshold; /* * If this relation is too small to be worth a parallel scan, just @@ -2901,25 +2911,48 @@ compute_parallel_worker(RelOptInfo *rel, BlockNumber pages) * might not be worthwhile just for this relation, but when combined * with all of its inheritance siblings it may well pay off. */ - if (pages < (BlockNumber) min_parallel_relation_size && + if (heap_pages < (BlockNumber) min_parallel_table_scan_size && + index_pages < (BlockNumber) min_parallel_index_scan_size && rel->reloptkind == RELOPT_BASEREL) return 0; - /* - * Select the number of workers based on the log of the size of the - * relation. This probably needs to be a good deal more - * sophisticated, but we need something here for now. Note that the - * upper limit of the min_parallel_relation_size GUC is chosen to - * prevent overflow here. 
- */ - parallel_workers = 1; - parallel_threshold = Max(min_parallel_relation_size, 1); - while (pages >= (BlockNumber) (parallel_threshold * 3)) + if (heap_pages > 0) { - parallel_workers++; - parallel_threshold *= 3; - if (parallel_threshold > INT_MAX / 3) - break; /* avoid overflow */ + /* + * Select the number of workers based on the log of the size of + * the relation. This probably needs to be a good deal more + * sophisticated, but we need something here for now. Note that + * the upper limit of the min_parallel_table_scan_size GUC is + * chosen to prevent overflow here. + */ + heap_parallel_threshold = Max(min_parallel_table_scan_size, 1); + while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3)) + { + heap_parallel_workers++; + heap_parallel_threshold *= 3; + if (heap_parallel_threshold > INT_MAX / 3) + break; /* avoid overflow */ + } + + parallel_workers = heap_parallel_workers; + } + + if (index_pages > 0) + { + /* same calculation as for heap_pages above */ + index_parallel_threshold = Max(min_parallel_index_scan_size, 1); + while (index_pages >= (BlockNumber) (index_parallel_threshold * 3)) + { + index_parallel_workers++; + index_parallel_threshold *= 3; + if (index_parallel_threshold > INT_MAX / 3) + break; /* avoid overflow */ + } + + if (parallel_workers > 0) + parallel_workers = Min(parallel_workers, index_parallel_workers); + else + parallel_workers = index_parallel_workers; } } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 0249721204..5d8fb2edb8 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2776,16 +2776,27 @@ static struct config_int ConfigureNamesInt[] = }, { - {"min_parallel_relation_size", PGC_USERSET, QUERY_TUNING_COST, - gettext_noop("Sets the minimum size of relations to be considered for parallel scan."), - NULL, + {"min_parallel_table_scan_size", PGC_USERSET, QUERY_TUNING_COST, + gettext_noop("Sets the minimum amount of table data for a parallel scan."), + 
gettext_noop("If the planner estimates that it will read a number of table pages too small to reach this limit, a parallel scan will not be considered."), GUC_UNIT_BLOCKS, }, - &min_parallel_relation_size, + &min_parallel_table_scan_size, (8 * 1024 * 1024) / BLCKSZ, 0, INT_MAX / 3, NULL, NULL, NULL }, + { + {"min_parallel_index_scan_size", PGC_USERSET, QUERY_TUNING_COST, + gettext_noop("Sets the minimum amount of index data for a parallel scan."), + gettext_noop("If the planner estimates that it will read a number of index pages too small to reach this limit, a parallel scan will not be considered."), + GUC_UNIT_BLOCKS, + }, + &min_parallel_index_scan_size, + (512 * 1024) / BLCKSZ, 0, INT_MAX / 3, + NULL, NULL, NULL + }, + { /* Can't be set in postgresql.conf */ {"server_version_num", PGC_INTERNAL, PRESET_OPTIONS, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 661b0fa9b6..157d775853 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -300,7 +300,8 @@ #cpu_operator_cost = 0.0025 # same scale as above #parallel_tuple_cost = 0.1 # same scale as above #parallel_setup_cost = 1000.0 # same scale as above -#min_parallel_relation_size = 8MB +#min_parallel_table_scan_size = 8MB +#min_parallel_index_scan_size = 512kB #effective_cache_size = 4GB # - Genetic Query Optimizer - diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 81a9be7c67..81e7a4274d 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -22,7 +22,8 @@ */ extern bool enable_geqo; extern int geqo_threshold; -extern int min_parallel_relation_size; +extern int min_parallel_table_scan_size; +extern int min_parallel_index_scan_size; /* Hook for plugins to get control in set_rel_pathlist() */ typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root, diff --git a/src/test/regress/expected/select_parallel.out 
b/src/test/regress/expected/select_parallel.out index 8786678f0c..3692d4f1b8 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -9,7 +9,7 @@ begin isolation level repeatable read; -- encourage use of parallel plans set parallel_setup_cost=0; set parallel_tuple_cost=0; -set min_parallel_relation_size=0; +set min_parallel_table_scan_size=0; set max_parallel_workers_per_gather=4; explain (costs off) select count(*) from a_star; diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql index def9939d2e..f4f9dd5ab6 100644 --- a/src/test/regress/sql/select_parallel.sql +++ b/src/test/regress/sql/select_parallel.sql @@ -12,7 +12,7 @@ begin isolation level repeatable read; -- encourage use of parallel plans set parallel_setup_cost=0; set parallel_tuple_cost=0; -set min_parallel_relation_size=0; +set min_parallel_table_scan_size=0; set max_parallel_workers_per_gather=4; explain (costs off)