Add a new GUC and a reloption to enable inserts in parallel-mode.

Commit 05c8482f7f added the implementation of parallel SELECT for
"INSERT INTO ... SELECT ..." which may incur non-negligible overhead in
the additional parallel-safety checks that it performs, even when, in the
end, those checks determine that parallelism can't be used. This is
normally only ever a problem in the case of when the target table has a
large number of partitions.

A new GUC option "enable_parallel_insert" is added, to allow insert in
parallel-mode. The default is on.

In addition to the GUC option, the user may want a mechanism to allow
inserts in parallel-mode with finer granularity at table level. The new
table option "parallel_insert_enabled" allows this. The default is true.

Author: "Hou, Zhijie"
Reviewed-by: Greg Nancarrow, Amit Langote, Takayuki Tsunakawa, Amit Kapila
Discussion: https://postgr.es/m/CAA4eK1K-cW7svLC2D7DHoGHxdAdg3P37BLgebqBOC2ZLc9a6QQ%40mail.gmail.com
Discussion: https://postgr.es/m/CAJcOf-cXnB5cnMKqWEp2E2z7Mvcd04iLVmV=qpFJrR3AcrTS3g@mail.gmail.com
This commit is contained in:
Amit Kapila 2021-03-18 07:25:27 +05:30
parent 5f79580ad6
commit c8f78b6161
15 changed files with 240 additions and 15 deletions

View File

@ -5072,6 +5072,29 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
</listitem>
</varlistentry>
<varlistentry id="guc-enable-parallel-insert" xreflabel="enable_parallel_insert">
<term><varname>enable_parallel_insert</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>enable_parallel_insert</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Enables or disables the query planner's use of parallel plans for
<command>INSERT</command> commands. The default is <literal>on</literal>.
When enabled, the planner performs additional parallel-safety checks
on the target table's attributes and indexes, in order to determine
if it's safe to use a parallel plan for <command>INSERT</command>. In
cases such as when the target table has a large number of partitions,
and particularly also when that table uses something parallel-unsafe
that prevents parallelism, the overhead of these checks may become
prohibitively high. To address this potential overhead in these cases,
this option can be used to disable the use of parallel plans for
<command>INSERT</command>.
</para>
</listitem>
</varlistentry>
</variablelist>
</sect2>
<sect2 id="runtime-config-query-constants">

View File

@ -722,7 +722,8 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
<para>
<literal>SHARE UPDATE EXCLUSIVE</literal> lock will be taken for
fillfactor, toast and autovacuum storage parameters, as well as the
planner parameter <varname>parallel_workers</varname>.
planner parameter <varname>parallel_workers</varname> and
<varname>parallel_insert_enabled</varname>.
</para>
</listitem>
</varlistentry>

View File

@ -1409,6 +1409,32 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
</listitem>
</varlistentry>
<varlistentry id="reloption-parallel-insert-enabled" xreflabel="parallel_insert_enabled">
<term><literal>parallel_insert_enabled</literal> (<type>boolean</type>)
<indexterm>
<primary><varname>parallel_insert_enabled</varname> storage parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Enables or disables the query planner's use of parallel insert for
this table. When enabled (and provided that
<xref linkend="guc-enable-parallel-insert"/> is also <literal>true</literal>),
the planner performs additional parallel-safety checks on the table's
attributes and indexes, in order to determine if it's safe to use a
parallel plan for <command>INSERT</command>. The default is
<literal>true</literal>. In cases such as when the table has a large
number of partitions, and particularly also when that table uses a
parallel-unsafe feature that prevents parallelism, the overhead of these
checks may become prohibitively high. To address this potential overhead
in these cases, this option can be used to disable the use of parallel
insert for this table. Note that if the target table of the parallel
insert is partitioned, the <literal>parallel_insert_enabled</literal>
option values of the partitions are ignored.
</para>
</listitem>
</varlistentry>
<varlistentry id="reloption-autovacuum-enabled" xreflabel="autovacuum_enabled">
<term><literal>autovacuum_enabled</literal>, <literal>toast.autovacuum_enabled</literal> (<type>boolean</type>)
<indexterm>

View File

@ -168,6 +168,15 @@ static relopt_bool boolRelOpts[] =
},
true
},
{
{
"parallel_insert_enabled",
"Enables \"parallel insert\" feature for this table",
RELOPT_KIND_HEAP | RELOPT_KIND_PARTITIONED,
ShareUpdateExclusiveLock
},
true
},
/* list terminator */
{{NULL}}
};
@ -1859,7 +1868,9 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
{"vacuum_index_cleanup", RELOPT_TYPE_BOOL,
offsetof(StdRdOptions, vacuum_index_cleanup)},
{"vacuum_truncate", RELOPT_TYPE_BOOL,
offsetof(StdRdOptions, vacuum_truncate)}
offsetof(StdRdOptions, vacuum_truncate)},
{"parallel_insert_enabled", RELOPT_TYPE_BOOL,
offsetof(StdRdOptions, parallel_insert_enabled)}
};
return (bytea *) build_reloptions(reloptions, validate, kind,
@ -1961,13 +1972,15 @@ build_local_reloptions(local_relopts *relopts, Datum options, bool validate)
bytea *
partitioned_table_reloptions(Datum reloptions, bool validate)
{
/*
* There are no options for partitioned tables yet, but this is able to do
* some validation.
*/
static const relopt_parse_elt tab[] = {
{"parallel_insert_enabled", RELOPT_TYPE_BOOL,
offsetof(PartitionedTableRdOptions, parallel_insert_enabled)}
};
return (bytea *) build_reloptions(reloptions, validate,
RELOPT_KIND_PARTITIONED,
0, NULL, 0);
sizeof(PartitionedTableRdOptions),
tab, lengthof(tab));
}
/*

View File

@ -129,6 +129,8 @@ Cost disable_cost = 1.0e10;
int max_parallel_workers_per_gather = 2;
bool enable_parallel_insert = true;
bool enable_seqscan = true;
bool enable_indexscan = true;
bool enable_indexonlyscan = true;

View File

@ -1265,8 +1265,10 @@ target_rel_chk_constr_max_parallel_hazard(Relation rel,
*
* It's not possible in the following cases:
*
* 1) INSERT...ON CONFLICT...DO UPDATE
* 2) INSERT without SELECT
* 1) enable_parallel_insert is off
* 2) INSERT...ON CONFLICT...DO UPDATE
* 3) INSERT without SELECT
* 4) the reloption parallel_insert_enabled is set to off
*
* (Note: we don't do in-depth parallel-safety checks here, we do only the
* cheaper tests that can quickly exclude obvious cases for which
@ -1277,12 +1279,17 @@ bool
is_parallel_allowed_for_modify(Query *parse)
{
bool hasSubQuery;
bool parallel_enabled;
RangeTblEntry *rte;
ListCell *lc;
Relation rel;
if (!IsModifySupportedInParallelMode(parse->commandType))
return false;
if (!enable_parallel_insert)
return false;
/*
* UPDATE is not currently supported in parallel-mode, so prohibit
* INSERT...ON CONFLICT...DO UPDATE...
@ -1313,7 +1320,28 @@ is_parallel_allowed_for_modify(Query *parse)
}
}
return hasSubQuery;
if (!hasSubQuery)
return false;
/*
* Check if parallel_insert_enabled is enabled for the target table, if
* not, skip the safety checks.
*
* (Note: if the target table is partitioned, the parallel_insert_enabled
* option setting of the partitions are ignored).
*/
rte = rt_fetch(parse->resultRelation, parse->rtable);
/*
* The target table is already locked by the caller (this is done in the
* parse/analyze phase), and remains locked until end-of-transaction.
*/
rel = table_open(rte->relid, NoLock);
parallel_enabled = RelationGetParallelInsert(rel, true);
table_close(rel, NoLock);
return parallel_enabled;
}
/*****************************************************************************

View File

@ -1122,6 +1122,16 @@ static struct config_bool ConfigureNamesBool[] =
true,
NULL, NULL, NULL
},
{
{"enable_parallel_insert", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's use of parallel plans for INSERT commands."),
NULL,
GUC_EXPLAIN
},
&enable_parallel_insert,
true,
NULL, NULL, NULL
},
{
/* Not for general use --- used by SET SESSION AUTHORIZATION */
{"is_superuser", PGC_INTERNAL, UNGROUPED,

View File

@ -371,6 +371,7 @@
#enable_partitionwise_aggregate = off
#enable_parallel_hash = on
#enable_partition_pruning = on
#enable_parallel_insert = on
# - Planner Cost Constants -

View File

@ -1118,6 +1118,7 @@ static const char *const table_storage_parameters[] = {
"autovacuum_vacuum_threshold",
"fillfactor",
"log_autovacuum_min_duration",
"parallel_insert_enabled",
"parallel_workers",
"toast.autovacuum_enabled",
"toast.autovacuum_freeze_max_age",

View File

@ -47,6 +47,7 @@ typedef enum
/* parameter variables and flags (see also optimizer.h) */
extern PGDLLIMPORT Cost disable_cost;
extern PGDLLIMPORT int max_parallel_workers_per_gather;
extern PGDLLIMPORT bool enable_parallel_insert;
extern PGDLLIMPORT bool enable_seqscan;
extern PGDLLIMPORT bool enable_indexscan;
extern PGDLLIMPORT bool enable_indexonlyscan;

View File

@ -306,6 +306,8 @@ typedef struct StdRdOptions
int parallel_workers; /* max number of parallel workers */
bool vacuum_index_cleanup; /* enables index vacuuming and cleanup */
bool vacuum_truncate; /* enables vacuum to truncate a relation */
bool parallel_insert_enabled; /* enables planner's use of
* parallel insert */
} StdRdOptions;
#define HEAP_MIN_FILLFACTOR 10
@ -423,6 +425,29 @@ typedef struct ViewOptions
((ViewOptions *) (relation)->rd_options)->check_option == \
VIEW_OPTION_CHECK_OPTION_CASCADED)
/*
* PartitionedTableRdOptions
* Contents of rd_options for partitioned tables
*/
typedef struct PartitionedTableRdOptions
{
int32 vl_len_; /* varlena header (do not touch directly!) */
bool parallel_insert_enabled; /* enables planner's use of
* parallel insert */
} PartitionedTableRdOptions;
/*
* RelationGetParallelInsert
* Returns the relation's parallel_insert_enabled reloption setting.
* Note multiple eval of argument!
*/
#define RelationGetParallelInsert(relation, defaultpd) \
((relation)->rd_options ? \
(relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ? \
((PartitionedTableRdOptions *) (relation)->rd_options)->parallel_insert_enabled : \
((StdRdOptions *) (relation)->rd_options)->parallel_insert_enabled) : \
(defaultpd))
/*
* RelationIsValid
* True iff relation descriptor is valid.

View File

@ -61,12 +61,45 @@ set max_parallel_workers_per_gather=4;
create table para_insert_p1 (
unique1 int4 PRIMARY KEY,
stringu1 name
);
) with (parallel_insert_enabled = off);
create table para_insert_f1 (
unique1 int4 REFERENCES para_insert_p1(unique1),
stringu1 name
);
--
-- Disable guc option enable_parallel_insert
--
set enable_parallel_insert = off;
-- Test INSERT with underlying query when enable_parallel_insert=off and reloption.parallel_insert_enabled=off.
-- (should create plan with serial INSERT + SELECT)
--
explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1;
QUERY PLAN
--------------------------
Insert on para_insert_p1
-> Seq Scan on tenk1
(2 rows)
--
-- Reset guc option enable_parallel_insert
--
reset enable_parallel_insert;
--
-- Test INSERT with underlying query when enable_parallel_insert=on and reloption.parallel_insert_enabled=off.
-- (should create plan with serial INSERT + SELECT)
--
explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1;
QUERY PLAN
--------------------------
Insert on para_insert_p1
-> Seq Scan on tenk1
(2 rows)
--
-- Enable reloption parallel_insert_enabled
--
alter table para_insert_p1 set (parallel_insert_enabled = on);
--
-- Test INSERT with underlying query.
-- (should create plan with parallel SELECT, Gather parent node)
--
@ -362,9 +395,28 @@ explain (costs off) insert into testdef(a,d) select a,a*8 from test_data;
--
-- Test INSERT into partition with underlying query.
--
create table parttable1 (a int, b name) partition by range (a);
create table parttable1 (a int, b name) partition by range (a) with (parallel_insert_enabled=off);
create table parttable1_1 partition of parttable1 for values from (0) to (5000);
create table parttable1_2 partition of parttable1 for values from (5000) to (10000);
--
-- Test INSERT into partition when reloption.parallel_insert_enabled=off
-- (should not create a parallel plan)
--
explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1;
QUERY PLAN
-------------------------
Insert on parttable1
-> Seq Scan on tenk1
(2 rows)
--
-- Enable reloption parallel_insert_enabled
--
alter table parttable1 set (parallel_insert_enabled = on);
--
-- Test INSERT into partition when reloption.parallel_insert_enabled=on
-- (should create a parallel plan)
--
explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1;
QUERY PLAN
----------------------------------------

View File

@ -107,13 +107,14 @@ select name, setting from pg_settings where name like 'enable%';
enable_nestloop | on
enable_parallel_append | on
enable_parallel_hash | on
enable_parallel_insert | on
enable_partition_pruning | on
enable_partitionwise_aggregate | off
enable_partitionwise_join | off
enable_seqscan | on
enable_sort | on
enable_tidscan | on
(18 rows)
(19 rows)
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
-- more-or-less working. We can't test their contents in any great detail

View File

@ -77,13 +77,38 @@ set max_parallel_workers_per_gather=4;
create table para_insert_p1 (
unique1 int4 PRIMARY KEY,
stringu1 name
);
) with (parallel_insert_enabled = off);
create table para_insert_f1 (
unique1 int4 REFERENCES para_insert_p1(unique1),
stringu1 name
);
--
-- Disable guc option enable_parallel_insert
--
set enable_parallel_insert = off;
-- Test INSERT with underlying query when enable_parallel_insert=off and reloption.parallel_insert_enabled=off.
-- (should create plan with serial INSERT + SELECT)
--
explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1;
--
-- Reset guc option enable_parallel_insert
--
reset enable_parallel_insert;
--
-- Test INSERT with underlying query when enable_parallel_insert=on and reloption.parallel_insert_enabled=off.
-- (should create plan with serial INSERT + SELECT)
--
explain(costs off) insert into para_insert_p1 select unique1, stringu1 from tenk1;
--
-- Enable reloption parallel_insert_enabled
--
alter table para_insert_p1 set (parallel_insert_enabled = on);
--
-- Test INSERT with underlying query.
@ -208,10 +233,25 @@ explain (costs off) insert into testdef(a,d) select a,a*8 from test_data;
--
-- Test INSERT into partition with underlying query.
--
create table parttable1 (a int, b name) partition by range (a);
create table parttable1 (a int, b name) partition by range (a) with (parallel_insert_enabled=off);
create table parttable1_1 partition of parttable1 for values from (0) to (5000);
create table parttable1_2 partition of parttable1 for values from (5000) to (10000);
--
-- Test INSERT into partition when reloption.parallel_insert_enabled=off
-- (should not create a parallel plan)
--
explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1;
--
-- Enable reloption parallel_insert_enabled
--
alter table parttable1 set (parallel_insert_enabled = on);
--
-- Test INSERT into partition when reloption.parallel_insert_enabled=on
-- (should create a parallel plan)
--
explain (costs off) insert into parttable1 select unique1,stringu1 from tenk1;
insert into parttable1 select unique1,stringu1 from tenk1;
select count(*) from parttable1_1;

View File

@ -1797,6 +1797,7 @@ PartitionSpec
PartitionTupleRouting
PartitionedRelPruneInfo
PartitionedRelPruningData
PartitionedTableRdOptions
PartitionwiseAggregateType
PasswordType
Path