postgres_fdw: Consider requesting sorted data so we can do a merge join.

When use_remote_estimate is enabled, consider adding ORDER BY to the
query we sending to the remote server so that we can use that ordered
data for a merge join.  Commit f18c944b61
arranges to push down the query pathkeys, which seems like the case
mostly likely to be a win, but testing shows this can sometimes win,
too.

For a regular table, we know which indexes are present and therefore
test whether the ordering provided by each such index is useful.  Here,
we take the opposite approach: guess what orderings would be useful if
they could be generated cheaply, and then ask the remote side what those
will cost.

Ashutosh Bapat, with very substantial cosmetic revisions by me.  Also
reviewed by Rushabh Lathia.
This commit is contained in:
Robert Haas 2015-12-22 13:46:40 -05:00
parent f5a4370aea
commit ccd8f97922
5 changed files with 410 additions and 41 deletions

View File

@ -343,6 +343,76 @@ SELECT 'fixed', NULL FROM ft1 t1 WHERE c1 = 1;
fixed |
(1 row)
-- Test forcing the remote server to produce sorted data for a merge join.
SET enable_hashjoin TO false;
SET enable_nestloop TO false;
-- inner join; expressions in the clauses appear in the equivalence class list
EXPLAIN (VERBOSE, COSTS false)
SELECT t1.c1, t2."C 1" FROM ft2 t1 JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
QUERY PLAN
----------------------------------------------------------------------------
Limit
Output: t1.c1, t2."C 1"
-> Merge Join
Output: t1.c1, t2."C 1"
Merge Cond: (t1.c1 = t2."C 1")
-> Foreign Scan on public.ft2 t1
Output: t1.c1
Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC
-> Index Only Scan using t1_pkey on "S 1"."T 1" t2
Output: t2."C 1"
(10 rows)
SELECT t1.c1, t2."C 1" FROM ft2 t1 JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
c1 | C 1
-----+-----
101 | 101
102 | 102
103 | 103
104 | 104
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
-- outer join; expressions in the clauses do not appear in equivalence class
-- list but no output change as compared to the previous query
EXPLAIN (VERBOSE, COSTS false)
SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
QUERY PLAN
----------------------------------------------------------------------------
Limit
Output: t1.c1, t2."C 1"
-> Merge Left Join
Output: t1.c1, t2."C 1"
Merge Cond: (t1.c1 = t2."C 1")
-> Foreign Scan on public.ft2 t1
Output: t1.c1
Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC
-> Index Only Scan using t1_pkey on "S 1"."T 1" t2
Output: t2."C 1"
(10 rows)
SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
c1 | C 1
-----+-----
101 | 101
102 | 102
103 | 103
104 | 104
105 | 105
106 | 106
107 | 107
108 | 108
109 | 109
110 | 110
(10 rows)
RESET enable_hashjoin;
RESET enable_nestloop;
-- ===================================================================
-- WHERE with remotely-executable conditions
-- ===================================================================
@ -3538,6 +3608,101 @@ select tableoid::regclass, * from bar order by 1,2;
bar2 | 7 | 177
(6 rows)
-- Test forcing the remote server to produce sorted data for a merge join,
-- but the foreign table is an inheritance child.
truncate table loct1;
truncate table only foo;
\set num_rows_foo 2000
insert into loct1 select generate_series(0, :num_rows_foo, 2), generate_series(0, :num_rows_foo, 2), generate_series(0, :num_rows_foo, 2);
insert into foo select generate_series(1, :num_rows_foo, 2), generate_series(1, :num_rows_foo, 2);
SET enable_hashjoin to false;
SET enable_nestloop to false;
alter foreign table foo2 options (use_remote_estimate 'true');
create index i_loct1_f1 on loct1(f1);
create index i_foo_f1 on foo(f1);
analyze foo;
analyze loct1;
-- inner join; expressions in the clauses appear in the equivalence class list
explain (verbose, costs off)
select foo.f1, loct1.f1 from foo join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
QUERY PLAN
---------------------------------------------------------------------------------------
Limit
Output: foo.f1, loct1.f1, foo.f2
-> Sort
Output: foo.f1, loct1.f1, foo.f2
Sort Key: foo.f2
-> Merge Join
Output: foo.f1, loct1.f1, foo.f2
Merge Cond: (foo.f1 = loct1.f1)
-> Merge Append
Sort Key: foo.f1
-> Index Scan using i_foo_f1 on public.foo
Output: foo.f1, foo.f2
-> Foreign Scan on public.foo2
Output: foo2.f1, foo2.f2
Remote SQL: SELECT f1, f2 FROM public.loct1 ORDER BY f1 ASC
-> Index Only Scan using i_loct1_f1 on public.loct1
Output: loct1.f1
(17 rows)
select foo.f1, loct1.f1 from foo join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
f1 | f1
----+----
20 | 20
22 | 22
24 | 24
26 | 26
28 | 28
30 | 30
32 | 32
34 | 34
36 | 36
38 | 38
(10 rows)
-- outer join; expressions in the clauses do not appear in equivalence class
-- list but no output change as compared to the previous query
explain (verbose, costs off)
select foo.f1, loct1.f1 from foo left join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
QUERY PLAN
---------------------------------------------------------------------------------------
Limit
Output: foo.f1, loct1.f1, foo.f2
-> Sort
Output: foo.f1, loct1.f1, foo.f2
Sort Key: foo.f2
-> Merge Left Join
Output: foo.f1, loct1.f1, foo.f2
Merge Cond: (foo.f1 = loct1.f1)
-> Merge Append
Sort Key: foo.f1
-> Index Scan using i_foo_f1 on public.foo
Output: foo.f1, foo.f2
-> Foreign Scan on public.foo2
Output: foo2.f1, foo2.f2
Remote SQL: SELECT f1, f2 FROM public.loct1 ORDER BY f1 ASC
-> Index Only Scan using i_loct1_f1 on public.loct1
Output: loct1.f1
(17 rows)
select foo.f1, loct1.f1 from foo left join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
f1 | f1
----+----
10 | 10
11 |
12 | 12
13 |
14 | 14
15 |
16 | 16
17 |
18 | 18
19 |
(10 rows)
RESET enable_hashjoin;
RESET enable_nestloop;
-- Test that WHERE CURRENT OF is not supported
begin;
declare c cursor for select * from bar where f1 = 7;

View File

@ -259,6 +259,9 @@ static bool postgresAnalyzeForeignTable(Relation relation,
BlockNumber *totalpages);
static List *postgresImportForeignSchema(ImportForeignSchemaStmt *stmt,
Oid serverOid);
static List *get_useful_pathkeys_for_relation(PlannerInfo *root,
RelOptInfo *rel);
static List *get_useful_ecs_for_relation(PlannerInfo *root, RelOptInfo *rel);
/*
* Helper functions
@ -508,6 +511,197 @@ postgresGetForeignRelSize(PlannerInfo *root,
}
}
/*
* get_useful_ecs_for_relation
* Determine which EquivalenceClasses might be involved in useful
* orderings of this relation.
*
* This function is in some respects a mirror image of the core function
* pathkeys_useful_for_merging: for a regular table, we know what indexes
* we have and want to test whether any of them are useful. For a foreign
* table, we don't know what indexes are present on the remote side but
* want to speculate about which ones we'd like to use if they existed.
*/
static List *
get_useful_ecs_for_relation(PlannerInfo *root, RelOptInfo *rel)
{
List *useful_eclass_list = NIL;
ListCell *lc;
Relids relids;
/*
* First, consider whether any active EC is potentially useful for a
* merge join against this relation.
*/
if (rel->has_eclass_joins)
{
foreach(lc, root->eq_classes)
{
EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc);
if (eclass_useful_for_merging(root, cur_ec, rel))
useful_eclass_list = lappend(useful_eclass_list, cur_ec);
}
}
/*
* Next, consider whether there are any non-EC derivable join clauses that
* are merge-joinable. If the joininfo list is empty, we can exit
* quickly.
*/
if (rel->joininfo == NIL)
return useful_eclass_list;
/* If this is a child rel, we must use the topmost parent rel to search. */
if (rel->reloptkind == RELOPT_OTHER_MEMBER_REL)
relids = find_childrel_top_parent(root, rel)->relids;
else
relids = rel->relids;
/* Check each join clause in turn. */
foreach(lc, rel->joininfo)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
/* Consider only mergejoinable clauses */
if (restrictinfo->mergeopfamilies == NIL)
continue;
/* Make sure we've got canonical ECs. */
update_mergeclause_eclasses(root, restrictinfo);
/*
* restrictinfo->mergeopfamilies != NIL is sufficient to guarantee
* that left_ec and right_ec will be initialized, per comments in
* distribute_qual_to_rels, and rel->joininfo should only contain ECs
* where this relation appears on one side or the other.
*/
if (bms_is_subset(restrictinfo->right_ec->ec_relids, relids))
useful_eclass_list = list_append_unique_ptr(useful_eclass_list,
restrictinfo->right_ec);
else
{
Assert(bms_is_subset(restrictinfo->left_ec->ec_relids, relids));
useful_eclass_list = list_append_unique_ptr(useful_eclass_list,
restrictinfo->left_ec);
}
}
return useful_eclass_list;
}
/*
* get_useful_pathkeys_for_relation
* Determine which orderings of a relation might be useful.
*
* Getting data in sorted order can be useful either because the requested
* order matches the final output ordering for the overall query we're
* planning, or because it enables an efficient merge join. Here, we try
* to figure out which pathkeys to consider.
*/
static List *
get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
{
List *useful_pathkeys_list = NIL;
List *useful_eclass_list;
PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) rel->fdw_private;
EquivalenceClass *query_ec = NULL;
ListCell *lc;
/*
* Pushing the query_pathkeys to the remote server is always worth
* considering, because it might let us avoid a local sort.
*/
if (root->query_pathkeys)
{
bool query_pathkeys_ok = true;
foreach(lc, root->query_pathkeys)
{
PathKey *pathkey = (PathKey *) lfirst(lc);
EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
Expr *em_expr;
/*
* The planner and executor don't have any clever strategy for
* taking data sorted by a prefix of the query's pathkeys and
* getting it to be sorted by all of those pathkeys. We'll just
* end up resorting the entire data set. So, unless we can push
* down all of the query pathkeys, forget it.
*
* is_foreign_expr would detect volatile expressions as well, but
* checking ec_has_volatile here saves some cycles.
*/
if (pathkey_ec->ec_has_volatile ||
!(em_expr = find_em_expr_for_rel(pathkey_ec, rel)) ||
!is_foreign_expr(root, rel, em_expr))
{
query_pathkeys_ok = false;
break;
}
}
if (query_pathkeys_ok)
useful_pathkeys_list = list_make1(list_copy(root->query_pathkeys));
}
/*
* Even if we're not using remote estimates, having the remote side do
* the sort generally won't be any worse than doing it locally, and it
* might be much better if the remote side can generate data in the right
* order without needing a sort at all. However, what we're going to do
* next is try to generate pathkeys that seem promising for possible merge
* joins, and that's more speculative. A wrong choice might hurt quite a
* bit, so bail out if we can't use remote estimates.
*/
if (!fpinfo->use_remote_estimate)
return useful_pathkeys_list;
/* Get the list of interesting EquivalenceClasses. */
useful_eclass_list = get_useful_ecs_for_relation(root, rel);
/* Extract unique EC for query, if any, so we don't consider it again. */
if (list_length(root->query_pathkeys) == 1)
{
PathKey *query_pathkey = linitial(root->query_pathkeys);
query_ec = query_pathkey->pk_eclass;
}
/*
* As a heuristic, the only pathkeys we consider here are those of length
* one. It's surely possible to consider more, but since each one we
* choose to consider will generate a round-trip to the remote side, we
* need to be a bit cautious here. It would sure be nice to have a local
* cache of information about remote index definitions...
*/
foreach(lc, useful_eclass_list)
{
EquivalenceClass *cur_ec = lfirst(lc);
Expr *em_expr;
PathKey *pathkey;
/* If redundant with what we did above, skip it. */
if (cur_ec == query_ec)
continue;
/* If no pushable expression for this rel, skip it. */
em_expr = find_em_expr_for_rel(cur_ec, rel);
if (em_expr == NULL || !is_foreign_expr(root, rel, em_expr))
continue;
/* Looks like we can generate a pathkey, so let's do it. */
pathkey = make_canonical_pathkey(root, cur_ec,
linitial_oid(cur_ec->ec_opfamilies),
BTLessStrategyNumber,
false);
useful_pathkeys_list = lappend(useful_pathkeys_list,
list_make1(pathkey));
}
return useful_pathkeys_list;
}
/*
* postgresGetForeignPaths
* Create possible scan paths for a scan on the foreign table
@ -521,7 +715,7 @@ postgresGetForeignPaths(PlannerInfo *root,
ForeignPath *path;
List *ppi_list;
ListCell *lc;
List *usable_pathkeys = NIL;
List *useful_pathkeys_list = NIL; /* List of all pathkeys */
/*
* Create simplest ForeignScan path node and add it to baserel. This path
@ -540,48 +734,18 @@ postgresGetForeignPaths(PlannerInfo *root,
NIL); /* no fdw_private list */
add_path(baserel, (Path *) path);
/*
* Determine whether we can potentially push query pathkeys to the remote
* side, avoiding a local sort.
*/
foreach(lc, root->query_pathkeys)
{
PathKey *pathkey = (PathKey *) lfirst(lc);
EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
Expr *em_expr;
useful_pathkeys_list = get_useful_pathkeys_for_relation(root, baserel);
/*
* is_foreign_expr would detect volatile expressions as well, but
* ec_has_volatile saves some cycles.
*/
if (!pathkey_ec->ec_has_volatile &&
(em_expr = find_em_expr_for_rel(pathkey_ec, baserel)) &&
is_foreign_expr(root, baserel, em_expr))
usable_pathkeys = lappend(usable_pathkeys, pathkey);
else
{
/*
* The planner and executor don't have any clever strategy for
* taking data sorted by a prefix of the query's pathkeys and
* getting it to be sorted by all of those pathekeys. We'll just
* end up resorting the entire data set. So, unless we can push
* down all of the query pathkeys, forget it.
*/
list_free(usable_pathkeys);
usable_pathkeys = NIL;
break;
}
}
/* Create a path with useful pathkeys, if we found one. */
if (usable_pathkeys != NULL)
/* Create one path for each set of pathkeys we found above. */
foreach(lc, useful_pathkeys_list)
{
double rows;
int width;
Cost startup_cost;
Cost total_cost;
List *useful_pathkeys = lfirst(lc);
estimate_path_cost_size(root, baserel, NIL, usable_pathkeys,
estimate_path_cost_size(root, baserel, NIL, useful_pathkeys,
&rows, &width, &startup_cost, &total_cost);
add_path(baserel, (Path *)
@ -589,7 +753,7 @@ postgresGetForeignPaths(PlannerInfo *root,
rows,
startup_cost,
total_cost,
usable_pathkeys,
useful_pathkeys,
NULL,
NULL,
NIL));

View File

@ -178,6 +178,20 @@ SELECT * FROM ft1 t1 WHERE t1.c3 = (SELECT MAX(c3) FROM ft2 t2) ORDER BY c1;
WITH t1 AS (SELECT * FROM ft1 WHERE c1 <= 10) SELECT t2.c1, t2.c2, t2.c3, t2.c4 FROM t1, ft2 t2 WHERE t1.c1 = t2.c1 ORDER BY t1.c1;
-- fixed values
SELECT 'fixed', NULL FROM ft1 t1 WHERE c1 = 1;
-- Test forcing the remote server to produce sorted data for a merge join.
SET enable_hashjoin TO false;
SET enable_nestloop TO false;
-- inner join; expressions in the clauses appear in the equivalence class list
EXPLAIN (VERBOSE, COSTS false)
SELECT t1.c1, t2."C 1" FROM ft2 t1 JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
SELECT t1.c1, t2."C 1" FROM ft2 t1 JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
-- outer join; expressions in the clauses do not appear in equivalence class
-- list but no output change as compared to the previous query
EXPLAIN (VERBOSE, COSTS false)
SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
RESET enable_hashjoin;
RESET enable_nestloop;
-- ===================================================================
-- WHERE with remotely-executable conditions
@ -812,6 +826,32 @@ where bar.f1 = ss.f1;
select tableoid::regclass, * from bar order by 1,2;
-- Test forcing the remote server to produce sorted data for a merge join,
-- but the foreign table is an inheritance child.
truncate table loct1;
truncate table only foo;
\set num_rows_foo 2000
insert into loct1 select generate_series(0, :num_rows_foo, 2), generate_series(0, :num_rows_foo, 2), generate_series(0, :num_rows_foo, 2);
insert into foo select generate_series(1, :num_rows_foo, 2), generate_series(1, :num_rows_foo, 2);
SET enable_hashjoin to false;
SET enable_nestloop to false;
alter foreign table foo2 options (use_remote_estimate 'true');
create index i_loct1_f1 on loct1(f1);
create index i_foo_f1 on foo(f1);
analyze foo;
analyze loct1;
-- inner join; expressions in the clauses appear in the equivalence class list
explain (verbose, costs off)
select foo.f1, loct1.f1 from foo join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
select foo.f1, loct1.f1 from foo join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
-- outer join; expressions in the clauses do not appear in equivalence class
-- list but no output change as compared to the previous query
explain (verbose, costs off)
select foo.f1, loct1.f1 from foo left join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
select foo.f1, loct1.f1 from foo left join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10;
RESET enable_hashjoin;
RESET enable_nestloop;
-- Test that WHERE CURRENT OF is not supported
begin;
declare c cursor for select * from bar where f1 = 7;

View File

@ -28,9 +28,6 @@
#include "utils/lsyscache.h"
static PathKey *make_canonical_pathkey(PlannerInfo *root,
EquivalenceClass *eclass, Oid opfamily,
int strategy, bool nulls_first);
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
@ -50,7 +47,7 @@ static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
* equivclass.c will complain if a merge occurs after root->canon_pathkeys
* has become nonempty.)
*/
static PathKey *
PathKey *
make_canonical_pathkey(PlannerInfo *root,
EquivalenceClass *eclass, Oid opfamily,
int strategy, bool nulls_first)

View File

@ -206,5 +206,8 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root,
RelOptInfo *rel,
List *pathkeys);
extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
extern PathKey *make_canonical_pathkey(PlannerInfo *root,
EquivalenceClass *eclass, Oid opfamily,
int strategy, bool nulls_first);
#endif /* PATHS_H */