From ccd8f97922944566d26c7d90eb67ab7848ee9905 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 22 Dec 2015 13:46:40 -0500 Subject: [PATCH] postgres_fdw: Consider requesting sorted data so we can do a merge join. When use_remote_estimate is enabled, consider adding ORDER BY to the query we are sending to the remote server so that we can use that ordered data for a merge join. Commit f18c944b6137329ac4a6b2dce5745c5dc21a8578 arranges to push down the query pathkeys, which seems like the case most likely to be a win, but testing shows this can sometimes win, too. For a regular table, we know which indexes are present and therefore test whether the ordering provided by each such index is useful. Here, we take the opposite approach: guess what orderings would be useful if they could be generated cheaply, and then ask the remote side what those will cost. Ashutosh Bapat, with very substantial cosmetic revisions by me. Also reviewed by Rushabh Lathia. --- .../postgres_fdw/expected/postgres_fdw.out | 165 ++++++++++++ contrib/postgres_fdw/postgres_fdw.c | 238 +++++++++++++++--- contrib/postgres_fdw/sql/postgres_fdw.sql | 40 +++ src/backend/optimizer/path/pathkeys.c | 5 +- src/include/optimizer/paths.h | 3 + 5 files changed, 410 insertions(+), 41 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 866a09bea5..b471c674af 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -343,6 +343,76 @@ SELECT 'fixed', NULL FROM ft1 t1 WHERE c1 = 1; fixed | (1 row) +-- Test forcing the remote server to produce sorted data for a merge join. 
+SET enable_hashjoin TO false; +SET enable_nestloop TO false; +-- inner join; expressions in the clauses appear in the equivalence class list +EXPLAIN (VERBOSE, COSTS false) + SELECT t1.c1, t2."C 1" FROM ft2 t1 JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10; + QUERY PLAN +---------------------------------------------------------------------------- + Limit + Output: t1.c1, t2."C 1" + -> Merge Join + Output: t1.c1, t2."C 1" + Merge Cond: (t1.c1 = t2."C 1") + -> Foreign Scan on public.ft2 t1 + Output: t1.c1 + Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC + -> Index Only Scan using t1_pkey on "S 1"."T 1" t2 + Output: t2."C 1" +(10 rows) + +SELECT t1.c1, t2."C 1" FROM ft2 t1 JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10; + c1 | C 1 +-----+----- + 101 | 101 + 102 | 102 + 103 | 103 + 104 | 104 + 105 | 105 + 106 | 106 + 107 | 107 + 108 | 108 + 109 | 109 + 110 | 110 +(10 rows) + +-- outer join; expressions in the clauses do not appear in equivalence class +-- list but no output change as compared to the previous query +EXPLAIN (VERBOSE, COSTS false) + SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10; + QUERY PLAN +---------------------------------------------------------------------------- + Limit + Output: t1.c1, t2."C 1" + -> Merge Left Join + Output: t1.c1, t2."C 1" + Merge Cond: (t1.c1 = t2."C 1") + -> Foreign Scan on public.ft2 t1 + Output: t1.c1 + Remote SQL: SELECT "C 1" FROM "S 1"."T 1" ORDER BY "C 1" ASC + -> Index Only Scan using t1_pkey on "S 1"."T 1" t2 + Output: t2."C 1" +(10 rows) + +SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10; + c1 | C 1 +-----+----- + 101 | 101 + 102 | 102 + 103 | 103 + 104 | 104 + 105 | 105 + 106 | 106 + 107 | 107 + 108 | 108 + 109 | 109 + 110 | 110 +(10 rows) + +RESET enable_hashjoin; +RESET enable_nestloop; -- =================================================================== -- 
WHERE with remotely-executable conditions -- =================================================================== @@ -3538,6 +3608,101 @@ select tableoid::regclass, * from bar order by 1,2; bar2 | 7 | 177 (6 rows) +-- Test forcing the remote server to produce sorted data for a merge join, +-- but the foreign table is an inheritance child. +truncate table loct1; +truncate table only foo; +\set num_rows_foo 2000 +insert into loct1 select generate_series(0, :num_rows_foo, 2), generate_series(0, :num_rows_foo, 2), generate_series(0, :num_rows_foo, 2); +insert into foo select generate_series(1, :num_rows_foo, 2), generate_series(1, :num_rows_foo, 2); +SET enable_hashjoin to false; +SET enable_nestloop to false; +alter foreign table foo2 options (use_remote_estimate 'true'); +create index i_loct1_f1 on loct1(f1); +create index i_foo_f1 on foo(f1); +analyze foo; +analyze loct1; +-- inner join; expressions in the clauses appear in the equivalence class list +explain (verbose, costs off) + select foo.f1, loct1.f1 from foo join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10; + QUERY PLAN +--------------------------------------------------------------------------------------- + Limit + Output: foo.f1, loct1.f1, foo.f2 + -> Sort + Output: foo.f1, loct1.f1, foo.f2 + Sort Key: foo.f2 + -> Merge Join + Output: foo.f1, loct1.f1, foo.f2 + Merge Cond: (foo.f1 = loct1.f1) + -> Merge Append + Sort Key: foo.f1 + -> Index Scan using i_foo_f1 on public.foo + Output: foo.f1, foo.f2 + -> Foreign Scan on public.foo2 + Output: foo2.f1, foo2.f2 + Remote SQL: SELECT f1, f2 FROM public.loct1 ORDER BY f1 ASC + -> Index Only Scan using i_loct1_f1 on public.loct1 + Output: loct1.f1 +(17 rows) + +select foo.f1, loct1.f1 from foo join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10; + f1 | f1 +----+---- + 20 | 20 + 22 | 22 + 24 | 24 + 26 | 26 + 28 | 28 + 30 | 30 + 32 | 32 + 34 | 34 + 36 | 36 + 38 | 38 +(10 rows) + +-- outer join; expressions in the clauses do not 
appear in equivalence class +-- list but no output change as compared to the previous query +explain (verbose, costs off) + select foo.f1, loct1.f1 from foo left join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10; + QUERY PLAN +--------------------------------------------------------------------------------------- + Limit + Output: foo.f1, loct1.f1, foo.f2 + -> Sort + Output: foo.f1, loct1.f1, foo.f2 + Sort Key: foo.f2 + -> Merge Left Join + Output: foo.f1, loct1.f1, foo.f2 + Merge Cond: (foo.f1 = loct1.f1) + -> Merge Append + Sort Key: foo.f1 + -> Index Scan using i_foo_f1 on public.foo + Output: foo.f1, foo.f2 + -> Foreign Scan on public.foo2 + Output: foo2.f1, foo2.f2 + Remote SQL: SELECT f1, f2 FROM public.loct1 ORDER BY f1 ASC + -> Index Only Scan using i_loct1_f1 on public.loct1 + Output: loct1.f1 +(17 rows) + +select foo.f1, loct1.f1 from foo left join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10; + f1 | f1 +----+---- + 10 | 10 + 11 | + 12 | 12 + 13 | + 14 | 14 + 15 | + 16 | 16 + 17 | + 18 | 18 + 19 | +(10 rows) + +RESET enable_hashjoin; +RESET enable_nestloop; -- Test that WHERE CURRENT OF is not supported begin; declare c cursor for select * from bar where f1 = 7; diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 9a014d4dba..f501c6c5be 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -259,6 +259,9 @@ static bool postgresAnalyzeForeignTable(Relation relation, BlockNumber *totalpages); static List *postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid); +static List *get_useful_pathkeys_for_relation(PlannerInfo *root, + RelOptInfo *rel); +static List *get_useful_ecs_for_relation(PlannerInfo *root, RelOptInfo *rel); /* * Helper functions @@ -508,6 +511,197 @@ postgresGetForeignRelSize(PlannerInfo *root, } } +/* + * get_useful_ecs_for_relation + * Determine which EquivalenceClasses might be involved in useful + * 
orderings of this relation. + * + * This function is in some respects a mirror image of the core function + * pathkeys_useful_for_merging: for a regular table, we know what indexes + * we have and want to test whether any of them are useful. For a foreign + * table, we don't know what indexes are present on the remote side but + * want to speculate about which ones we'd like to use if they existed. + */ +static List * +get_useful_ecs_for_relation(PlannerInfo *root, RelOptInfo *rel) +{ + List *useful_eclass_list = NIL; + ListCell *lc; + Relids relids; + + /* + * First, consider whether any active EC is potentially useful for a + * merge join against this relation. + */ + if (rel->has_eclass_joins) + { + foreach(lc, root->eq_classes) + { + EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc); + + if (eclass_useful_for_merging(root, cur_ec, rel)) + useful_eclass_list = lappend(useful_eclass_list, cur_ec); + } + } + + /* + * Next, consider whether there are any non-EC derivable join clauses that + * are merge-joinable. If the joininfo list is empty, we can exit + * quickly. + */ + if (rel->joininfo == NIL) + return useful_eclass_list; + + /* If this is a child rel, we must use the topmost parent rel to search. */ + if (rel->reloptkind == RELOPT_OTHER_MEMBER_REL) + relids = find_childrel_top_parent(root, rel)->relids; + else + relids = rel->relids; + + /* Check each join clause in turn. */ + foreach(lc, rel->joininfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + /* Consider only mergejoinable clauses */ + if (restrictinfo->mergeopfamilies == NIL) + continue; + + /* Make sure we've got canonical ECs. */ + update_mergeclause_eclasses(root, restrictinfo); + + /* + * restrictinfo->mergeopfamilies != NIL is sufficient to guarantee + * that left_ec and right_ec will be initialized, per comments in + * distribute_qual_to_rels, and rel->joininfo should only contain ECs + * where this relation appears on one side or the other. 
+ */ + if (bms_is_subset(restrictinfo->right_ec->ec_relids, relids)) + useful_eclass_list = list_append_unique_ptr(useful_eclass_list, + restrictinfo->right_ec); + else + { + Assert(bms_is_subset(restrictinfo->left_ec->ec_relids, relids)); + useful_eclass_list = list_append_unique_ptr(useful_eclass_list, + restrictinfo->left_ec); + } + } + + return useful_eclass_list; +} + +/* + * get_useful_pathkeys_for_relation + * Determine which orderings of a relation might be useful. + * + * Getting data in sorted order can be useful either because the requested + * order matches the final output ordering for the overall query we're + * planning, or because it enables an efficient merge join. Here, we try + * to figure out which pathkeys to consider. + */ +static List * +get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) +{ + List *useful_pathkeys_list = NIL; + List *useful_eclass_list; + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) rel->fdw_private; + EquivalenceClass *query_ec = NULL; + ListCell *lc; + + /* + * Pushing the query_pathkeys to the remote server is always worth + * considering, because it might let us avoid a local sort. + */ + if (root->query_pathkeys) + { + bool query_pathkeys_ok = true; + + foreach(lc, root->query_pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *pathkey_ec = pathkey->pk_eclass; + Expr *em_expr; + + /* + * The planner and executor don't have any clever strategy for + * taking data sorted by a prefix of the query's pathkeys and + * getting it to be sorted by all of those pathkeys. We'll just + * end up resorting the entire data set. So, unless we can push + * down all of the query pathkeys, forget it. + * + * is_foreign_expr would detect volatile expressions as well, but + * checking ec_has_volatile here saves some cycles. 
+ */ + if (pathkey_ec->ec_has_volatile || + !(em_expr = find_em_expr_for_rel(pathkey_ec, rel)) || + !is_foreign_expr(root, rel, em_expr)) + { + query_pathkeys_ok = false; + break; + } + } + + if (query_pathkeys_ok) + useful_pathkeys_list = list_make1(list_copy(root->query_pathkeys)); + } + + /* + * Even if we're not using remote estimates, having the remote side do + * the sort generally won't be any worse than doing it locally, and it + * might be much better if the remote side can generate data in the right + * order without needing a sort at all. However, what we're going to do + * next is try to generate pathkeys that seem promising for possible merge + * joins, and that's more speculative. A wrong choice might hurt quite a + * bit, so bail out if we can't use remote estimates. + */ + if (!fpinfo->use_remote_estimate) + return useful_pathkeys_list; + + /* Get the list of interesting EquivalenceClasses. */ + useful_eclass_list = get_useful_ecs_for_relation(root, rel); + + /* Extract unique EC for query, if any, so we don't consider it again. */ + if (list_length(root->query_pathkeys) == 1) + { + PathKey *query_pathkey = linitial(root->query_pathkeys); + + query_ec = query_pathkey->pk_eclass; + } + + /* + * As a heuristic, the only pathkeys we consider here are those of length + * one. It's surely possible to consider more, but since each one we + * choose to consider will generate a round-trip to the remote side, we + * need to be a bit cautious here. It would sure be nice to have a local + * cache of information about remote index definitions... + */ + foreach(lc, useful_eclass_list) + { + EquivalenceClass *cur_ec = lfirst(lc); + Expr *em_expr; + PathKey *pathkey; + + /* If redundant with what we did above, skip it. */ + if (cur_ec == query_ec) + continue; + + /* If no pushable expression for this rel, skip it. 
*/ + em_expr = find_em_expr_for_rel(cur_ec, rel); + if (em_expr == NULL || !is_foreign_expr(root, rel, em_expr)) + continue; + + /* Looks like we can generate a pathkey, so let's do it. */ + pathkey = make_canonical_pathkey(root, cur_ec, + linitial_oid(cur_ec->ec_opfamilies), + BTLessStrategyNumber, + false); + useful_pathkeys_list = lappend(useful_pathkeys_list, + list_make1(pathkey)); + } + + return useful_pathkeys_list; +} + /* * postgresGetForeignPaths * Create possible scan paths for a scan on the foreign table @@ -521,7 +715,7 @@ postgresGetForeignPaths(PlannerInfo *root, ForeignPath *path; List *ppi_list; ListCell *lc; - List *usable_pathkeys = NIL; + List *useful_pathkeys_list = NIL; /* List of all pathkeys */ /* * Create simplest ForeignScan path node and add it to baserel. This path @@ -540,48 +734,18 @@ postgresGetForeignPaths(PlannerInfo *root, NIL); /* no fdw_private list */ add_path(baserel, (Path *) path); - /* - * Determine whether we can potentially push query pathkeys to the remote - * side, avoiding a local sort. - */ - foreach(lc, root->query_pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(lc); - EquivalenceClass *pathkey_ec = pathkey->pk_eclass; - Expr *em_expr; + useful_pathkeys_list = get_useful_pathkeys_for_relation(root, baserel); - /* - * is_foreign_expr would detect volatile expressions as well, but - * ec_has_volatile saves some cycles. - */ - if (!pathkey_ec->ec_has_volatile && - (em_expr = find_em_expr_for_rel(pathkey_ec, baserel)) && - is_foreign_expr(root, baserel, em_expr)) - usable_pathkeys = lappend(usable_pathkeys, pathkey); - else - { - /* - * The planner and executor don't have any clever strategy for - * taking data sorted by a prefix of the query's pathkeys and - * getting it to be sorted by all of those pathekeys. We'll just - * end up resorting the entire data set. So, unless we can push - * down all of the query pathkeys, forget it. 
- */ - list_free(usable_pathkeys); - usable_pathkeys = NIL; - break; - } - } - - /* Create a path with useful pathkeys, if we found one. */ - if (usable_pathkeys != NULL) + /* Create one path for each set of pathkeys we found above. */ + foreach(lc, useful_pathkeys_list) { double rows; int width; Cost startup_cost; Cost total_cost; + List *useful_pathkeys = lfirst(lc); - estimate_path_cost_size(root, baserel, NIL, usable_pathkeys, + estimate_path_cost_size(root, baserel, NIL, useful_pathkeys, &rows, &width, &startup_cost, &total_cost); add_path(baserel, (Path *) @@ -589,7 +753,7 @@ postgresGetForeignPaths(PlannerInfo *root, rows, startup_cost, total_cost, - usable_pathkeys, + useful_pathkeys, NULL, NULL, NIL)); diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 671e38ceaa..73fa9f6de0 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -178,6 +178,20 @@ SELECT * FROM ft1 t1 WHERE t1.c3 = (SELECT MAX(c3) FROM ft2 t2) ORDER BY c1; WITH t1 AS (SELECT * FROM ft1 WHERE c1 <= 10) SELECT t2.c1, t2.c2, t2.c3, t2.c4 FROM t1, ft2 t2 WHERE t1.c1 = t2.c1 ORDER BY t1.c1; -- fixed values SELECT 'fixed', NULL FROM ft1 t1 WHERE c1 = 1; +-- Test forcing the remote server to produce sorted data for a merge join. 
+SET enable_hashjoin TO false; +SET enable_nestloop TO false; +-- inner join; expressions in the clauses appear in the equivalence class list +EXPLAIN (VERBOSE, COSTS false) + SELECT t1.c1, t2."C 1" FROM ft2 t1 JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10; +SELECT t1.c1, t2."C 1" FROM ft2 t1 JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10; +-- outer join; expressions in the clauses do not appear in equivalence class +-- list but no output change as compared to the previous query +EXPLAIN (VERBOSE, COSTS false) + SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10; +SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10; +RESET enable_hashjoin; +RESET enable_nestloop; -- =================================================================== -- WHERE with remotely-executable conditions @@ -812,6 +826,32 @@ where bar.f1 = ss.f1; select tableoid::regclass, * from bar order by 1,2; +-- Test forcing the remote server to produce sorted data for a merge join, +-- but the foreign table is an inheritance child. 
+truncate table loct1; +truncate table only foo; +\set num_rows_foo 2000 +insert into loct1 select generate_series(0, :num_rows_foo, 2), generate_series(0, :num_rows_foo, 2), generate_series(0, :num_rows_foo, 2); +insert into foo select generate_series(1, :num_rows_foo, 2), generate_series(1, :num_rows_foo, 2); +SET enable_hashjoin to false; +SET enable_nestloop to false; +alter foreign table foo2 options (use_remote_estimate 'true'); +create index i_loct1_f1 on loct1(f1); +create index i_foo_f1 on foo(f1); +analyze foo; +analyze loct1; +-- inner join; expressions in the clauses appear in the equivalence class list +explain (verbose, costs off) + select foo.f1, loct1.f1 from foo join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10; +select foo.f1, loct1.f1 from foo join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10; +-- outer join; expressions in the clauses do not appear in equivalence class +-- list but no output change as compared to the previous query +explain (verbose, costs off) + select foo.f1, loct1.f1 from foo left join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10; +select foo.f1, loct1.f1 from foo left join loct1 on (foo.f1 = loct1.f1) order by foo.f2 offset 10 limit 10; +RESET enable_hashjoin; +RESET enable_nestloop; + -- Test that WHERE CURRENT OF is not supported begin; declare c cursor for select * from bar where f1 = 7; diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index c6b5d78724..b81cc49f0a 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -28,9 +28,6 @@ #include "utils/lsyscache.h" -static PathKey *make_canonical_pathkey(PlannerInfo *root, - EquivalenceClass *eclass, Oid opfamily, - int strategy, bool nulls_first); static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); @@ -50,7 +47,7 @@ static bool 
right_merge_direction(PlannerInfo *root, PathKey *pathkey); * equivclass.c will complain if a merge occurs after root->canon_pathkeys * has become nonempty.) */ -static PathKey * +PathKey * make_canonical_pathkey(PlannerInfo *root, EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first) diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 7757741c10..4e00e9f474 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -206,5 +206,8 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *pathkeys); extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel); +extern PathKey *make_canonical_pathkey(PlannerInfo *root, + EquivalenceClass *eclass, Oid opfamily, + int strategy, bool nulls_first); #endif /* PATHS_H */