postgres_fdw: Add ORDER BY to some remote SQL queries.

If the join problem's entire ORDER BY clause can be pushed to the
remote server, consider a path that adds this ORDER BY clause.  If
use_remote_estimate is on, we cost this path using an additional
remote EXPLAIN.  If not, we just estimate that the path costs 20%
more, which is intended to be large enough that we won't request a
remote sort when it's not helpful, but small enough that we'll have
the remote side do the sort when in doubt.  In some cases, the remote
sort might actually be free, because the remote query plan might
happen to produce output that is ordered the way we need, but without
remote estimates we have no way of knowing that.

It might also be useful to request sorted output from the remote side
if it enables an efficient merge join, but this patch doesn't attempt
to handle that case.

Ashutosh Bapat with revisions by me.  Also reviewed by Fabrízio de Royes
Mello and Jeevan Chalke.
This commit is contained in:
Robert Haas 2015-11-03 12:46:06 -05:00
parent fc0b893521
commit f18c944b61
5 changed files with 222 additions and 30 deletions

View File

@ -193,9 +193,12 @@ is_foreign_expr(PlannerInfo *root,
if (!foreign_expr_walker((Node *) expr, &glob_cxt, &loc_cxt))
return false;
/* Expressions examined here should be boolean, ie noncollatable */
Assert(loc_cxt.collation == InvalidOid);
Assert(loc_cxt.state == FDW_COLLATE_NONE);
/*
* If the expression has a valid collation that does not arise from a
* foreign var, the expression can not be sent over.
*/
if (loc_cxt.state == FDW_COLLATE_UNSAFE)
return false;
/*
* An expression which includes any mutable functions can't be sent over
@ -1877,3 +1880,50 @@ printRemotePlaceholder(Oid paramtype, int32 paramtypmod,
appendStringInfo(buf, "((SELECT null::%s)::%s)", ptypename, ptypename);
}
/*
* Deparse ORDER BY clause according to the given pathkeys for given base
* relation. From given pathkeys expressions belonging entirely to the given
* base relation are obtained and deparsed.
*/
void
appendOrderByClause(StringInfo buf, PlannerInfo *root, RelOptInfo *baserel,
List *pathkeys)
{
ListCell *lcell;
deparse_expr_cxt context;
int nestlevel;
char *delim = " ";
/* Set up context struct for recursion */
context.root = root;
context.foreignrel = baserel;
context.buf = buf;
context.params_list = NULL;
/* Make sure any constants in the exprs are printed portably */
nestlevel = set_transmission_modes();
appendStringInfo(buf, " ORDER BY");
foreach(lcell, pathkeys)
{
PathKey *pathkey = lfirst(lcell);
Expr *em_expr;
em_expr = find_em_expr_for_rel(pathkey->pk_eclass, baserel);
Assert(em_expr != NULL);
appendStringInfoString(buf, delim);
deparseExpr(em_expr, &context);
if (pathkey->pk_strategy == BTLessStrategyNumber)
appendStringInfoString(buf, " ASC");
else
appendStringInfoString(buf, " DESC");
if (pathkey->pk_nulls_first)
appendStringInfoString(buf, " NULLS FIRST");
delim = ", ";
}
reset_transmission_modes(nestlevel);
}

View File

@ -134,15 +134,13 @@ ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true');
-- ===================================================================
-- simple queries
-- ===================================================================
-- single table, with/without alias
-- single table without alias
EXPLAIN (COSTS false) SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
QUERY PLAN
---------------------------------
QUERY PLAN
---------------------------
Limit
-> Sort
Sort Key: c3, c1
-> Foreign Scan on ft1
(4 rows)
-> Foreign Scan on ft1
(2 rows)
SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
@ -159,20 +157,21 @@ SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
110 | 0 | 00110 | Sun Jan 11 00:00:00 1970 PST | Sun Jan 11 00:00:00 1970 | 0 | 0 | foo
(10 rows)
EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-- single table with alias - also test that tableoid sort is not pushed to remote side
EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
QUERY PLAN
-------------------------------------------------------------------------------------
Limit
Output: c1, c2, c3, c4, c5, c6, c7, c8
Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
-> Sort
Output: c1, c2, c3, c4, c5, c6, c7, c8
Sort Key: t1.c3, t1.c1
Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
Sort Key: t1.c3, t1.c1, t1.tableoid
-> Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
Output: c1, c2, c3, c4, c5, c6, c7, c8, tableoid
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(8 rows)
SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
-----+----+-------+------------------------------+--------------------------+----+------------+-----
101 | 1 | 00101 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
@ -189,17 +188,14 @@ SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-- whole-row reference
EXPLAIN (VERBOSE, COSTS false) SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
QUERY PLAN
-------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Limit
Output: t1.*, c3, c1
-> Sort
-> Foreign Scan on public.ft1 t1
Output: t1.*, c3, c1
Sort Key: t1.c3, t1.c1
-> Foreign Scan on public.ft1 t1
Output: t1.*, c3, c1
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(8 rows)
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY c3 ASC, "C 1" ASC
(5 rows)
SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
t1
@ -650,6 +646,32 @@ SELECT * FROM ft2 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft1 WHERE c1 < 5));
4 | 4 | 00004 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4 | 4 | foo
(4 rows)
-- we should not push order by clause with volatile expressions or unsafe
-- collations
EXPLAIN (VERBOSE, COSTS false)
SELECT * FROM ft2 ORDER BY ft2.c1, random();
QUERY PLAN
-------------------------------------------------------------------------------
Sort
Output: c1, c2, c3, c4, c5, c6, c7, c8, (random())
Sort Key: ft2.c1, (random())
-> Foreign Scan on public.ft2
Output: c1, c2, c3, c4, c5, c6, c7, c8, random()
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(6 rows)
EXPLAIN (VERBOSE, COSTS false)
SELECT * FROM ft2 ORDER BY ft2.c1, ft2.c3 collate "C";
QUERY PLAN
-------------------------------------------------------------------------------
Sort
Output: c1, c2, c3, c4, c5, c6, c7, c8, ((c3)::text)
Sort Key: ft2.c1, ft2.c3 COLLATE "C"
-> Foreign Scan on public.ft2
Output: c1, c2, c3, c4, c5, c6, c7, c8, c3
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1"
(6 rows)
-- ===================================================================
-- parameterized queries
-- ===================================================================

View File

@ -47,6 +47,9 @@ PG_MODULE_MAGIC;
/* Default CPU cost to process 1 row (above and beyond cpu_tuple_cost). */
#define DEFAULT_FDW_TUPLE_COST 0.01
/* If no remote estimates, assume a sort costs 20% extra */
#define DEFAULT_FDW_SORT_MULTIPLIER 1.2
/*
* FDW-specific planner information kept in RelOptInfo.fdw_private for a
* foreign table. This information is collected by postgresGetForeignRelSize.
@ -296,6 +299,7 @@ static List *postgresImportForeignSchema(ImportForeignSchemaStmt *stmt,
static void estimate_path_cost_size(PlannerInfo *root,
RelOptInfo *baserel,
List *join_conds,
List *pathkeys,
double *p_rows, int *p_width,
Cost *p_startup_cost, Cost *p_total_cost);
static void get_remote_estimate(const char *sql,
@ -497,7 +501,7 @@ postgresGetForeignRelSize(PlannerInfo *root,
* values in fpinfo so we don't need to do it again to generate the
* basic foreign path.
*/
estimate_path_cost_size(root, baserel, NIL,
estimate_path_cost_size(root, baserel, NIL, NIL,
&fpinfo->rows, &fpinfo->width,
&fpinfo->startup_cost, &fpinfo->total_cost);
@ -527,7 +531,7 @@ postgresGetForeignRelSize(PlannerInfo *root,
set_baserel_size_estimates(root, baserel);
/* Fill in basically-bogus cost estimates for use later. */
estimate_path_cost_size(root, baserel, NIL,
estimate_path_cost_size(root, baserel, NIL, NIL,
&fpinfo->rows, &fpinfo->width,
&fpinfo->startup_cost, &fpinfo->total_cost);
}
@ -546,6 +550,7 @@ postgresGetForeignPaths(PlannerInfo *root,
ForeignPath *path;
List *ppi_list;
ListCell *lc;
List *usable_pathkeys = NIL;
/*
* Create simplest ForeignScan path node and add it to baserel. This path
@ -563,6 +568,60 @@ postgresGetForeignPaths(PlannerInfo *root,
NIL); /* no fdw_private list */
add_path(baserel, (Path *) path);
/*
* Determine whether we can potentially push query pathkeys to the remote
* side, avoiding a local sort.
*/
foreach(lc, root->query_pathkeys)
{
PathKey *pathkey = (PathKey *) lfirst(lc);
EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
Expr *em_expr;
/*
* is_foreign_expr would detect volatile expressions as well, but
* ec_has_volatile saves some cycles.
*/
if (!pathkey_ec->ec_has_volatile &&
(em_expr = find_em_expr_for_rel(pathkey_ec, baserel)) &&
is_foreign_expr(root, baserel, em_expr))
usable_pathkeys = lappend(usable_pathkeys, pathkey);
else
{
/*
* The planner and executor don't have any clever strategy for
* taking data sorted by a prefix of the query's pathkeys and
* getting it to be sorted by all of those pathekeys. We'll just
* end up resorting the entire data set. So, unless we can push
* down all of the query pathkeys, forget it.
*/
list_free(usable_pathkeys);
usable_pathkeys = NIL;
break;
}
}
/* Create a path with useful pathkeys, if we found one. */
if (usable_pathkeys != NULL)
{
double rows;
int width;
Cost startup_cost;
Cost total_cost;
estimate_path_cost_size(root, baserel, NIL, usable_pathkeys,
&rows, &width, &startup_cost, &total_cost);
add_path(baserel, (Path *)
create_foreignscan_path(root, baserel,
rows,
startup_cost,
total_cost,
usable_pathkeys,
NULL,
NIL));
}
/*
* If we're not using remote estimates, stop here. We have no way to
* estimate whether any join clauses would be worth sending across, so
@ -710,7 +769,7 @@ postgresGetForeignPaths(PlannerInfo *root,
/* Get a cost estimate from the remote */
estimate_path_cost_size(root, baserel,
param_info->ppi_clauses,
param_info->ppi_clauses, NIL,
&rows, &width,
&startup_cost, &total_cost);
@ -811,6 +870,10 @@ postgresGetForeignPlan(PlannerInfo *root,
appendWhereClause(&sql, root, baserel, remote_conds,
true, &params_list);
/* Add ORDER BY clause if we found any useful pathkeys */
if (best_path->path.pathkeys)
appendOrderByClause(&sql, root, baserel, best_path->path.pathkeys);
/*
* Add FOR UPDATE/SHARE if appropriate. We apply locking during the
* initial row fetch, rather than later on as is done for local tables.
@ -1728,6 +1791,7 @@ static void
estimate_path_cost_size(PlannerInfo *root,
RelOptInfo *baserel,
List *join_conds,
List *pathkeys,
double *p_rows, int *p_width,
Cost *p_startup_cost, Cost *p_total_cost)
{
@ -1780,6 +1844,9 @@ estimate_path_cost_size(PlannerInfo *root,
appendWhereClause(&sql, root, baserel, remote_join_conds,
(fpinfo->remote_conds == NIL), NULL);
if (pathkeys)
appendOrderByClause(&sql, root, baserel, pathkeys);
/* Get the remote estimate */
conn = GetConnection(fpinfo->server, fpinfo->user, false);
get_remote_estimate(sql.data, conn, &rows, &width,
@ -1837,6 +1904,21 @@ estimate_path_cost_size(PlannerInfo *root,
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
run_cost += cpu_per_tuple * baserel->tuples;
/*
* Without remote estimates, we have no real way to estimate the cost
* of generating sorted output. It could be free if the query plan
* the remote side would have chosen generates properly-sorted output
* anyway, but in most cases it will cost something. Estimate a value
* high enough that we won't pick the sorted path when the ordering
* isn't locally useful, but low enough that we'll err on the side of
* pushing down the ORDER BY clause when it's useful to do so.
*/
if (pathkeys != NIL)
{
startup_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
run_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
}
total_cost = startup_cost + run_cost;
}
@ -3002,3 +3084,31 @@ conversion_error_callback(void *arg)
NameStr(tupdesc->attrs[errpos->cur_attno - 1]->attname),
RelationGetRelationName(errpos->rel));
}
/*
* Find an equivalence class member expression, all of whose Vars, come from
* the indicated relation.
*/
extern Expr *
find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
{
ListCell *lc_em;
foreach(lc_em, ec->ec_members)
{
EquivalenceMember *em = lfirst(lc_em);
if (bms_equal(em->em_relids, rel->relids))
{
/*
* If there is more than one equivalence member whose Vars are
* taken entirely from this relation, we'll be content to choose
* any one of those.
*/
return em->em_expr;
}
}
/* We didn't find any suitable equivalence class expression */
return NULL;
}

View File

@ -74,5 +74,8 @@ extern void deparseAnalyzeSizeSql(StringInfo buf, Relation rel);
extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
List **retrieved_attrs);
extern void deparseStringLiteral(StringInfo buf, const char *val);
extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
extern void appendOrderByClause(StringInfo buf, PlannerInfo *root,
RelOptInfo *baserel, List *pathkeys);
#endif /* POSTGRES_FDW_H */

View File

@ -138,11 +138,12 @@ ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true');
-- ===================================================================
-- simple queries
-- ===================================================================
-- single table, with/without alias
-- single table without alias
EXPLAIN (COSTS false) SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
SELECT * FROM ft1 ORDER BY c3, c1 OFFSET 100 LIMIT 10;
EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
-- single table with alias - also test that tableoid sort is not pushed to remote side
EXPLAIN (VERBOSE, COSTS false) SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
SELECT * FROM ft1 t1 ORDER BY t1.c3, t1.c1, t1.tableoid OFFSET 100 LIMIT 10;
-- whole-row reference
EXPLAIN (VERBOSE, COSTS false) SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
SELECT t1 FROM ft1 t1 ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;
@ -214,6 +215,12 @@ WHERE a.c2 = 6 AND b.c1 = a.c1 AND a.c8 = 'foo' AND b.c7 = upper(a.c7);
-- bug before 9.3.5 due to sloppy handling of remote-estimate parameters
SELECT * FROM ft1 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft2 WHERE c1 < 5));
SELECT * FROM ft2 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft1 WHERE c1 < 5));
-- we should not push order by clause with volatile expressions or unsafe
-- collations
EXPLAIN (VERBOSE, COSTS false)
SELECT * FROM ft2 ORDER BY ft2.c1, random();
EXPLAIN (VERBOSE, COSTS false)
SELECT * FROM ft2 ORDER BY ft2.c1, ft2.c3 collate "C";
-- ===================================================================
-- parameterized queries