1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* joinpath.c
|
1997-09-07 07:04:48 +02:00
|
|
|
* Routines to find all possible paths for processing a set of joins
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2006-03-05 16:59:11 +01:00
|
|
|
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2006-12-23 01:43:13 +01:00
|
|
|
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.108 2006/12/23 00:43:10 tgl Exp $
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
2001-05-07 02:43:27 +02:00
|
|
|
#include "postgres.h"
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
#include <math.h>
|
|
|
|
|
2006-12-23 01:43:13 +01:00
|
|
|
#include "access/skey.h"
|
1999-07-16 05:14:30 +02:00
|
|
|
#include "optimizer/cost.h"
|
1999-07-16 07:00:38 +02:00
|
|
|
#include "optimizer/pathnode.h"
|
|
|
|
#include "optimizer/paths.h"
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2002-03-12 01:52:10 +01:00
|
|
|
|
2005-06-06 00:32:58 +02:00
|
|
|
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
|
2001-03-22 05:01:46 +01:00
|
|
|
RelOptInfo *outerrel, RelOptInfo *innerrel,
|
|
|
|
List *restrictlist, List *mergeclause_list,
|
|
|
|
JoinType jointype);
|
2005-06-06 00:32:58 +02:00
|
|
|
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
|
2001-03-22 05:01:46 +01:00
|
|
|
RelOptInfo *outerrel, RelOptInfo *innerrel,
|
|
|
|
List *restrictlist, List *mergeclause_list,
|
|
|
|
JoinType jointype);
|
2005-06-06 00:32:58 +02:00
|
|
|
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
|
2001-03-22 05:01:46 +01:00
|
|
|
RelOptInfo *outerrel, RelOptInfo *innerrel,
|
|
|
|
List *restrictlist, JoinType jointype);
|
2006-02-05 03:59:17 +01:00
|
|
|
static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
|
2006-10-04 02:30:14 +02:00
|
|
|
RelOptInfo *outer_rel, JoinType jointype);
|
2000-02-07 05:41:04 +01:00
|
|
|
static List *select_mergejoin_clauses(RelOptInfo *joinrel,
|
2001-03-22 05:01:46 +01:00
|
|
|
RelOptInfo *outerrel,
|
|
|
|
RelOptInfo *innerrel,
|
|
|
|
List *restrictlist,
|
|
|
|
JoinType jointype);
|
2006-12-23 01:43:13 +01:00
|
|
|
static void build_mergejoin_strat_lists(List *mergeclauses,
|
|
|
|
List **mergefamilies, List **mergestrategies);
|
2000-02-07 05:41:04 +01:00
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/*
|
2000-02-07 05:41:04 +01:00
|
|
|
* add_paths_to_joinrel
|
|
|
|
* Given a join relation and two component rels from which it can be made,
|
|
|
|
* consider all possible paths that use the two component rels as outer
|
|
|
|
* and inner rel respectively. Add these paths to the join rel's pathlist
|
|
|
|
* if they survive comparison with other paths (and remove any existing
|
|
|
|
* paths that are dominated by these paths).
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
2000-02-07 05:41:04 +01:00
|
|
|
* Modifies the pathlist field of the joinrel node to contain the best
|
|
|
|
* paths found so far.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
void
|
2005-06-06 00:32:58 +02:00
|
|
|
add_paths_to_joinrel(PlannerInfo *root,
|
2000-02-07 05:41:04 +01:00
|
|
|
RelOptInfo *joinrel,
|
|
|
|
RelOptInfo *outerrel,
|
|
|
|
RelOptInfo *innerrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
JoinType jointype,
|
2000-02-07 05:41:04 +01:00
|
|
|
List *restrictlist)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-02-07 05:41:04 +01:00
|
|
|
List *mergeclause_list = NIL;
|
1999-02-12 07:43:53 +01:00
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
2000-11-23 04:57:31 +01:00
|
|
|
* Find potential mergejoin clauses. We can skip this if we are not
|
2005-10-15 04:49:52 +02:00
|
|
|
* interested in doing a mergejoin. However, mergejoin is currently our
|
|
|
|
* only way of implementing full outer joins, so override mergejoin
|
|
|
|
* disable if it's a full join.
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
2000-11-23 04:57:31 +01:00
|
|
|
if (enable_mergejoin || jointype == JOIN_FULL)
|
2000-02-07 05:41:04 +01:00
|
|
|
mergeclause_list = select_mergejoin_clauses(joinrel,
|
|
|
|
outerrel,
|
|
|
|
innerrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
restrictlist,
|
|
|
|
jointype);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
2000-04-12 19:17:23 +02:00
|
|
|
* 1. Consider mergejoin paths where both relations must be explicitly
|
|
|
|
* sorted.
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
2000-02-15 21:49:31 +01:00
|
|
|
sort_inner_and_outer(root, joinrel, outerrel, innerrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
restrictlist, mergeclause_list, jointype);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
2000-04-12 19:17:23 +02:00
|
|
|
* 2. Consider paths where the outer relation need not be explicitly
|
|
|
|
* sorted. This includes both nestloops and mergejoins where the outer
|
|
|
|
* path is already ordered.
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
2000-02-15 21:49:31 +01:00
|
|
|
match_unsorted_outer(root, joinrel, outerrel, innerrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
restrictlist, mergeclause_list, jointype);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-02-15 21:49:31 +01:00
|
|
|
#ifdef NOT_USED
|
2000-04-12 19:17:23 +02:00
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
2000-04-12 19:17:23 +02:00
|
|
|
* 3. Consider paths where the inner relation need not be explicitly
|
2005-10-15 04:49:52 +02:00
|
|
|
* sorted. This includes mergejoins only (nestloops were already built in
|
|
|
|
* match_unsorted_outer).
|
2000-02-15 21:49:31 +01:00
|
|
|
*
|
2000-04-12 19:17:23 +02:00
|
|
|
* Diked out as redundant 2/13/2000 -- tgl. There isn't any really
|
2005-10-15 04:49:52 +02:00
|
|
|
* significant difference between the inner and outer side of a mergejoin,
|
|
|
|
* so match_unsorted_inner creates no paths that aren't equivalent to
|
|
|
|
* those made by match_unsorted_outer when add_paths_to_joinrel() is
|
|
|
|
* invoked with the two rels given in the other order.
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
2000-02-15 21:49:31 +01:00
|
|
|
match_unsorted_inner(root, joinrel, outerrel, innerrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
restrictlist, mergeclause_list, jointype);
|
2000-02-15 21:49:31 +01:00
|
|
|
#endif
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* 4. Consider paths where both outer and inner relations must be hashed
|
|
|
|
* before being joined.
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
|
|
|
if (enable_hashjoin)
|
2000-02-15 21:49:31 +01:00
|
|
|
hash_inner_and_outer(root, joinrel, outerrel, innerrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
restrictlist, jointype);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* sort_inner_and_outer
|
1998-08-04 18:44:31 +02:00
|
|
|
* Create mergejoin join paths by explicitly sorting both the outer and
|
1997-09-07 07:04:48 +02:00
|
|
|
* inner join relations on each available merge ordering.
|
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* 'joinrel' is the join relation
|
|
|
|
* 'outerrel' is the outer join relation
|
|
|
|
* 'innerrel' is the inner join relation
|
2000-02-07 05:41:04 +01:00
|
|
|
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
|
|
|
|
* clauses that apply to this join
|
1999-08-16 04:17:58 +02:00
|
|
|
* 'mergeclause_list' is a list of RestrictInfo nodes for available
|
2000-02-07 05:41:04 +01:00
|
|
|
* mergejoin clauses in this join
|
2000-09-12 23:07:18 +02:00
|
|
|
* 'jointype' is the type of join to do
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-02-15 21:49:31 +01:00
|
|
|
static void
|
2005-06-06 00:32:58 +02:00
|
|
|
sort_inner_and_outer(PlannerInfo *root,
|
2000-02-15 21:49:31 +01:00
|
|
|
RelOptInfo *joinrel,
|
1999-05-26 00:43:53 +02:00
|
|
|
RelOptInfo *outerrel,
|
|
|
|
RelOptInfo *innerrel,
|
2000-02-07 05:41:04 +01:00
|
|
|
List *restrictlist,
|
2000-09-12 23:07:18 +02:00
|
|
|
List *mergeclause_list,
|
|
|
|
JoinType jointype)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2001-11-11 20:18:54 +01:00
|
|
|
bool useallclauses;
|
2003-01-20 19:55:07 +01:00
|
|
|
Path *outer_path;
|
|
|
|
Path *inner_path;
|
2000-12-14 23:30:45 +01:00
|
|
|
List *all_pathkeys;
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *l;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2001-11-11 20:18:54 +01:00
|
|
|
/*
|
|
|
|
* If we are doing a right or full join, we must use *all* the
|
|
|
|
* mergeclauses as join clauses, else we will not have a valid plan.
|
|
|
|
*/
|
|
|
|
switch (jointype)
|
|
|
|
{
|
|
|
|
case JOIN_INNER:
|
|
|
|
case JOIN_LEFT:
|
2003-01-20 19:55:07 +01:00
|
|
|
case JOIN_IN:
|
|
|
|
case JOIN_UNIQUE_OUTER:
|
|
|
|
case JOIN_UNIQUE_INNER:
|
2001-11-11 20:18:54 +01:00
|
|
|
useallclauses = false;
|
|
|
|
break;
|
|
|
|
case JOIN_RIGHT:
|
|
|
|
case JOIN_FULL:
|
|
|
|
useallclauses = true;
|
|
|
|
break;
|
|
|
|
default:
|
2003-07-25 02:01:09 +02:00
|
|
|
elog(ERROR, "unrecognized join type: %d",
|
2001-11-11 20:18:54 +01:00
|
|
|
(int) jointype);
|
2002-09-04 22:31:48 +02:00
|
|
|
useallclauses = false; /* keep compiler quiet */
|
2001-11-11 20:18:54 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2003-01-20 19:55:07 +01:00
|
|
|
/*
|
|
|
|
* We only consider the cheapest-total-cost input paths, since we are
|
|
|
|
* assuming here that a sort is required. We will consider
|
2005-10-15 04:49:52 +02:00
|
|
|
* cheapest-startup-cost input paths later, and only if they don't need a
|
|
|
|
* sort.
|
2003-01-20 19:55:07 +01:00
|
|
|
*
|
2005-11-22 19:17:34 +01:00
|
|
|
* If unique-ification is requested, do it and then handle as a plain
|
|
|
|
* inner join.
|
2003-01-20 19:55:07 +01:00
|
|
|
*/
|
|
|
|
outer_path = outerrel->cheapest_total_path;
|
|
|
|
inner_path = innerrel->cheapest_total_path;
|
|
|
|
if (jointype == JOIN_UNIQUE_OUTER)
|
|
|
|
{
|
|
|
|
outer_path = (Path *) create_unique_path(root, outerrel, outer_path);
|
|
|
|
jointype = JOIN_INNER;
|
|
|
|
}
|
|
|
|
else if (jointype == JOIN_UNIQUE_INNER)
|
|
|
|
{
|
|
|
|
inner_path = (Path *) create_unique_path(root, innerrel, inner_path);
|
|
|
|
jointype = JOIN_INNER;
|
|
|
|
}
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Each possible ordering of the available mergejoin clauses will generate
|
|
|
|
* a differently-sorted result path at essentially the same cost. We have
|
|
|
|
* no basis for choosing one over another at this level of joining, but
|
|
|
|
* some sort orders may be more useful than others for higher-level
|
|
|
|
* mergejoins, so it's worth considering multiple orderings.
|
1999-08-16 04:17:58 +02:00
|
|
|
*
|
2000-12-14 23:30:45 +01:00
|
|
|
* Actually, it's not quite true that every mergeclause ordering will
|
|
|
|
* generate a different path order, because some of the clauses may be
|
2005-10-15 04:49:52 +02:00
|
|
|
* redundant. Therefore, what we do is convert the mergeclause list to a
|
|
|
|
* list of canonical pathkeys, and then consider different orderings of
|
|
|
|
* the pathkeys.
|
2000-12-14 23:30:45 +01:00
|
|
|
*
|
2001-03-22 05:01:46 +01:00
|
|
|
* Generating a path for *every* permutation of the pathkeys doesn't seem
|
|
|
|
* like a winning strategy; the cost in planning time is too high. For
|
2005-10-15 04:49:52 +02:00
|
|
|
* now, we generate one path for each pathkey, listing that pathkey first
|
|
|
|
* and the rest in random order. This should allow at least a one-clause
|
|
|
|
* mergejoin without re-sorting against any other possible mergejoin
|
|
|
|
* partner path. But if we've not guessed the right ordering of secondary
|
|
|
|
* keys, we may end up evaluating clauses as qpquals when they could have
|
|
|
|
* been done as mergeclauses. We need to figure out a better way. (Two
|
|
|
|
* possible approaches: look at all the relevant index relations to
|
|
|
|
* suggest plausible sort orders, or make just one output path and somehow
|
|
|
|
* mark it as having a sort-order that can be rearranged freely.)
|
1999-08-16 04:17:58 +02:00
|
|
|
*/
|
2000-12-14 23:30:45 +01:00
|
|
|
all_pathkeys = make_pathkeys_for_mergeclauses(root,
|
|
|
|
mergeclause_list,
|
|
|
|
outerrel);
|
|
|
|
|
2004-05-26 06:41:50 +02:00
|
|
|
foreach(l, all_pathkeys)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2004-05-26 06:41:50 +02:00
|
|
|
List *front_pathkey = (List *) lfirst(l);
|
2000-12-14 23:30:45 +01:00
|
|
|
List *cur_pathkeys;
|
|
|
|
List *cur_mergeclauses;
|
2006-12-23 01:43:13 +01:00
|
|
|
List *mergefamilies;
|
|
|
|
List *mergestrategies;
|
2000-04-12 19:17:23 +02:00
|
|
|
List *outerkeys;
|
|
|
|
List *innerkeys;
|
|
|
|
List *merge_pathkeys;
|
1999-08-16 04:17:58 +02:00
|
|
|
|
2000-12-14 23:30:45 +01:00
|
|
|
/* Make a pathkey list with this guy first. */
|
2004-05-26 06:41:50 +02:00
|
|
|
if (l != list_head(all_pathkeys))
|
2000-12-14 23:30:45 +01:00
|
|
|
cur_pathkeys = lcons(front_pathkey,
|
2004-05-31 01:40:41 +02:00
|
|
|
list_delete_ptr(list_copy(all_pathkeys),
|
|
|
|
front_pathkey));
|
2000-02-19 00:47:31 +01:00
|
|
|
else
|
2001-03-22 05:01:46 +01:00
|
|
|
cur_pathkeys = all_pathkeys; /* no work at first one... */
|
2000-12-14 23:30:45 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Select mergeclause(s) that match this sort ordering. If we had
|
2005-10-15 04:49:52 +02:00
|
|
|
* redundant merge clauses then we will get a subset of the original
|
|
|
|
* clause list. There had better be some match, however...
|
2000-12-14 23:30:45 +01:00
|
|
|
*/
|
|
|
|
cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
|
|
|
|
cur_pathkeys,
|
2005-10-15 04:49:52 +02:00
|
|
|
mergeclause_list);
|
2000-12-14 23:30:45 +01:00
|
|
|
Assert(cur_mergeclauses != NIL);
|
2000-02-19 00:47:31 +01:00
|
|
|
|
2001-11-11 20:18:54 +01:00
|
|
|
/* Forget it if can't use all the clauses in right/full join */
|
|
|
|
if (useallclauses &&
|
2005-10-15 04:49:52 +02:00
|
|
|
list_length(cur_mergeclauses) != list_length(mergeclause_list))
|
2001-11-11 20:18:54 +01:00
|
|
|
continue;
|
|
|
|
|
2000-04-12 19:17:23 +02:00
|
|
|
/*
|
|
|
|
* Build sort pathkeys for both sides.
|
1999-08-16 04:17:58 +02:00
|
|
|
*
|
2000-04-12 19:17:23 +02:00
|
|
|
* Note: it's possible that the cheapest paths will already be sorted
|
2005-10-15 04:49:52 +02:00
|
|
|
* properly. create_mergejoin_path will detect that case and suppress
|
|
|
|
* an explicit sort step, so we needn't do so here.
|
1999-08-16 04:17:58 +02:00
|
|
|
*/
|
2000-02-15 21:49:31 +01:00
|
|
|
outerkeys = make_pathkeys_for_mergeclauses(root,
|
2000-12-14 23:30:45 +01:00
|
|
|
cur_mergeclauses,
|
2000-09-12 23:07:18 +02:00
|
|
|
outerrel);
|
2000-02-15 21:49:31 +01:00
|
|
|
innerkeys = make_pathkeys_for_mergeclauses(root,
|
2000-12-14 23:30:45 +01:00
|
|
|
cur_mergeclauses,
|
2000-09-12 23:07:18 +02:00
|
|
|
innerrel);
|
1999-08-16 04:17:58 +02:00
|
|
|
/* Build pathkeys representing output sort order. */
|
2005-01-23 03:21:36 +01:00
|
|
|
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
|
|
|
|
outerkeys);
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2006-12-23 01:43:13 +01:00
|
|
|
/* Build opfamily info for execution */
|
|
|
|
build_mergejoin_strat_lists(cur_mergeclauses,
|
|
|
|
&mergefamilies, &mergestrategies);
|
|
|
|
|
2000-02-15 21:49:31 +01:00
|
|
|
/*
|
2003-01-20 19:55:07 +01:00
|
|
|
* And now we can make the path.
|
2000-02-15 21:49:31 +01:00
|
|
|
*/
|
|
|
|
add_path(joinrel, (Path *)
|
2001-06-05 07:26:05 +02:00
|
|
|
create_mergejoin_path(root,
|
|
|
|
joinrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
jointype,
|
2003-01-20 19:55:07 +01:00
|
|
|
outer_path,
|
|
|
|
inner_path,
|
2000-02-15 21:49:31 +01:00
|
|
|
restrictlist,
|
|
|
|
merge_pathkeys,
|
2000-12-14 23:30:45 +01:00
|
|
|
cur_mergeclauses,
|
2006-12-23 01:43:13 +01:00
|
|
|
mergefamilies,
|
|
|
|
mergestrategies,
|
2000-02-15 21:49:31 +01:00
|
|
|
outerkeys,
|
|
|
|
innerkeys));
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-02-14 00:22:53 +01:00
|
|
|
* match_unsorted_outer
|
1997-09-07 07:04:48 +02:00
|
|
|
* Creates possible join paths for processing a single join relation
|
|
|
|
* 'joinrel' by employing either iterative substitution or
|
1999-08-16 04:17:58 +02:00
|
|
|
* mergejoining on each of its possible outer paths (considering
|
|
|
|
* only outer paths that are already ordered well enough for merging).
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1999-08-16 04:17:58 +02:00
|
|
|
* We always generate a nestloop path for each available outer path.
|
2002-11-30 06:21:03 +01:00
|
|
|
* In fact we may generate as many as four: one on the cheapest-total-cost
|
|
|
|
* inner path, one on the same with materialization, one on the
|
2003-08-04 02:43:34 +02:00
|
|
|
* cheapest-startup-cost inner path (if different),
|
2000-02-15 21:49:31 +01:00
|
|
|
* and one on the best inner-indexscan path (if any).
|
1999-08-16 04:17:58 +02:00
|
|
|
*
|
2000-04-12 19:17:23 +02:00
|
|
|
* We also consider mergejoins if mergejoin clauses are available. We have
|
2000-02-15 21:49:31 +01:00
|
|
|
* two ways to generate the inner path for a mergejoin: sort the cheapest
|
|
|
|
* inner path, or use an inner path that is already suitably ordered for the
|
|
|
|
* merge. If we have several mergeclauses, it could be that there is no inner
|
|
|
|
* path (or only a very expensive one) for the full list of mergeclauses, but
|
|
|
|
* better paths exist if we truncate the mergeclause list (thereby discarding
|
|
|
|
* some sort key requirements). So, we consider truncations of the
|
|
|
|
* mergeclause list as well as the full list. (Ideally we'd consider all
|
|
|
|
* subsets of the mergeclause list, but that seems way too expensive.)
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* 'joinrel' is the join relation
|
|
|
|
* 'outerrel' is the outer join relation
|
|
|
|
* 'innerrel' is the inner join relation
|
2000-02-07 05:41:04 +01:00
|
|
|
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
|
|
|
|
* clauses that apply to this join
|
1999-08-16 04:17:58 +02:00
|
|
|
* 'mergeclause_list' is a list of RestrictInfo nodes for available
|
2000-02-07 05:41:04 +01:00
|
|
|
* mergejoin clauses in this join
|
2000-09-12 23:07:18 +02:00
|
|
|
* 'jointype' is the type of join to do
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-02-15 21:49:31 +01:00
|
|
|
static void
|
2005-06-06 00:32:58 +02:00
|
|
|
match_unsorted_outer(PlannerInfo *root,
|
2000-02-15 21:49:31 +01:00
|
|
|
RelOptInfo *joinrel,
|
1999-05-26 00:43:53 +02:00
|
|
|
RelOptInfo *outerrel,
|
|
|
|
RelOptInfo *innerrel,
|
2000-02-07 05:41:04 +01:00
|
|
|
List *restrictlist,
|
2000-09-12 23:07:18 +02:00
|
|
|
List *mergeclause_list,
|
|
|
|
JoinType jointype)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2003-01-20 19:55:07 +01:00
|
|
|
JoinType save_jointype = jointype;
|
2000-09-12 23:07:18 +02:00
|
|
|
bool nestjoinOK;
|
2001-04-15 02:48:17 +02:00
|
|
|
bool useallclauses;
|
2003-01-20 19:55:07 +01:00
|
|
|
Path *inner_cheapest_startup = innerrel->cheapest_startup_path;
|
|
|
|
Path *inner_cheapest_total = innerrel->cheapest_total_path;
|
2002-11-30 06:21:03 +01:00
|
|
|
Path *matpath = NULL;
|
|
|
|
Path *bestinnerjoin = NULL;
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *l;
|
1999-08-16 04:17:58 +02:00
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
/*
|
2003-01-20 19:55:07 +01:00
|
|
|
* Nestloop only supports inner, left, and IN joins. Also, if we are
|
2005-10-15 04:49:52 +02:00
|
|
|
* doing a right or full join, we must use *all* the mergeclauses as join
|
|
|
|
* clauses, else we will not have a valid plan. (Although these two flags
|
|
|
|
* are currently inverses, keep them separate for clarity and possible
|
|
|
|
* future changes.)
|
2000-09-12 23:07:18 +02:00
|
|
|
*/
|
|
|
|
switch (jointype)
|
|
|
|
{
|
|
|
|
case JOIN_INNER:
|
|
|
|
case JOIN_LEFT:
|
2003-01-20 19:55:07 +01:00
|
|
|
case JOIN_IN:
|
|
|
|
case JOIN_UNIQUE_OUTER:
|
|
|
|
case JOIN_UNIQUE_INNER:
|
2000-09-12 23:07:18 +02:00
|
|
|
nestjoinOK = true;
|
2001-04-15 02:48:17 +02:00
|
|
|
useallclauses = false;
|
2000-09-12 23:07:18 +02:00
|
|
|
break;
|
2001-04-15 02:48:17 +02:00
|
|
|
case JOIN_RIGHT:
|
|
|
|
case JOIN_FULL:
|
2000-09-12 23:07:18 +02:00
|
|
|
nestjoinOK = false;
|
2001-04-15 02:48:17 +02:00
|
|
|
useallclauses = true;
|
|
|
|
break;
|
|
|
|
default:
|
2003-07-25 02:01:09 +02:00
|
|
|
elog(ERROR, "unrecognized join type: %d",
|
2001-04-15 02:48:17 +02:00
|
|
|
(int) jointype);
|
2001-10-25 07:50:21 +02:00
|
|
|
nestjoinOK = false; /* keep compiler quiet */
|
2001-04-15 02:48:17 +02:00
|
|
|
useallclauses = false;
|
2000-09-12 23:07:18 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2003-01-20 19:55:07 +01:00
|
|
|
/*
|
2003-08-04 02:43:34 +02:00
|
|
|
* If we need to unique-ify the inner path, we will consider only the
|
|
|
|
* cheapest inner.
|
2003-01-20 19:55:07 +01:00
|
|
|
*/
|
|
|
|
if (jointype == JOIN_UNIQUE_INNER)
|
|
|
|
{
|
|
|
|
inner_cheapest_total = (Path *)
|
|
|
|
create_unique_path(root, innerrel, inner_cheapest_total);
|
|
|
|
inner_cheapest_startup = inner_cheapest_total;
|
|
|
|
jointype = JOIN_INNER;
|
|
|
|
}
|
|
|
|
else if (nestjoinOK)
|
2002-11-30 06:21:03 +01:00
|
|
|
{
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* If the cheapest inner path is a join or seqscan, we should consider
|
|
|
|
* materializing it. (This is a heuristic: we could consider it
|
|
|
|
* always, but for inner indexscans it's probably a waste of time.)
|
2002-11-30 06:21:03 +01:00
|
|
|
*/
|
2003-01-20 19:55:07 +01:00
|
|
|
if (!(IsA(inner_cheapest_total, IndexPath) ||
|
2005-04-20 00:35:18 +02:00
|
|
|
IsA(inner_cheapest_total, BitmapHeapPath) ||
|
2003-01-20 19:55:07 +01:00
|
|
|
IsA(inner_cheapest_total, TidPath)))
|
2002-11-30 06:21:03 +01:00
|
|
|
matpath = (Path *)
|
2003-01-20 19:55:07 +01:00
|
|
|
create_material_path(innerrel, inner_cheapest_total);
|
2002-11-30 06:21:03 +01:00
|
|
|
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Get the best innerjoin indexpath (if any) for this outer rel. It's
|
|
|
|
* the same for all outer paths.
|
2002-11-30 06:21:03 +01:00
|
|
|
*/
|
2006-02-05 03:59:17 +01:00
|
|
|
if (innerrel->reloptkind != RELOPT_JOINREL)
|
|
|
|
{
|
|
|
|
if (IsA(inner_cheapest_total, AppendPath))
|
|
|
|
bestinnerjoin = best_appendrel_indexscan(root, innerrel,
|
2006-06-06 19:59:58 +02:00
|
|
|
outerrel, jointype);
|
2006-02-05 03:59:17 +01:00
|
|
|
else if (innerrel->rtekind == RTE_RELATION)
|
|
|
|
bestinnerjoin = best_inner_indexscan(root, innerrel,
|
2006-06-06 19:59:58 +02:00
|
|
|
outerrel, jointype);
|
2006-02-05 03:59:17 +01:00
|
|
|
}
|
2002-11-30 06:21:03 +01:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2004-05-26 06:41:50 +02:00
|
|
|
foreach(l, outerrel->pathlist)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2004-05-26 06:41:50 +02:00
|
|
|
Path *outerpath = (Path *) lfirst(l);
|
1999-08-16 04:17:58 +02:00
|
|
|
List *merge_pathkeys;
|
2000-02-15 21:49:31 +01:00
|
|
|
List *mergeclauses;
|
2006-12-23 01:43:13 +01:00
|
|
|
List *mergefamilies;
|
|
|
|
List *mergestrategies;
|
1999-08-16 04:17:58 +02:00
|
|
|
List *innersortkeys;
|
2000-02-15 21:49:31 +01:00
|
|
|
List *trialsortkeys;
|
|
|
|
Path *cheapest_startup_inner;
|
|
|
|
Path *cheapest_total_inner;
|
2000-12-14 23:30:45 +01:00
|
|
|
int num_sortkeys;
|
|
|
|
int sortkeycnt;
|
1999-08-16 04:17:58 +02:00
|
|
|
|
2003-01-20 19:55:07 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* If we need to unique-ify the outer path, it's pointless to consider
|
|
|
|
* any but the cheapest outer.
|
2003-01-20 19:55:07 +01:00
|
|
|
*/
|
|
|
|
if (save_jointype == JOIN_UNIQUE_OUTER)
|
|
|
|
{
|
|
|
|
if (outerpath != outerrel->cheapest_total_path)
|
|
|
|
continue;
|
|
|
|
outerpath = (Path *) create_unique_path(root, outerrel, outerpath);
|
|
|
|
jointype = JOIN_INNER;
|
|
|
|
}
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* The result will have this sort order (even if it is implemented as
|
|
|
|
* a nestloop, and even if some of the mergeclauses are implemented by
|
|
|
|
* qpquals rather than as true mergeclauses):
|
1999-08-16 04:17:58 +02:00
|
|
|
*/
|
2005-01-23 03:21:36 +01:00
|
|
|
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
|
2000-12-14 23:30:45 +01:00
|
|
|
outerpath->pathkeys);
|
1999-08-16 04:17:58 +02:00
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
if (nestjoinOK)
|
|
|
|
{
|
|
|
|
/*
|
2001-03-22 05:01:46 +01:00
|
|
|
* Always consider a nestloop join with this outer and
|
2002-11-30 06:21:03 +01:00
|
|
|
* cheapest-total-cost inner. When appropriate, also consider
|
|
|
|
* using the materialized form of the cheapest inner, the
|
|
|
|
* cheapest-startup-cost inner path, and the best innerjoin
|
2001-10-25 07:50:21 +02:00
|
|
|
* indexpath.
|
2000-09-12 23:07:18 +02:00
|
|
|
*/
|
2000-02-15 21:49:31 +01:00
|
|
|
add_path(joinrel, (Path *)
|
2001-06-05 07:26:05 +02:00
|
|
|
create_nestloop_path(root,
|
|
|
|
joinrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
jointype,
|
2000-02-15 21:49:31 +01:00
|
|
|
outerpath,
|
2003-01-20 19:55:07 +01:00
|
|
|
inner_cheapest_total,
|
2000-02-15 21:49:31 +01:00
|
|
|
restrictlist,
|
|
|
|
merge_pathkeys));
|
2002-11-30 06:21:03 +01:00
|
|
|
if (matpath != NULL)
|
|
|
|
add_path(joinrel, (Path *)
|
|
|
|
create_nestloop_path(root,
|
|
|
|
joinrel,
|
|
|
|
jointype,
|
|
|
|
outerpath,
|
|
|
|
matpath,
|
|
|
|
restrictlist,
|
|
|
|
merge_pathkeys));
|
2003-01-20 19:55:07 +01:00
|
|
|
if (inner_cheapest_startup != inner_cheapest_total)
|
2000-09-12 23:07:18 +02:00
|
|
|
add_path(joinrel, (Path *)
|
2001-06-05 07:26:05 +02:00
|
|
|
create_nestloop_path(root,
|
|
|
|
joinrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
jointype,
|
|
|
|
outerpath,
|
2003-01-20 19:55:07 +01:00
|
|
|
inner_cheapest_startup,
|
2000-09-12 23:07:18 +02:00
|
|
|
restrictlist,
|
|
|
|
merge_pathkeys));
|
|
|
|
if (bestinnerjoin != NULL)
|
|
|
|
add_path(joinrel, (Path *)
|
2001-06-05 07:26:05 +02:00
|
|
|
create_nestloop_path(root,
|
|
|
|
joinrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
jointype,
|
|
|
|
outerpath,
|
|
|
|
bestinnerjoin,
|
|
|
|
restrictlist,
|
|
|
|
merge_pathkeys));
|
|
|
|
}
|
2000-02-15 21:49:31 +01:00
|
|
|
|
2003-01-20 19:55:07 +01:00
|
|
|
/* Can't do anything else if outer path needs to be unique'd */
|
|
|
|
if (save_jointype == JOIN_UNIQUE_OUTER)
|
|
|
|
continue;
|
|
|
|
|
2000-02-15 21:49:31 +01:00
|
|
|
/* Look for useful mergeclauses (if any) */
|
2000-12-14 23:30:45 +01:00
|
|
|
mergeclauses = find_mergeclauses_for_pathkeys(root,
|
|
|
|
outerpath->pathkeys,
|
2000-02-15 21:49:31 +01:00
|
|
|
mergeclause_list);
|
1999-08-16 04:17:58 +02:00
|
|
|
|
2004-04-06 20:46:03 +02:00
|
|
|
/*
|
|
|
|
* Done with this outer path if no chance for a mergejoin.
|
|
|
|
*
|
2004-08-29 07:07:03 +02:00
|
|
|
* Special corner case: for "x FULL JOIN y ON true", there will be no
|
|
|
|
* join clauses at all. Ordinarily we'd generate a clauseless
|
2004-04-06 20:46:03 +02:00
|
|
|
* nestloop path, but since mergejoin is our only join type that
|
|
|
|
* supports FULL JOIN, it's necessary to generate a clauseless
|
|
|
|
* mergejoin path instead.
|
|
|
|
*/
|
1999-08-16 04:17:58 +02:00
|
|
|
if (mergeclauses == NIL)
|
2004-04-06 20:46:03 +02:00
|
|
|
{
|
2005-05-24 20:02:31 +02:00
|
|
|
if (jointype == JOIN_FULL)
|
2004-08-29 07:07:03 +02:00
|
|
|
/* okay to try for mergejoin */ ;
|
2004-04-06 20:46:03 +02:00
|
|
|
else
|
|
|
|
continue;
|
|
|
|
}
|
2004-05-31 01:40:41 +02:00
|
|
|
if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
|
2001-04-15 02:48:17 +02:00
|
|
|
continue;
|
1999-08-16 04:17:58 +02:00
|
|
|
|
|
|
|
/* Compute the required ordering of the inner path */
|
2000-02-15 21:49:31 +01:00
|
|
|
innersortkeys = make_pathkeys_for_mergeclauses(root,
|
|
|
|
mergeclauses,
|
2000-09-12 23:07:18 +02:00
|
|
|
innerrel);
|
1999-08-16 04:17:58 +02:00
|
|
|
|
2006-12-23 01:43:13 +01:00
|
|
|
/* Build opfamily info for execution */
|
|
|
|
build_mergejoin_strat_lists(mergeclauses,
|
|
|
|
&mergefamilies, &mergestrategies);
|
|
|
|
|
2000-02-15 21:49:31 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Generate a mergejoin on the basis of sorting the cheapest inner.
|
2005-11-22 19:17:34 +01:00
|
|
|
* Since a sort will be needed, only cheapest total cost matters. (But
|
|
|
|
* create_mergejoin_path will do the right thing if
|
2003-01-20 19:55:07 +01:00
|
|
|
* inner_cheapest_total is already correctly sorted.)
|
1999-08-16 04:17:58 +02:00
|
|
|
*/
|
2000-02-15 21:49:31 +01:00
|
|
|
add_path(joinrel, (Path *)
|
2001-06-05 07:26:05 +02:00
|
|
|
create_mergejoin_path(root,
|
|
|
|
joinrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
jointype,
|
2000-02-15 21:49:31 +01:00
|
|
|
outerpath,
|
2003-01-20 19:55:07 +01:00
|
|
|
inner_cheapest_total,
|
2000-02-15 21:49:31 +01:00
|
|
|
restrictlist,
|
|
|
|
merge_pathkeys,
|
2000-02-19 00:47:31 +01:00
|
|
|
mergeclauses,
|
2006-12-23 01:43:13 +01:00
|
|
|
mergefamilies,
|
|
|
|
mergestrategies,
|
2000-02-15 21:49:31 +01:00
|
|
|
NIL,
|
|
|
|
innersortkeys));
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2003-01-20 19:55:07 +01:00
|
|
|
/* Can't do anything else if inner path needs to be unique'd */
|
|
|
|
if (save_jointype == JOIN_UNIQUE_INNER)
|
|
|
|
continue;
|
|
|
|
|
2000-02-15 21:49:31 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Look for presorted inner paths that satisfy the innersortkey list
|
|
|
|
* --- or any truncation thereof, if we are allowed to build a
|
|
|
|
* mergejoin using a subset of the merge clauses. Here, we consider
|
2006-08-17 19:06:37 +02:00
|
|
|
* both cheap startup cost and cheap total cost. We can ignore
|
|
|
|
* inner_cheapest_total on the first iteration, since we already made
|
2006-10-04 02:30:14 +02:00
|
|
|
* a path with it --- but not on later iterations with shorter sort
|
|
|
|
* keys, because then we are considering a different situation, viz
|
|
|
|
* using a simpler mergejoin to avoid a sort of the inner rel.
|
2000-02-15 21:49:31 +01:00
|
|
|
*/
|
2004-05-31 01:40:41 +02:00
|
|
|
num_sortkeys = list_length(innersortkeys);
|
2001-04-15 02:48:17 +02:00
|
|
|
if (num_sortkeys > 1 && !useallclauses)
|
2004-05-31 01:40:41 +02:00
|
|
|
trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
|
2000-12-14 23:30:45 +01:00
|
|
|
else
|
2001-03-22 05:01:46 +01:00
|
|
|
trialsortkeys = innersortkeys; /* won't really truncate */
|
2000-02-15 21:49:31 +01:00
|
|
|
cheapest_startup_inner = NULL;
|
|
|
|
cheapest_total_inner = NULL;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-12-14 23:30:45 +01:00
|
|
|
for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
|
2000-02-15 21:49:31 +01:00
|
|
|
{
|
|
|
|
Path *innerpath;
|
2000-02-19 00:47:31 +01:00
|
|
|
List *newclauses = NIL;
|
2000-02-15 21:49:31 +01:00
|
|
|
|
2000-04-12 19:17:23 +02:00
|
|
|
/*
|
2000-12-14 23:30:45 +01:00
|
|
|
* Look for an inner path ordered well enough for the first
|
2005-10-15 04:49:52 +02:00
|
|
|
* 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
|
|
|
|
* destructively, which is why we made a copy...
|
2000-02-15 21:49:31 +01:00
|
|
|
*/
|
2004-05-31 01:40:41 +02:00
|
|
|
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
|
2000-02-15 21:49:31 +01:00
|
|
|
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
|
|
|
|
trialsortkeys,
|
|
|
|
TOTAL_COST);
|
|
|
|
if (innerpath != NULL &&
|
2006-08-17 19:06:37 +02:00
|
|
|
(innerpath != inner_cheapest_total ||
|
|
|
|
sortkeycnt < num_sortkeys) &&
|
2000-02-15 21:49:31 +01:00
|
|
|
(cheapest_total_inner == NULL ||
|
|
|
|
compare_path_costs(innerpath, cheapest_total_inner,
|
|
|
|
TOTAL_COST) < 0))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2000-02-15 21:49:31 +01:00
|
|
|
/* Found a cheap (or even-cheaper) sorted path */
|
2000-12-14 23:30:45 +01:00
|
|
|
/* Select the right mergeclauses, if we didn't already */
|
|
|
|
if (sortkeycnt < num_sortkeys)
|
|
|
|
{
|
|
|
|
newclauses =
|
|
|
|
find_mergeclauses_for_pathkeys(root,
|
|
|
|
trialsortkeys,
|
|
|
|
mergeclauses);
|
|
|
|
Assert(newclauses != NIL);
|
|
|
|
}
|
2000-02-19 00:47:31 +01:00
|
|
|
else
|
|
|
|
newclauses = mergeclauses;
|
2006-12-23 01:43:13 +01:00
|
|
|
|
|
|
|
/* Build opfamily info for execution */
|
|
|
|
build_mergejoin_strat_lists(newclauses,
|
|
|
|
&mergefamilies, &mergestrategies);
|
|
|
|
|
2000-02-15 21:49:31 +01:00
|
|
|
add_path(joinrel, (Path *)
|
2001-06-05 07:26:05 +02:00
|
|
|
create_mergejoin_path(root,
|
|
|
|
joinrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
jointype,
|
2000-02-15 21:49:31 +01:00
|
|
|
outerpath,
|
|
|
|
innerpath,
|
|
|
|
restrictlist,
|
|
|
|
merge_pathkeys,
|
|
|
|
newclauses,
|
2006-12-23 01:43:13 +01:00
|
|
|
mergefamilies,
|
|
|
|
mergestrategies,
|
2000-02-15 21:49:31 +01:00
|
|
|
NIL,
|
|
|
|
NIL));
|
|
|
|
cheapest_total_inner = innerpath;
|
|
|
|
}
|
|
|
|
/* Same on the basis of cheapest startup cost ... */
|
|
|
|
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
|
|
|
|
trialsortkeys,
|
|
|
|
STARTUP_COST);
|
|
|
|
if (innerpath != NULL &&
|
2006-08-17 19:06:37 +02:00
|
|
|
(innerpath != inner_cheapest_total ||
|
|
|
|
sortkeycnt < num_sortkeys) &&
|
2000-02-15 21:49:31 +01:00
|
|
|
(cheapest_startup_inner == NULL ||
|
|
|
|
compare_path_costs(innerpath, cheapest_startup_inner,
|
|
|
|
STARTUP_COST) < 0))
|
|
|
|
{
|
|
|
|
/* Found a cheap (or even-cheaper) sorted path */
|
|
|
|
if (innerpath != cheapest_total_inner)
|
1999-08-16 04:17:58 +02:00
|
|
|
{
|
2000-04-12 19:17:23 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Avoid rebuilding clause list if we already made one;
|
|
|
|
* saves memory in big join trees...
|
2000-02-19 00:47:31 +01:00
|
|
|
*/
|
|
|
|
if (newclauses == NIL)
|
|
|
|
{
|
2000-12-14 23:30:45 +01:00
|
|
|
if (sortkeycnt < num_sortkeys)
|
|
|
|
{
|
|
|
|
newclauses =
|
|
|
|
find_mergeclauses_for_pathkeys(root,
|
2005-10-15 04:49:52 +02:00
|
|
|
trialsortkeys,
|
|
|
|
mergeclauses);
|
2000-12-14 23:30:45 +01:00
|
|
|
Assert(newclauses != NIL);
|
|
|
|
}
|
2000-02-19 00:47:31 +01:00
|
|
|
else
|
|
|
|
newclauses = mergeclauses;
|
|
|
|
}
|
2006-12-23 01:43:13 +01:00
|
|
|
|
|
|
|
/* Build opfamily info for execution */
|
|
|
|
build_mergejoin_strat_lists(newclauses,
|
|
|
|
&mergefamilies, &mergestrategies);
|
|
|
|
|
2000-02-15 21:49:31 +01:00
|
|
|
add_path(joinrel, (Path *)
|
2001-06-05 07:26:05 +02:00
|
|
|
create_mergejoin_path(root,
|
|
|
|
joinrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
jointype,
|
2000-02-15 21:49:31 +01:00
|
|
|
outerpath,
|
|
|
|
innerpath,
|
|
|
|
restrictlist,
|
|
|
|
merge_pathkeys,
|
|
|
|
newclauses,
|
2006-12-23 01:43:13 +01:00
|
|
|
mergefamilies,
|
|
|
|
mergestrategies,
|
2000-02-15 21:49:31 +01:00
|
|
|
NIL,
|
|
|
|
NIL));
|
1999-08-16 04:17:58 +02:00
|
|
|
}
|
2000-02-15 21:49:31 +01:00
|
|
|
cheapest_startup_inner = innerpath;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
2001-10-25 07:50:21 +02:00
|
|
|
|
2001-04-15 02:48:17 +02:00
|
|
|
/*
|
|
|
|
* Don't consider truncated sortkeys if we need all clauses.
|
|
|
|
*/
|
|
|
|
if (useallclauses)
|
|
|
|
break;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
1999-08-06 06:00:17 +02:00
|
|
|
* hash_inner_and_outer
|
1997-09-07 07:04:48 +02:00
|
|
|
* Create hashjoin join paths by explicitly hashing both the outer and
|
2002-11-30 01:08:22 +01:00
|
|
|
* inner keys of each available hash clause.
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* 'joinrel' is the join relation
|
|
|
|
* 'outerrel' is the outer join relation
|
|
|
|
* 'innerrel' is the inner join relation
|
2000-02-07 05:41:04 +01:00
|
|
|
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
|
|
|
|
* clauses that apply to this join
|
2000-09-12 23:07:18 +02:00
|
|
|
* 'jointype' is the type of join to do
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-02-15 21:49:31 +01:00
|
|
|
static void
|
2005-06-06 00:32:58 +02:00
|
|
|
hash_inner_and_outer(PlannerInfo *root,
|
1999-08-06 06:00:17 +02:00
|
|
|
RelOptInfo *joinrel,
|
1999-05-26 00:43:53 +02:00
|
|
|
RelOptInfo *outerrel,
|
2000-02-07 05:41:04 +01:00
|
|
|
RelOptInfo *innerrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
List *restrictlist,
|
|
|
|
JoinType jointype)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-09-12 23:07:18 +02:00
|
|
|
bool isouterjoin;
|
2002-11-30 01:08:22 +01:00
|
|
|
List *hashclauses;
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *l;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
/*
|
2003-01-20 19:55:07 +01:00
|
|
|
* Hashjoin only supports inner, left, and IN joins.
|
2000-09-12 23:07:18 +02:00
|
|
|
*/
|
|
|
|
switch (jointype)
|
|
|
|
{
|
|
|
|
case JOIN_INNER:
|
2003-01-20 19:55:07 +01:00
|
|
|
case JOIN_IN:
|
|
|
|
case JOIN_UNIQUE_OUTER:
|
|
|
|
case JOIN_UNIQUE_INNER:
|
2000-09-12 23:07:18 +02:00
|
|
|
isouterjoin = false;
|
|
|
|
break;
|
|
|
|
case JOIN_LEFT:
|
|
|
|
isouterjoin = true;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
2002-11-30 01:08:22 +01:00
|
|
|
* We need to build only one hashpath for any given pair of outer and
|
|
|
|
* inner relations; all of the hashable clauses will be used as keys.
|
|
|
|
*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Scan the join's restrictinfo list to find hashjoinable clauses that are
|
|
|
|
* usable with this pair of sub-relations.
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
2002-11-30 01:08:22 +01:00
|
|
|
hashclauses = NIL;
|
2004-05-26 06:41:50 +02:00
|
|
|
foreach(l, restrictlist)
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2004-05-26 06:41:50 +02:00
|
|
|
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
|
2000-02-07 05:41:04 +01:00
|
|
|
|
2004-01-05 06:07:36 +01:00
|
|
|
if (!restrictinfo->can_join ||
|
2003-01-15 20:35:48 +01:00
|
|
|
restrictinfo->hashjoinoperator == InvalidOid)
|
2000-02-07 05:41:04 +01:00
|
|
|
continue; /* not hashjoinable */
|
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
/*
|
2000-09-29 20:21:41 +02:00
|
|
|
* If processing an outer join, only use its own join clauses for
|
2000-09-12 23:07:18 +02:00
|
|
|
* hashing. For inner joins we need not be so picky.
|
|
|
|
*/
|
2004-01-05 06:07:36 +01:00
|
|
|
if (isouterjoin && restrictinfo->is_pushed_down)
|
2000-09-12 23:07:18 +02:00
|
|
|
continue;
|
|
|
|
|
2000-12-14 23:30:45 +01:00
|
|
|
/*
|
2001-06-05 07:26:05 +02:00
|
|
|
* Check if clause is usable with these input rels.
|
2000-12-14 23:30:45 +01:00
|
|
|
*/
|
2003-02-08 21:20:55 +01:00
|
|
|
if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
|
|
|
|
bms_is_subset(restrictinfo->right_relids, innerrel->relids))
|
2000-12-14 23:30:45 +01:00
|
|
|
{
|
|
|
|
/* righthand side is inner */
|
|
|
|
}
|
2003-02-08 21:20:55 +01:00
|
|
|
else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
|
2005-10-15 04:49:52 +02:00
|
|
|
bms_is_subset(restrictinfo->right_relids, outerrel->relids))
|
2000-12-14 23:30:45 +01:00
|
|
|
{
|
|
|
|
/* lefthand side is inner */
|
|
|
|
}
|
2000-02-07 05:41:04 +01:00
|
|
|
else
|
|
|
|
continue; /* no good for these input relations */
|
1999-05-25 18:15:34 +02:00
|
|
|
|
2002-11-30 01:08:22 +01:00
|
|
|
hashclauses = lappend(hashclauses, restrictinfo);
|
|
|
|
}
|
2000-02-19 00:47:31 +01:00
|
|
|
|
2002-11-30 01:08:22 +01:00
|
|
|
/* If we found any usable hashclauses, make a path */
|
|
|
|
if (hashclauses)
|
|
|
|
{
|
2000-02-15 21:49:31 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* We consider both the cheapest-total-cost and cheapest-startup-cost
|
|
|
|
* outer paths. There's no need to consider any but the
|
|
|
|
* cheapest-total-cost inner path, however.
|
2000-02-15 21:49:31 +01:00
|
|
|
*/
|
2003-08-04 02:43:34 +02:00
|
|
|
Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
|
|
|
|
Path *cheapest_total_outer = outerrel->cheapest_total_path;
|
|
|
|
Path *cheapest_total_inner = innerrel->cheapest_total_path;
|
2003-01-20 19:55:07 +01:00
|
|
|
|
|
|
|
/* Unique-ify if need be */
|
|
|
|
if (jointype == JOIN_UNIQUE_OUTER)
|
|
|
|
{
|
|
|
|
cheapest_total_outer = (Path *)
|
|
|
|
create_unique_path(root, outerrel, cheapest_total_outer);
|
|
|
|
cheapest_startup_outer = cheapest_total_outer;
|
|
|
|
jointype = JOIN_INNER;
|
|
|
|
}
|
|
|
|
else if (jointype == JOIN_UNIQUE_INNER)
|
|
|
|
{
|
|
|
|
cheapest_total_inner = (Path *)
|
|
|
|
create_unique_path(root, innerrel, cheapest_total_inner);
|
|
|
|
jointype = JOIN_INNER;
|
|
|
|
}
|
|
|
|
|
2000-02-15 21:49:31 +01:00
|
|
|
add_path(joinrel, (Path *)
|
2001-06-05 07:26:05 +02:00
|
|
|
create_hashjoin_path(root,
|
|
|
|
joinrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
jointype,
|
2003-01-20 19:55:07 +01:00
|
|
|
cheapest_total_outer,
|
|
|
|
cheapest_total_inner,
|
2000-02-15 21:49:31 +01:00
|
|
|
restrictlist,
|
2001-06-05 07:26:05 +02:00
|
|
|
hashclauses));
|
2003-01-20 19:55:07 +01:00
|
|
|
if (cheapest_startup_outer != cheapest_total_outer)
|
2000-02-15 21:49:31 +01:00
|
|
|
add_path(joinrel, (Path *)
|
2001-06-05 07:26:05 +02:00
|
|
|
create_hashjoin_path(root,
|
|
|
|
joinrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
jointype,
|
2003-01-20 19:55:07 +01:00
|
|
|
cheapest_startup_outer,
|
|
|
|
cheapest_total_inner,
|
2000-02-15 21:49:31 +01:00
|
|
|
restrictlist,
|
2001-06-05 07:26:05 +02:00
|
|
|
hashclauses));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
2000-02-15 21:49:31 +01:00
|
|
|
}
|
1999-05-18 23:36:10 +02:00
|
|
|
|
2006-01-31 22:39:25 +01:00
|
|
|
/*
|
2006-02-05 03:59:17 +01:00
|
|
|
* best_appendrel_indexscan
|
|
|
|
* Finds the best available set of inner indexscans for a nestloop join
|
2006-06-06 19:59:58 +02:00
|
|
|
* with the given append relation on the inside and the given outer_rel
|
2006-10-04 02:30:14 +02:00
|
|
|
* outside. Returns an AppendPath comprising the best inner scans, or
|
2006-02-05 03:59:17 +01:00
|
|
|
* NULL if there are no possible inner indexscans.
|
2006-01-31 22:39:25 +01:00
|
|
|
*/
|
2006-02-05 03:59:17 +01:00
|
|
|
static Path *
|
|
|
|
best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
|
2006-06-06 19:59:58 +02:00
|
|
|
RelOptInfo *outer_rel, JoinType jointype)
|
2006-01-31 22:39:25 +01:00
|
|
|
{
|
2006-02-05 03:59:17 +01:00
|
|
|
int parentRTindex = rel->relid;
|
2006-01-31 22:39:25 +01:00
|
|
|
List *append_paths = NIL;
|
2006-02-05 03:59:17 +01:00
|
|
|
bool found_indexscan = false;
|
2006-01-31 22:39:25 +01:00
|
|
|
ListCell *l;
|
|
|
|
|
|
|
|
foreach(l, root->append_rel_list)
|
|
|
|
{
|
|
|
|
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
|
|
|
|
int childRTindex;
|
|
|
|
RelOptInfo *childrel;
|
|
|
|
Path *bestinnerjoin;
|
|
|
|
|
|
|
|
/* append_rel_list contains all append rels; ignore others */
|
|
|
|
if (appinfo->parent_relid != parentRTindex)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
childRTindex = appinfo->child_relid;
|
|
|
|
childrel = find_base_rel(root, childRTindex);
|
|
|
|
Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
|
|
|
|
|
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* Check to see if child was rejected by constraint exclusion. If so,
|
|
|
|
* it will have a cheapest_total_path that's an Append path with no
|
|
|
|
* members (see set_plain_rel_pathlist).
|
2006-01-31 22:39:25 +01:00
|
|
|
*/
|
2006-02-05 00:03:20 +01:00
|
|
|
if (IsA(childrel->cheapest_total_path, AppendPath) &&
|
|
|
|
((AppendPath *) childrel->cheapest_total_path)->subpaths == NIL)
|
|
|
|
continue; /* OK, we can ignore it */
|
2006-01-31 22:39:25 +01:00
|
|
|
|
|
|
|
/*
|
2006-02-05 03:59:17 +01:00
|
|
|
* Get the best innerjoin indexpath (if any) for this child rel.
|
2006-01-31 22:39:25 +01:00
|
|
|
*/
|
|
|
|
bestinnerjoin = best_inner_indexscan(root, childrel,
|
2006-06-06 19:59:58 +02:00
|
|
|
outer_rel, jointype);
|
2006-02-05 03:59:17 +01:00
|
|
|
|
2006-01-31 22:39:25 +01:00
|
|
|
/*
|
2006-10-04 02:30:14 +02:00
|
|
|
* If no luck on an indexpath for this rel, we'll still consider an
|
|
|
|
* Append substituting the cheapest-total inner path. However we must
|
|
|
|
* find at least one indexpath, else there's not going to be any
|
|
|
|
* improvement over the base path for the appendrel.
|
2006-01-31 22:39:25 +01:00
|
|
|
*/
|
2006-02-05 03:59:17 +01:00
|
|
|
if (bestinnerjoin)
|
|
|
|
found_indexscan = true;
|
|
|
|
else
|
2006-01-31 22:39:25 +01:00
|
|
|
bestinnerjoin = childrel->cheapest_total_path;
|
|
|
|
|
2006-02-05 03:59:17 +01:00
|
|
|
append_paths = lappend(append_paths, bestinnerjoin);
|
2006-01-31 22:39:25 +01:00
|
|
|
}
|
|
|
|
|
2006-02-05 03:59:17 +01:00
|
|
|
if (!found_indexscan)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* Form and return the completed Append path. */
|
|
|
|
return (Path *) create_append_path(rel, append_paths);
|
2006-01-31 22:39:25 +01:00
|
|
|
}
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
/*
|
|
|
|
* select_mergejoin_clauses
|
|
|
|
* Select mergejoin clauses that are usable for a particular join.
|
|
|
|
* Returns a list of RestrictInfo nodes for those clauses.
|
|
|
|
*
|
2000-02-07 05:41:04 +01:00
|
|
|
* We examine each restrictinfo clause known for the join to see
|
|
|
|
* if it is mergejoinable and involves vars from the two sub-relations
|
|
|
|
* currently of interest.
|
1999-08-16 04:17:58 +02:00
|
|
|
*/
|
|
|
|
static List *
|
2000-02-07 05:41:04 +01:00
|
|
|
select_mergejoin_clauses(RelOptInfo *joinrel,
|
|
|
|
RelOptInfo *outerrel,
|
|
|
|
RelOptInfo *innerrel,
|
2000-09-12 23:07:18 +02:00
|
|
|
List *restrictlist,
|
|
|
|
JoinType jointype)
|
1999-08-16 04:17:58 +02:00
|
|
|
{
|
|
|
|
List *result_list = NIL;
|
2000-09-12 23:07:18 +02:00
|
|
|
bool isouterjoin = IS_OUTER_JOIN(jointype);
|
2005-10-25 22:30:30 +02:00
|
|
|
bool have_nonmergeable_joinclause = false;
|
2004-05-26 06:41:50 +02:00
|
|
|
ListCell *l;
|
1999-08-16 04:17:58 +02:00
|
|
|
|
2004-05-26 06:41:50 +02:00
|
|
|
foreach(l, restrictlist)
|
1999-08-16 04:17:58 +02:00
|
|
|
{
|
2004-05-26 06:41:50 +02:00
|
|
|
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
|
2000-02-07 05:41:04 +01:00
|
|
|
|
2000-09-12 23:07:18 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* If processing an outer join, only use its own join clauses in the
|
2005-11-22 19:17:34 +01:00
|
|
|
* merge. For inner joins we can use pushed-down clauses too. (Note:
|
|
|
|
* we don't set have_nonmergeable_joinclause here because pushed-down
|
|
|
|
* clauses will become otherquals not joinquals.)
|
2000-09-12 23:07:18 +02:00
|
|
|
*/
|
2005-10-25 22:30:30 +02:00
|
|
|
if (isouterjoin && restrictinfo->is_pushed_down)
|
|
|
|
continue;
|
2000-09-12 23:07:18 +02:00
|
|
|
|
2004-01-05 06:07:36 +01:00
|
|
|
if (!restrictinfo->can_join ||
|
2003-01-15 20:35:48 +01:00
|
|
|
restrictinfo->mergejoinoperator == InvalidOid)
|
2005-10-25 22:30:30 +02:00
|
|
|
{
|
|
|
|
have_nonmergeable_joinclause = true;
|
2000-02-07 05:41:04 +01:00
|
|
|
continue; /* not mergejoinable */
|
2005-10-25 22:30:30 +02:00
|
|
|
}
|
2000-02-07 05:41:04 +01:00
|
|
|
|
2003-01-15 20:35:48 +01:00
|
|
|
/*
|
2003-01-27 21:51:54 +01:00
|
|
|
* Check if clause is usable with these input rels. All the vars
|
2005-10-15 04:49:52 +02:00
|
|
|
* needed on each side of the clause must be available from one or the
|
|
|
|
* other of the input rels.
|
2003-01-15 20:35:48 +01:00
|
|
|
*/
|
2003-02-08 21:20:55 +01:00
|
|
|
if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
|
|
|
|
bms_is_subset(restrictinfo->right_relids, innerrel->relids))
|
2003-01-15 20:35:48 +01:00
|
|
|
{
|
|
|
|
/* righthand side is inner */
|
|
|
|
}
|
2003-02-08 21:20:55 +01:00
|
|
|
else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
|
2005-10-15 04:49:52 +02:00
|
|
|
bms_is_subset(restrictinfo->right_relids, outerrel->relids))
|
2003-01-15 20:35:48 +01:00
|
|
|
{
|
|
|
|
/* lefthand side is inner */
|
|
|
|
}
|
|
|
|
else
|
2005-10-25 22:30:30 +02:00
|
|
|
{
|
|
|
|
have_nonmergeable_joinclause = true;
|
2003-01-15 20:35:48 +01:00
|
|
|
continue; /* no good for these input relations */
|
2005-10-25 22:30:30 +02:00
|
|
|
}
|
2000-02-07 05:41:04 +01:00
|
|
|
|
2003-01-15 20:35:48 +01:00
|
|
|
result_list = lcons(restrictinfo, result_list);
|
1999-08-16 04:17:58 +02:00
|
|
|
}
|
|
|
|
|
2005-10-25 22:30:30 +02:00
|
|
|
/*
|
|
|
|
* If it is a right/full join then *all* the explicit join clauses must be
|
|
|
|
* mergejoinable, else the executor will fail. If we are asked for a right
|
|
|
|
* join then just return NIL to indicate no mergejoin is possible (we can
|
|
|
|
* handle it as a left join instead). If we are asked for a full join then
|
|
|
|
* emit an error, because there is no fallback.
|
|
|
|
*/
|
|
|
|
if (have_nonmergeable_joinclause)
|
|
|
|
{
|
|
|
|
switch (jointype)
|
|
|
|
{
|
|
|
|
case JOIN_RIGHT:
|
|
|
|
return NIL; /* not mergejoinable */
|
|
|
|
case JOIN_FULL:
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* otherwise, it's OK to have nonmergeable join quals */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1999-08-16 04:17:58 +02:00
|
|
|
return result_list;
|
|
|
|
}
|
2006-12-23 01:43:13 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Temporary hack to build opfamily and strategy lists needed for mergejoin
|
|
|
|
* by the executor. We need to rethink the planner's handling of merge
|
|
|
|
* planning so that it can deal with multiple possible merge orders, but
|
|
|
|
* that's not done yet.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
build_mergejoin_strat_lists(List *mergeclauses,
|
|
|
|
List **mergefamilies, List **mergestrategies)
|
|
|
|
{
|
|
|
|
ListCell *l;
|
|
|
|
|
|
|
|
*mergefamilies = NIL;
|
|
|
|
*mergestrategies = NIL;
|
|
|
|
|
|
|
|
foreach(l, mergeclauses)
|
|
|
|
{
|
|
|
|
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We do not need to worry about whether the mergeclause will be
|
|
|
|
* commuted at runtime --- it's the same opfamily either way.
|
|
|
|
*/
|
|
|
|
*mergefamilies = lappend_oid(*mergefamilies, restrictinfo->mergeopfamily);
|
|
|
|
/*
|
|
|
|
* For the moment, strategy must always be LessThan --- see
|
|
|
|
* hack version of get_op_mergejoin_info
|
|
|
|
*/
|
|
|
|
*mergestrategies = lappend_int(*mergestrategies, BTLessStrategyNumber);
|
|
|
|
}
|
|
|
|
}
|