diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index cb447b6371..c5c9cb0782 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.103 2000/01/27 18:11:27 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.104 2000/02/07 04:40:56 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -977,7 +977,7 @@ _copyRelOptInfo(RelOptInfo *from) newnode->pages = from->pages; newnode->tuples = from->tuples; - Node_Copy(from, newnode, restrictinfo); + Node_Copy(from, newnode, baserestrictinfo); Node_Copy(from, newnode, joininfo); Node_Copy(from, newnode, innerjoin); @@ -1137,6 +1137,7 @@ CopyJoinPathFields(JoinPath *from, JoinPath *newnode) { Node_Copy(from, newnode, outerjoinpath); Node_Copy(from, newnode, innerjoinpath); + Node_Copy(from, newnode, joinrestrictinfo); } /* ---------------- diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 90344ae669..5888f49515 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.58 2000/01/31 01:21:39 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.59 2000/02/07 04:40:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -374,6 +374,8 @@ _equalJoinPath(JoinPath *a, JoinPath *b) return false; if (!equal(a->innerjoinpath, b->innerjoinpath)) return false; + if (!equal(a->joinrestrictinfo, b->joinrestrictinfo)) + return false; return true; } diff --git a/src/backend/nodes/freefuncs.c b/src/backend/nodes/freefuncs.c index fc5c3506d8..ab308fe310 100644 --- a/src/backend/nodes/freefuncs.c +++ b/src/backend/nodes/freefuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/Attic/freefuncs.c,v 1.33 2000/01/27 18:11:28 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/Attic/freefuncs.c,v 1.34 2000/02/07 04:40:57 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -735,7 +735,7 @@ _freeRelOptInfo(RelOptInfo *node) */ freeObject(node->cheapestpath); - freeObject(node->restrictinfo); + freeObject(node->baserestrictinfo); freeObject(node->joininfo); freeObject(node->innerjoin); @@ -853,6 +853,10 @@ FreeJoinPathFields(JoinPath *node) { freeObject(node->outerjoinpath); freeObject(node->innerjoinpath); + /* XXX probably wrong, since ordinarily a JoinPath would share its + * restrictinfo list with other paths made for the same join? + */ + freeObject(node->joinrestrictinfo); } /* ---------------- diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 4475fa382b..8923510e1b 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.105 2000/01/27 18:11:28 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.106 2000/02/07 04:40:57 tgl Exp $ * * NOTES * Every (plan) node in POSTGRES has an associated "out" routine which @@ -915,10 +915,10 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node) */ appendStringInfo(str, - " :cheapestpath @ 0x%x :pruneable %s :restrictinfo ", + " :cheapestpath @ 0x%x :pruneable %s :baserestrictinfo ", (int) node->cheapestpath, node->pruneable ? "true" : "false"); - _outNode(str, node->restrictinfo); + _outNode(str, node->baserestrictinfo); appendStringInfo(str, " :joininfo "); _outNode(str, node->joininfo); @@ -1035,16 +1035,12 @@ _outNestPath(StringInfo str, NestPath *node) node->path.pathtype, node->path.path_cost); _outNode(str, node->path.pathkeys); - - /* - * Not sure if these are nodes; they're declared as "struct path *". - * For now, i'll just print the addresses. - */ - - appendStringInfo(str, - " :outerjoinpath @ 0x%x :innerjoinpath @ 0x%x ", - (int) node->outerjoinpath, - (int) node->innerjoinpath); + appendStringInfo(str, " :outerjoinpath "); + _outNode(str, node->outerjoinpath); + appendStringInfo(str, " :innerjoinpath "); + _outNode(str, node->innerjoinpath); + appendStringInfo(str, " :joinrestrictinfo "); + _outNode(str, node->joinrestrictinfo); } /* @@ -1058,16 +1054,12 @@ _outMergePath(StringInfo str, MergePath *node) node->jpath.path.pathtype, node->jpath.path.path_cost); _outNode(str, node->jpath.path.pathkeys); - - /* - * Not sure if these are nodes; they're declared as "struct path *". - * For now, i'll just print the addresses. - */ - - appendStringInfo(str, - " :outerjoinpath @ 0x%x :innerjoinpath @ 0x%x ", - (int) node->jpath.outerjoinpath, - (int) node->jpath.innerjoinpath); + appendStringInfo(str, " :outerjoinpath "); + _outNode(str, node->jpath.outerjoinpath); + appendStringInfo(str, " :innerjoinpath "); + _outNode(str, node->jpath.innerjoinpath); + appendStringInfo(str, " :joinrestrictinfo "); + _outNode(str, node->jpath.joinrestrictinfo); appendStringInfo(str, " :path_mergeclauses "); _outNode(str, node->path_mergeclauses); @@ -1090,16 +1082,12 @@ _outHashPath(StringInfo str, HashPath *node) node->jpath.path.pathtype, node->jpath.path.path_cost); _outNode(str, node->jpath.path.pathkeys); - - /* - * Not sure if these are nodes; they're declared as "struct path *". - * For now, i'll just print the addresses. - */ - - appendStringInfo(str, - " :outerjoinpath @ 0x%x :innerjoinpath @ 0x%x ", - (int) node->jpath.outerjoinpath, - (int) node->jpath.innerjoinpath); + appendStringInfo(str, " :outerjoinpath "); + _outNode(str, node->jpath.outerjoinpath); + appendStringInfo(str, " :innerjoinpath "); + _outNode(str, node->jpath.innerjoinpath); + appendStringInfo(str, " :joinrestrictinfo "); + _outNode(str, node->jpath.joinrestrictinfo); appendStringInfo(str, " :path_hashclauses "); _outNode(str, node->path_hashclauses); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 9dccbf5017..38db7def0b 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.81 2000/01/27 18:11:28 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.82 2000/02/07 04:40:57 tgl Exp $ * * NOTES * Most of the read functions for plan nodes are tested. (In fact, they @@ -1288,8 +1288,8 @@ _readRelOptInfo() sscanf(token, "%x", (unsigned int *) &local_node->cheapestpath); - token = lsptok(NULL, &length); /* get :restrictinfo */ - local_node->restrictinfo = nodeRead(true); /* now read it */ + token = lsptok(NULL, &length); /* get :baserestrictinfo */ + local_node->baserestrictinfo = nodeRead(true); /* now read it */ token = lsptok(NULL, &length); /* get :joininfo */ local_node->joininfo = nodeRead(true); /* now read it */ @@ -1518,25 +1518,14 @@ _readNestPath() token = lsptok(NULL, &length); /* get :pathkeys */ local_node->path.pathkeys = nodeRead(true); /* now read it */ - /* - * Not sure if these are nodes; they're declared as "struct path *". - * For now, i'll just print the addresses. - * - * GJK: Since I am parsing this stuff, I'll just ignore the addresses, - * and initialize these pointers to NULL. - */ - token = lsptok(NULL, &length); /* get :outerjoinpath */ - token = lsptok(NULL, &length); /* get @ */ - token = lsptok(NULL, &length); /* now read it */ - - local_node->outerjoinpath = NULL; + local_node->outerjoinpath = nodeRead(true); /* now read it */ token = lsptok(NULL, &length); /* get :innerjoinpath */ - token = lsptok(NULL, &length); /* get @ */ - token = lsptok(NULL, &length); /* now read it */ + local_node->innerjoinpath = nodeRead(true); /* now read it */ - local_node->innerjoinpath = NULL; + token = lsptok(NULL, &length); /* get :joinrestrictinfo */ + local_node->joinrestrictinfo = nodeRead(true); /* now read it */ return local_node; } @@ -1569,25 +1558,14 @@ _readMergePath() token = lsptok(NULL, &length); /* get :pathkeys */ local_node->jpath.path.pathkeys = nodeRead(true); /* now read it */ - /* - * Not sure if these are nodes; they're declared as "struct path *". - * For now, i'll just print the addresses. - * - * GJK: Since I am parsing this stuff, I'll just ignore the addresses, - * and initialize these pointers to NULL. - */ - token = lsptok(NULL, &length); /* get :outerjoinpath */ - token = lsptok(NULL, &length); /* get @ */ - token = lsptok(NULL, &length); /* now read it */ - - local_node->jpath.outerjoinpath = NULL; + local_node->jpath.outerjoinpath = nodeRead(true); /* now read it */ token = lsptok(NULL, &length); /* get :innerjoinpath */ - token = lsptok(NULL, &length); /* get @ */ - token = lsptok(NULL, &length); /* now read it */ + local_node->jpath.innerjoinpath = nodeRead(true); /* now read it */ - local_node->jpath.innerjoinpath = NULL; + token = lsptok(NULL, &length); /* get :joinrestrictinfo */ + local_node->jpath.joinrestrictinfo = nodeRead(true); /* now read it */ token = lsptok(NULL, &length); /* get :path_mergeclauses */ local_node->path_mergeclauses = nodeRead(true); /* now read it */ @@ -1629,25 +1607,14 @@ _readHashPath() token = lsptok(NULL, &length); /* get :pathkeys */ local_node->jpath.path.pathkeys = nodeRead(true); /* now read it */ - /* - * Not sure if these are nodes; they're declared as "struct path *". - * For now, i'll just print the addresses. - * - * GJK: Since I am parsing this stuff, I'll just ignore the addresses, - * and initialize these pointers to NULL. - */ - token = lsptok(NULL, &length); /* get :outerjoinpath */ - token = lsptok(NULL, &length); /* get @ */ - token = lsptok(NULL, &length); /* now read it */ - - local_node->jpath.outerjoinpath = NULL; + local_node->jpath.outerjoinpath = nodeRead(true); /* now read it */ token = lsptok(NULL, &length); /* get :innerjoinpath */ - token = lsptok(NULL, &length); /* get @ */ - token = lsptok(NULL, &length); /* now read it */ + local_node->jpath.innerjoinpath = nodeRead(true); /* now read it */ - local_node->jpath.innerjoinpath = NULL; + token = lsptok(NULL, &length); /* get :joinrestrictinfo */ + local_node->jpath.joinrestrictinfo = nodeRead(true); /* now read it */ token = lsptok(NULL, &length); /* get :path_hashclauses */ local_node->path_hashclauses = nodeRead(true); /* now read it */ diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 20a4e47742..bbc1204395 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -1,6 +1,20 @@ Summary ------- +These directories take the Query structure returned by the parser, and +generate a plan used by the executor. The /plan directory generates the +actual output plan, the /path code generates all possible ways to join the +tables, and /prep handles special cases like inheritance. /util is utility +stuff. /geqo is the separate "genetic optimization" planner --- it does +a semi-random search through the join tree space, rather than exhaustively +considering all possible join trees. (But each join considered by geqo +is given to /path to create paths for, so we consider all possible +implementation paths for each specific join even in GEQO mode.) + + +Join Tree Construction +---------------------- + The optimizer generates optimal query plans by doing a more-or-less exhaustive search through the ways of executing the query. During the planning/optimizing process, we build "Path" trees representing @@ -26,8 +40,17 @@ the WHERE clause "tab1.col1 = tab2.col1" generates a JoinInfo for tab1 listing tab2 as an unjoined relation, and also one for tab2 showing tab1 as an unjoined relation. +If we have only a single base relation in the query, we are done here. +Otherwise we have to figure out how to join the base relations into a +single join relation. + 2) Consider joining each RelOptInfo to each other RelOptInfo specified in its RelOptInfo.joininfo, and generate a Path for each possible join method. +(If we have a RelOptInfo with no join clauses, we have no choice but to +generate a clauseless Cartesian-product join; so we consider joining that +rel to each other available rel. But in the presence of join clauses we +will only consider joins that use available join clauses.) + At this stage each input RelOptInfo is a single relation, so we are joining every relation to the other relations as joined in the WHERE clause. We generate a new "join" RelOptInfo for each possible combination of two @@ -53,9 +76,17 @@ that represent the scan methods used for the two input relations. 3) If we only had two base relations, we are done: we just pick the cheapest path for the join RelOptInfo. If we had more than two, we now need to consider ways of joining join RelOptInfos to each other to make -join RelOptInfos that represent more than two base relations. This process -is repeated until we have finally built a RelOptInfo that represents all -the base relations in the query. Then we pick its cheapest Path. +join RelOptInfos that represent more than two base relations. + +The join tree is constructed using a "dynamic programming" algorithm: +in the first pass (already described) we consider ways to create join rels +representing exactly two base relations. The second pass considers ways +to make join rels that represent exactly three base relations; the next pass, +four relations, etc. The last pass considers how to make the final join +relation that includes all base rels --- obviously there can be only one +join rel at this top level, whereas there can be more than one join rel +at lower levels. At each level we use joins that follow available join +clauses, if possible, just as described for the first level. For example: @@ -69,6 +100,7 @@ For example: {1 2},{2 3},{3 4} {1 2 3},{2 3 4} {1 2 3 4} + (other possibilities will be excluded for lack of join clauses) SELECT * FROM tab1, tab2, tab3, tab4 @@ -78,56 +110,43 @@ For example: Tables 1, 2, 3, and 4 are joined as: {1 2},{1 3},{1 4} - {1 2 3},{1 3 4},{1,2,4} + {1 2 3},{1 3 4},{1 2 4} {1 2 3 4} -In the default left-handed joins, each RelOptInfo adds one -single-relation RelOptInfo in each join pass, and the added RelOptInfo -is always the inner relation in the join. In right-handed joins, the -added RelOptInfo is the outer relation in the join. In bushy plans, -multi-relation RelOptInfo's can be joined to other multi-relation -RelOptInfo's. +We consider left-handed plans (the outer rel of an upper join is a joinrel, +but the inner is always a base rel); right-handed plans (outer rel is always +a base rel); and bushy plans (both inner and outer can be joins themselves). +For example, when building {1 2 3 4} we consider joining {1 2 3} to {4} +(left-handed), {4} to {1 2 3} (right-handed), and {1 2} to {3 4} (bushy), +among other choices. Although the jointree scanning code produces these +potential join combinations one at a time, all the ways to produce the +same set of joined base rels will share the same RelOptInfo, so the paths +produced from different join combinations that produce equivalent joinrels +will compete in add_pathlist. + +Once we have built the final join rel, we use either the cheapest path +for it or the cheapest path with the desired ordering (if that's cheaper +than applying a sort to the cheapest other path). + Optimizer Functions ------------------- -These directories take the Query structure returned by the parser, and -generate a plan used by the executor. The /plan directory generates the -actual output plan, the /path code generates all possible ways to join the -tables, and /prep handles special cases like inheritance. /util is utility -stuff. /geqo is the separate "genetic optimization" planner --- it does -a semi-random search rather than exhaustively considering all possible -join trees. - planner() handle inheritance by processing separately -init_query_planner() preprocess target list preprocess qualifications(WHERE) --query_planner() - cnfify() - Summary: - - Simple cases with all AND's are handled by removing the AND's: - - convert: a = 1 AND b = 2 AND c = 3 - to: a = 1, b = 2, c = 3 - - Qualifications with OR's are handled differently. OR's inside AND - clauses are not modified drastically: - - convert: a = 1 AND b = 2 AND (c = 3 OR d = 4) - to: a = 1, b = 2, c = 3 OR d = 4 - - OR's in the upper level are more complex to handle: - - convert: (a = 1 AND b = 2) OR c = 3 - to: (a = 1 OR c = 3) AND (b = 2 OR c = 3) - finally: (a = 1 OR c = 3), (b = 2 OR c = 3) - - These clauses all have to be true for a result to be returned, - so the optimizer can choose the most restrictive clauses. - + simplify constant subexpressions + canonicalize_qual() + Attempt to reduce WHERE clause to either CNF or DNF canonical form. + CNF (top-level-AND) is preferred, since the optimizer can then use + any of the AND subclauses to filter tuples; but quals that are in + or close to DNF form will suffer exponential expansion if we try to + force them to CNF. In pathological cases either transform may expand + the qual unreasonably; so we may have to leave it un-normalized, + thereby reducing the accuracy of selectivity estimates. pull out constants from target list get a target list that only contains column names, no expressions if none, then return @@ -142,20 +161,14 @@ planner() find selectivity of columns used in joins -----make_one_rel_by_joins() jump to geqo if needed - again: - make_rels_by_joins(): - for each joinrel: - make_rels_by_clause_joins() - for each rel's joininfo list: - if a join from the join clause adds only one relation, do the join - or make_rels_by_clauseless_joins() - update_rels_pathlist_for_joins() - generate nested,merge,hash join paths for new rel's created above - merge_rels_with_same_relids() - merge RelOptInfo paths that have the same relids because of joins - rels_set_cheapest() - set cheapest path - if all relations in one RelOptInfo, return + else call make_rels_by_joins() for each level of join tree needed + make_rels_by_joins(): + For each joinrel of the prior level, do make_rels_by_clause_joins() + if it has join clauses, or make_rels_by_clauseless_joins() if not. + Also generate "bushy plan" joins between joinrels of lower levels. + Back at make_one_rel_by_joins(), apply set_cheapest() to extract the + cheapest path for each newly constructed joinrel. + Loop back if this wasn't the top join level. do group(GROUP) do aggregate put back constants @@ -164,7 +177,6 @@ planner() make sort(ORDER BY) - Optimizer Data Structures ------------------------- @@ -174,13 +186,13 @@ RelOptInfo - a relation or joined relations JoinInfo - join clauses, including the relids needed for the join Path - every way to generate a RelOptInfo(sequential,index,joins) - SeqScan - a plain Path node with nodeTag = T_SeqScan + SeqScan - a plain Path node with nodeTag = T_SeqScan IndexPath - index scans NestPath - nested-loop joins MergePath - merge joins HashPath - hash joins - PathKeys - a data structure representing the ordering of a path + PathKeys - a data structure representing the ordering of a path The optimizer spends a good deal of its time worrying about the ordering of the tuples returned by a path. The reason this is useful is that by @@ -192,21 +204,19 @@ generated during the optimization process are marked with their sort order (to the extent that it is known) for possible use by a higher-level merge. It is also possible to avoid an explicit sort step to implement a user's -ORDER BY clause if the final path has the right ordering already. -Currently, this is not very well implemented: we avoid generating a -redundant sort if the chosen path has the desired order, but we do not do -anything to encourage the selection of such a path --- so we only avoid the -sort if the path that would be chosen anyway (because it is cheapest -without regard to its ordering) is properly sorted. The path winnowing -process needs to be aware of the desired output order and account for the -cost of doing an explicit sort while it is choosing the best path. +ORDER BY clause if the final path has the right ordering already, so the +sort ordering is of interest even at the top level. subplanner() will +look for the cheapest path with a sort order matching the desired order, +and will compare its cost to the cost of using the cheapest-overall path +and doing an explicit sort. When we are generating paths for a particular RelOptInfo, we discard a path if it is more expensive than another known path that has the same or better sort order. We will never discard a path that is the only known way to -achieve a given sort order. In this way, the next level up will have the -maximum freedom to build mergejoins without sorting, since it can pick from -any of the paths retained for its inputs. +achieve a given sort order (without an explicit sort, that is). In this +way, the next level up will have the maximum freedom to build mergejoins +without sorting, since it can pick from any of the paths retained for its +inputs. See path/pathkeys.c for an explanation of the PathKeys data structure that represents what is known about the sort order of a particular Path. diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index 382a4feb10..614ca47c84 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: geqo_eval.c,v 1.46 2000/01/26 05:56:33 momjian Exp $ + * $Id: geqo_eval.c,v 1.47 2000/02/07 04:40:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,6 +32,7 @@ #include "optimizer/cost.h" #include "optimizer/geqo.h" +#include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "utils/portal.h" @@ -121,7 +122,6 @@ gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene, RelOptInfo *old { RelOptInfo *inner_rel; /* current relation */ int base_rel_index; - List *new_rels; RelOptInfo *new_rel; if (rel_count < num_gene) @@ -130,7 +130,7 @@ gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene, RelOptInfo *old /* tour[0] = 3; tour[1] = 1; tour[2] = 2 */ base_rel_index = (int) tour[rel_count]; - inner_rel = (RelOptInfo *) nth(base_rel_index - 1, root->base_rel_list); + inner_rel = (RelOptInfo *) nth(base_rel_index-1, root->base_rel_list); if (rel_count == 0) { /* processing first join with @@ -140,54 +140,23 @@ gimme_tree(Query *root, Gene *tour, int rel_count, int num_gene, RelOptInfo *old } else { /* tree main part */ - if (!(new_rels = make_rels_by_clause_joins(root, old_rel, - old_rel->joininfo, - inner_rel->relids))) + List *acceptable_rels = lcons(inner_rel, NIL); + + new_rel = make_rels_by_clause_joins(root, old_rel, + acceptable_rels); + if (! new_rel) { - new_rels = make_rels_by_clauseless_joins(old_rel, - lcons(inner_rel, NIL)); - - /* - * we don't do bushy plans in geqo, do we? bjm 02/18/1999 - * new_rels = append(new_rels, - * make_rels_by_clauseless_joins(old_rel, - * lcons(old_rel,NIL)); - */ + new_rel = make_rels_by_clauseless_joins(root, old_rel, + acceptable_rels); + if (! new_rel) + elog(ERROR, "gimme_tree: failed to construct join rel"); } - /* process new_rel->pathlist */ - update_rels_pathlist_for_joins(root, new_rels); - - /* prune new_rels */ - /* MAU: is this necessary? */ - - /* - * what's the matter if more than one new rel is left till - * now? - */ - - /* - * joinrels in newrels with different ordering of relids are - * not possible - */ - if (length(new_rels) > 1) - merge_rels_with_same_relids(new_rels); - - if (length(new_rels) > 1) - { /* should never be reached ... */ - elog(DEBUG, "gimme_tree: still %d relations left", length(new_rels)); - } - - rels_set_cheapest(root, new_rels); - - /* get essential new relation */ - new_rel = (RelOptInfo *) lfirst(new_rels); rel_count++; + Assert(length(new_rel->relids) == rel_count); - /* processing of other new_rel attributes */ - set_rel_rows_width(root, new_rel); - - root->join_rel_list = lcons(new_rel, NIL); + /* Find and save the cheapest path for this rel */ + set_cheapest(new_rel, new_rel->pathlist); return gimme_tree(root, tour, rel_count, num_gene, new_rel); } diff --git a/src/backend/optimizer/geqo/geqo_misc.c b/src/backend/optimizer/geqo/geqo_misc.c index e9f7876678..849c739f2d 100644 --- a/src/backend/optimizer/geqo/geqo_misc.c +++ b/src/backend/optimizer/geqo/geqo_misc.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: geqo_misc.c,v 1.26 2000/01/26 05:56:33 momjian Exp $ + * $Id: geqo_misc.c,v 1.27 2000/02/07 04:40:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -188,7 +188,7 @@ geqo_print_path(Query *root, Path *path, int indent) for (i = 0; i < indent + 1; i++) printf("\t"); printf(" clauses=("); - geqo_print_joinclauses(root, path->parent->restrictinfo); + geqo_print_joinclauses(root, jp->joinrestrictinfo); printf(")\n"); if (nodeTag(path) == T_MergePath) diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile index 3808f8242f..fcca74f315 100644 --- a/src/backend/optimizer/path/Makefile +++ b/src/backend/optimizer/path/Makefile @@ -4,7 +4,7 @@ # Makefile for optimizer/path # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/optimizer/path/Makefile,v 1.12 1999/12/13 22:32:52 momjian Exp $ +# $Header: /cvsroot/pgsql/src/backend/optimizer/path/Makefile,v 1.13 2000/02/07 04:40:59 tgl Exp $ # #------------------------------------------------------------------------- @@ -14,8 +14,7 @@ include ../../../Makefile.global CFLAGS += -I../.. OBJS = allpaths.o clausesel.o costsize.o indxpath.o \ - joinpath.o joinrels.o orindxpath.o pathkeys.o prune.o \ - tidpath.o + joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o all: SUBSYS.o diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index c4d36a064b..52c30f7d01 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.57 2000/01/26 05:56:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.58 2000/02/07 04:40:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,6 +21,7 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" + #ifdef GEQO bool enable_geqo = true; #else @@ -30,31 +31,28 @@ bool enable_geqo = false; int geqo_rels = GEQO_RELS; -static void set_base_rel_pathlist(Query *root, List *rels); -static RelOptInfo *make_one_rel_by_joins(Query *root, List *rels, - int levels_needed); +static void set_base_rel_pathlist(Query *root); +static RelOptInfo *make_one_rel_by_joins(Query *root, int levels_needed); #ifdef OPTIMIZER_DEBUG static void debug_print_rel(Query *root, RelOptInfo *rel); - #endif + /* * make_one_rel * Finds all possible access paths for executing a query, returning a - * single rel. - * - * 'rels' is the list of single relation entries appearing in the query + * single rel that represents the join of all base rels in the query. */ RelOptInfo * -make_one_rel(Query *root, List *rels) +make_one_rel(Query *root) { int levels_needed; /* * Set the number of join (not nesting) levels yet to be processed. */ - levels_needed = length(rels); + levels_needed = length(root->base_rel_list); if (levels_needed <= 0) return NULL; @@ -62,159 +60,162 @@ make_one_rel(Query *root, List *rels) /* * Generate access paths for the base rels. */ - set_base_rel_pathlist(root, rels); + set_base_rel_pathlist(root); - if (levels_needed <= 1) + if (levels_needed == 1) { /* * Single relation, no more processing is required. */ - return lfirst(rels); + return (RelOptInfo *) lfirst(root->base_rel_list); } else { /* * Generate join tree. */ - return make_one_rel_by_joins(root, rels, levels_needed); + return make_one_rel_by_joins(root, levels_needed); } } /* * set_base_rel_pathlist - * Finds all paths available for scanning each relation entry in - * 'rels'. Sequential scan and any available indices are considered - * if possible (indices are not considered for lower nesting levels). - * All useful paths are attached to the relation's 'pathlist' field. - * - * MODIFIES: rels + * Finds all paths available for scanning each base-relation entry. + * Sequential scan and any available indices are considered. + * Each useful path is attached to its relation's 'pathlist' field. */ static void -set_base_rel_pathlist(Query *root, List *rels) +set_base_rel_pathlist(Query *root) { - List *temp; + List *rellist; - foreach(temp, rels) + foreach(rellist, root->base_rel_list) { - RelOptInfo *rel = (RelOptInfo *) lfirst(temp); + RelOptInfo *rel = (RelOptInfo *) lfirst(rellist); List *indices = find_relation_indices(root, rel); - List *sequential_scan_list; - List *rel_index_scan_list; - List *or_index_scan_list; - List *tidscan_pathlist; - sequential_scan_list = lcons(create_seqscan_path(rel), NIL); - /* Tid Scan Pathlist add */ - tidscan_pathlist = create_tidscan_paths(root, rel); - if (tidscan_pathlist) - sequential_scan_list = nconc(sequential_scan_list, - tidscan_pathlist); - rel_index_scan_list = create_index_paths(root, - rel, - indices, - rel->restrictinfo, - rel->joininfo); + /* Mark rel with estimated output rows, width, etc */ + set_baserel_size_estimates(root, rel); + + /* + * Generate paths and add them to the rel's pathlist. + * + * add_path/add_pathlist will discard any paths that are dominated + * by another available path, keeping only those paths that are + * superior along at least one dimension of cost or sortedness. + */ + + /* Consider sequential scan */ + add_path(rel, create_seqscan_path(rel)); + + /* Consider TID scans */ + add_pathlist(rel, create_tidscan_paths(root, rel)); + + /* Consider index paths for both simple and OR index clauses */ + add_pathlist(rel, create_index_paths(root, + rel, + indices, + rel->baserestrictinfo, + rel->joininfo)); /* Note: create_or_index_paths depends on create_index_paths * to have marked OR restriction clauses with relevant indices; * this is why it doesn't need to be given the full list of indices. */ - or_index_scan_list = create_or_index_paths(root, rel, - rel->restrictinfo); + add_pathlist(rel, create_or_index_paths(root, rel, + rel->baserestrictinfo)); - /* add_pathlist will discard any paths that are dominated by - * another available path, keeping only those paths that are - * superior along at least one dimension of cost or sortedness. - */ - rel->pathlist = add_pathlist(rel, - sequential_scan_list, - nconc(rel_index_scan_list, - or_index_scan_list)); - - /* Now find the cheapest of the paths */ + /* Now find the cheapest of the paths for this rel */ set_cheapest(rel, rel->pathlist); - /* Mark rel with estimated output rows and width */ - set_rel_rows_width(root, rel); } } /* * make_one_rel_by_joins * Find all possible joinpaths for a query by successively finding ways - * to join single relations into join relations. + * to join component relations into join relations. * - * Find all possible joinpaths(bushy trees) for a query by systematically - * finding ways to join relations(both original and derived) together. - * - * 'rels' is the current list of relations for which join paths - * are to be found, i.e., the current list of relations that - * have already been derived. - * 'levels_needed' is the number of iterations needed + * 'levels_needed' is the number of iterations needed, ie, the number of + * base relations present in the query * * Returns the final level of join relations, i.e., the relation that is * the result of joining all the original relations together. */ static RelOptInfo * -make_one_rel_by_joins(Query *root, List *rels, int levels_needed) +make_one_rel_by_joins(Query *root, int levels_needed) { - List *x; - List *joined_rels = NIL; + int lev; RelOptInfo *rel; /******************************************* * genetic query optimizer entry point * * * - *******************************************/ - if (enable_geqo && length(root->base_rel_list) >= geqo_rels) - return geqo(root); - - /******************************************* * rest will be skipped in case of GEQO * *******************************************/ + if (enable_geqo && levels_needed >= geqo_rels) + return geqo(root); - while (--levels_needed) + /* + * We employ a simple "dynamic programming" algorithm: we first + * find all ways to build joins of two base relations, then all ways + * to build joins of three base relations (from two-base-rel joins + * and other base rels), then four-base-rel joins, and so on until + * we have considered all ways to join all N relations into one rel. + */ + + for (lev = 2; lev <= levels_needed; lev++) { + List *first_old_rel = root->join_rel_list; + List *x; /* * Determine all possible pairs of relations to be joined at this - * level. Determine paths for joining these relation pairs and - * modify 'joined_rels' accordingly, then eliminate redundant join - * relations. + * level, and build paths for making each one from every available + * pair of lower-level relations. Results are prepended to + * root->join_rel_list. */ - joined_rels = make_rels_by_joins(root, rels); - - update_rels_pathlist_for_joins(root, joined_rels); - - merge_rels_with_same_relids(joined_rels); - - root->join_rel_list = rels = joined_rels; - -#ifdef NOT_USED + make_rels_by_joins(root, lev); /* - * * for each expensive predicate in each path in each distinct - * rel, * consider doing pullup -- JMH + * The relations created at the current level will appear at the + * front of root->join_rel_list. */ - if (XfuncMode != XFUNC_NOPULL && XfuncMode != XFUNC_OFF) - foreach(x, joined_rels) - xfunc_trypullup((RelOptInfo *) lfirst(x)); -#endif - - rels_set_cheapest(root, joined_rels); - - foreach(x, joined_rels) + foreach(x, root->join_rel_list) { + if (x == first_old_rel) + break; /* no more rels added at this level */ + rel = (RelOptInfo *) lfirst(x); +#ifdef NOT_USED + /* + * * for each expensive predicate in each path in each distinct + * rel, * consider doing pullup -- JMH + */ + if (XfuncMode != XFUNC_NOPULL && XfuncMode != XFUNC_OFF) + xfunc_trypullup(rel); +#endif + + /* Find and save the cheapest path for this rel */ + set_cheapest(rel, rel->pathlist); + #ifdef OPTIMIZER_DEBUG - printf("levels left: %d\n", levels_needed); debug_print_rel(root, rel); #endif } - } - return get_cheapest_complete_rel(rels); + /* + * Now, the front of the join_rel_list should be the single rel + * representing the join of all the base rels. + */ + Assert(length(root->join_rel_list) > 0); + rel = (RelOptInfo *) lfirst(root->join_rel_list); + Assert(length(rel->relids) == levels_needed); + Assert(length(root->join_rel_list) == 1 || + length(((RelOptInfo *) lsecond(root->join_rel_list))->relids) < levels_needed); + + return rel; } /***************************************************************************** @@ -222,6 +223,7 @@ make_one_rel_by_joins(Query *root, List *rels, int levels_needed) *****************************************************************************/ #ifdef OPTIMIZER_DEBUG + static void print_joinclauses(Query *root, List *clauses) { @@ -286,7 +288,7 @@ print_path(Query *root, Path *path, int indent) for (i = 0; i < indent + 1; i++) printf("\t"); printf(" clauses=("); - print_joinclauses(root, jp->path.parent->restrictinfo); + print_joinclauses(root, jp->joinrestrictinfo); printf(")\n"); if (nodeTag(path) == T_MergePath) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 1246f87830..7c8d4b63c0 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -19,7 +19,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.50 2000/01/26 05:56:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.51 2000/02/07 04:40:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -447,22 +447,26 @@ cost_hashjoin(Path *outer_path, } /* - * set_rel_rows_width - * Set the 'rows' and 'width' estimates for the given base relation. + * set_baserel_size_estimates + * Set the size estimates for the given base relation. * - * 'rows' is the estimated number of output tuples (after applying - * restriction clauses). - * 'width' is the estimated average output tuple width in bytes. + * The rel's targetlist and restrictinfo list must have been constructed + * already. + * + * We set the following fields of the rel node: + * rows: the estimated number of output tuples (after applying + * restriction clauses). + * width: the estimated average output tuple width in bytes. */ void -set_rel_rows_width(Query *root, RelOptInfo *rel) +set_baserel_size_estimates(Query *root, RelOptInfo *rel) { /* Should only be applied to base relations */ Assert(length(rel->relids) == 1); rel->rows = rel->tuples * restrictlist_selectivity(root, - rel->restrictinfo, + rel->baserestrictinfo, lfirsti(rel->relids)); Assert(rel->rows >= 0); @@ -470,28 +474,56 @@ set_rel_rows_width(Query *root, RelOptInfo *rel) } /* - * set_joinrel_rows_width - * Set the 'rows' and 'width' estimates for the given join relation. + * set_joinrel_size_estimates + * Set the size estimates for the given join relation. + * + * The rel's targetlist must have been constructed already, and a + * restriction clause list that matches the given component rels must + * be provided. + * + * Since there is more than one way to make a joinrel for more than two + * base relations, the results we get here could depend on which component + * rel pair is provided. In theory we should get the same answers no matter + * which pair is provided; in practice, since the selectivity estimation + * routines don't handle all cases equally well, we might not. But there's + * not much to be done about it. (Would it make sense to repeat the + * calculations for each pair of input rels that's encountered, and somehow + * average the results? Probably way more trouble than it's worth.) + * + * We set the same relnode fields as set_baserel_size_estimates() does. */ void -set_joinrel_rows_width(Query *root, RelOptInfo *rel, - JoinPath *joinpath) +set_joinrel_size_estimates(Query *root, RelOptInfo *rel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel, + List *restrictlist) { double temp; /* cartesian product */ - temp = joinpath->outerjoinpath->parent->rows * - joinpath->innerjoinpath->parent->rows; + temp = outer_rel->rows * inner_rel->rows; - /* apply join restrictivity */ + /* + * Apply join restrictivity. Note that we are only considering clauses + * that become restriction clauses at this join level; we are not + * double-counting them because they were not considered in estimating + * the sizes of the component rels. + */ temp *= restrictlist_selectivity(root, - joinpath->path.parent->restrictinfo, + restrictlist, 0); Assert(temp >= 0); rel->rows = temp; - set_rel_width(root, rel); + /* + * We could apply set_rel_width() to compute the output tuple width + * from scratch, but at present it's always just the sum of the input + * widths, so why work harder than necessary? If relnode.c is ever + * taught to remove unneeded columns from join targetlists, go back + * to using set_rel_width here. + */ + rel->width = outer_rel->width + inner_rel->width; } /* @@ -516,6 +548,7 @@ set_rel_width(Query *root, RelOptInfo *rel) * * If a field is variable-length, we make a default assumption. Would be * better if VACUUM recorded some stats about the average field width... + * also, we have access to the atttypmod, but fail to use it... */ static int compute_attribute_width(TargetEntry *tlistentry) diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 371dd2b7b5..f8912a1a54 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.50 2000/02/06 03:27:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.51 2000/02/07 04:40:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,126 +31,108 @@ static Path *best_innerjoin(List *join_paths, List *outer_relid); static List *sort_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, List *mergeclause_list); static List *match_unsorted_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, - RelOptInfo *innerrel, List *outerpath_list, - Path *cheapest_inner, Path *best_innerjoin, + RelOptInfo *innerrel, List *restrictlist, + List *outerpath_list, Path *cheapest_inner, + Path *best_innerjoin, List *mergeclause_list); static List *match_unsorted_inner(RelOptInfo *joinrel, RelOptInfo *outerrel, - RelOptInfo *innerrel, List *innerpath_list, + RelOptInfo *innerrel, List *restrictlist, + List *innerpath_list, List *mergeclause_list); static List *hash_inner_and_outer(Query *root, RelOptInfo *joinrel, - RelOptInfo *outerrel, RelOptInfo *innerrel); + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist); static Selectivity estimate_disbursion(Query *root, Var *var); -static List *select_mergejoin_clauses(List *restrictinfo_list); +static List *select_mergejoin_clauses(RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist); + /* - * update_rels_pathlist_for_joins - * Creates all possible ways to process joins for each of the join - * relations in the list 'joinrels.' Each unique path will be included - * in the join relation's 'pathlist' field. + * add_paths_to_joinrel + * Given a join relation and two component rels from which it can be made, + * consider all possible paths that use the two component rels as outer + * and inner rel respectively. Add these paths to the join rel's pathlist + * if they survive comparison with other paths (and remove any existing + * paths that are dominated by these paths). * - * 'joinrels' is the list of relation entries to be joined - * - * Modifies the pathlist field of each joinrel node to contain - * the unique join paths. + * Modifies the pathlist field of the joinrel node to contain the best + * paths found so far. */ void -update_rels_pathlist_for_joins(Query *root, List *joinrels) +add_paths_to_joinrel(Query *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist) { - List *j; + Path *bestinnerjoin; + List *mergeclause_list = NIL; - foreach(j, joinrels) - { - RelOptInfo *joinrel = (RelOptInfo *) lfirst(j); - Relids innerrelids; - Relids outerrelids; - RelOptInfo *innerrel; - RelOptInfo *outerrel; - Path *bestinnerjoin; - List *pathlist; - List *mergeclause_list = NIL; + /* + * Get the best inner join for match_unsorted_outer(). + */ + bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids); - /* - * On entry, joinrel->relids is a list of two sublists of relids, - * namely the outer and inner member relids. Extract these sublists - * and change joinrel->relids to a flattened single list. - * (Use listCopy so as not to damage the member lists...) - */ - outerrelids = lfirst(joinrel->relids); - innerrelids = lsecond(joinrel->relids); + /* + * Find potential mergejoin clauses. + */ + if (enable_mergejoin) + mergeclause_list = select_mergejoin_clauses(joinrel, + outerrel, + innerrel, + restrictlist); - joinrel->relids = nconc(listCopy(outerrelids), - listCopy(innerrelids)); + /* + * 1. Consider mergejoin paths where both relations must be + * explicitly sorted. + */ + add_pathlist(joinrel, sort_inner_and_outer(joinrel, + outerrel, + innerrel, + restrictlist, + mergeclause_list)); - /* - * Get the corresponding RelOptInfos for the outer and inner sides. - * Base relation id is an integer and join relation relid is a - * list of integers. - */ - innerrel = (length(innerrelids) == 1) ? - get_base_rel(root, lfirsti(innerrelids)) : - get_join_rel(root, innerrelids); - outerrel = (length(outerrelids) == 1) ? - get_base_rel(root, lfirsti(outerrelids)) : - get_join_rel(root, outerrelids); + /* + * 2. Consider paths where the outer relation need not be + * explicitly sorted. This includes both nestloops and + * mergejoins where the outer path is already ordered. + */ + add_pathlist(joinrel, match_unsorted_outer(joinrel, + outerrel, + innerrel, + restrictlist, + outerrel->pathlist, + innerrel->cheapestpath, + bestinnerjoin, + mergeclause_list)); - /* - * Get the best inner join for match_unsorted_outer(). - */ - bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids); + /* + * 3. Consider paths where the inner relation need not be + * explicitly sorted. This includes mergejoins only + * (nestloops were already built in match_unsorted_outer). + */ + add_pathlist(joinrel, match_unsorted_inner(joinrel, + outerrel, + innerrel, + restrictlist, + innerrel->pathlist, + mergeclause_list)); - /* - * Find potential mergejoin clauses. - */ - if (enable_mergejoin) - mergeclause_list = select_mergejoin_clauses(joinrel->restrictinfo); - - /* - * 1. Consider mergejoin paths where both relations must be - * explicitly sorted. - */ - pathlist = sort_inner_and_outer(joinrel, outerrel, - innerrel, mergeclause_list); - - /* - * 2. Consider paths where the outer relation need not be - * explicitly sorted. This includes both nestloops and - * mergejoins where the outer path is already ordered. - */ - pathlist = add_pathlist(joinrel, pathlist, - match_unsorted_outer(joinrel, - outerrel, - innerrel, - outerrel->pathlist, - innerrel->cheapestpath, - bestinnerjoin, - mergeclause_list)); - - /* - * 3. Consider paths where the inner relation need not be - * explicitly sorted. This includes mergejoins only - * (nestloops were already built in match_unsorted_outer). - */ - pathlist = add_pathlist(joinrel, pathlist, - match_unsorted_inner(joinrel, outerrel, - innerrel, - innerrel->pathlist, - mergeclause_list)); - - /* - * 4. Consider paths where both outer and inner relations must be - * hashed before being joined. - */ - if (enable_hashjoin) - pathlist = add_pathlist(joinrel, pathlist, - hash_inner_and_outer(root, joinrel, - outerrel, - innerrel)); - - /* Save the completed pathlist in the join rel */ - joinrel->pathlist = pathlist; - } + /* + * 4. Consider paths where both outer and inner relations must be + * hashed before being joined. + */ + if (enable_hashjoin) + add_pathlist(joinrel, hash_inner_and_outer(root, + joinrel, + outerrel, + innerrel, + restrictlist)); } /* @@ -197,8 +179,10 @@ best_innerjoin(List *join_paths, Relids outer_relids) * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation + * 'restrictlist' contains all of the RestrictInfo nodes for restriction + * clauses that apply to this join * 'mergeclause_list' is a list of RestrictInfo nodes for available - * mergejoin clauses between these two relations + * mergejoin clauses in this join * * Returns a list of mergejoin paths. */ @@ -206,6 +190,7 @@ static List * sort_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, List *mergeclause_list) { List *path_list = NIL; @@ -255,12 +240,14 @@ sort_inner_and_outer(RelOptInfo *joinrel, innerkeys = make_pathkeys_for_mergeclauses(curclause_list, innerrel->targetlist); /* Build pathkeys representing output sort order. */ - merge_pathkeys = build_join_pathkeys(outerkeys, joinrel->targetlist, + merge_pathkeys = build_join_pathkeys(outerkeys, + joinrel->targetlist, curclause_list); /* And now we can make the path. */ path_node = create_mergejoin_path(joinrel, outerrel->cheapestpath, innerrel->cheapestpath, + restrictlist, merge_pathkeys, get_actual_clauses(curclause_list), outerkeys, @@ -301,11 +288,13 @@ sort_inner_and_outer(RelOptInfo *joinrel, * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation + * 'restrictlist' contains all of the RestrictInfo nodes for restriction + * clauses that apply to this join * 'outerpath_list' is the list of possible outer paths * 'cheapest_inner' is the cheapest inner path * 'best_innerjoin' is the best inner index path (if any) * 'mergeclause_list' is a list of RestrictInfo nodes for available - * mergejoin clauses between these two relations + * mergejoin clauses in this join * * Returns a list of possible join path nodes. */ @@ -313,6 +302,7 @@ static List * match_unsorted_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, List *outerpath_list, Path *cheapest_inner, Path *best_innerjoin, @@ -358,6 +348,7 @@ match_unsorted_outer(RelOptInfo *joinrel, create_nestloop_path(joinrel, outerpath, nestinnerpath, + restrictlist, merge_pathkeys)); /* Done with this outer path if no chance for a mergejoin */ @@ -425,6 +416,7 @@ match_unsorted_outer(RelOptInfo *joinrel, create_mergejoin_path(joinrel, outerpath, mergeinnerpath, + restrictlist, merge_pathkeys, mergeclauses, NIL, @@ -442,9 +434,11 @@ match_unsorted_outer(RelOptInfo *joinrel, * 'joinrel' is the join result relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation + * 'restrictlist' contains all of the RestrictInfo nodes for restriction + * clauses that apply to this join * 'innerpath_list' is the list of possible inner join paths * 'mergeclause_list' is a list of RestrictInfo nodes for available - * mergejoin clauses between these two relations + * mergejoin clauses in this join * * Returns a list of possible merge paths. */ @@ -452,6 +446,7 @@ static List * match_unsorted_inner(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, List *innerpath_list, List *mergeclause_list) { @@ -510,6 +505,7 @@ match_unsorted_inner(RelOptInfo *joinrel, create_mergejoin_path(joinrel, mergeouterpath, innerpath, + restrictlist, merge_pathkeys, mergeclauses, outersortkeys, @@ -528,6 +524,8 @@ match_unsorted_inner(RelOptInfo *joinrel, * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation + * 'restrictlist' contains all of the RestrictInfo nodes for restriction + * clauses that apply to this join * * Returns a list of hashjoin paths. */ @@ -535,39 +533,62 @@ static List * hash_inner_and_outer(Query *root, RelOptInfo *joinrel, RelOptInfo *outerrel, - RelOptInfo *innerrel) + RelOptInfo *innerrel, + List *restrictlist) { List *hpath_list = NIL; + Relids outerrelids = outerrel->relids; + Relids innerrelids = innerrel->relids; List *i; - foreach(i, joinrel->restrictinfo) + /* + * Scan the join's restrictinfo list to find hashjoinable clauses + * that are usable with this pair of sub-relations. Since we currently + * accept only var-op-var clauses as hashjoinable, we need only check + * the membership of the vars to determine whether a particular clause + * can be used with this pair of sub-relations. This code would need + * to be upgraded if we wanted to allow more-complex expressions in + * hash joins. + */ + foreach(i, restrictlist) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); + Expr *clause; + Var *left, + *right, + *inner; + Selectivity innerdisbursion; + HashPath *hash_path; - /* we consider only clauses previously marked hashjoinable */ - if (restrictinfo->hashjoinoperator) - { - Expr *clause = restrictinfo->clause; - Var *leftop = get_leftop(clause); - Var *rightop = get_rightop(clause); - Var *innerop; - Selectivity innerdisbursion; - HashPath *hash_path; + if (restrictinfo->hashjoinoperator == InvalidOid) + continue; /* not hashjoinable */ - /* find the inner var and estimate its disbursion */ - if (intMember(leftop->varno, innerrel->relids)) - innerop = leftop; - else - innerop = rightop; - innerdisbursion = estimate_disbursion(root, innerop); + clause = restrictinfo->clause; + /* these must be OK, since check_hashjoinable accepted the clause */ + left = get_leftop(clause); + right = get_rightop(clause); - hash_path = create_hashjoin_path(joinrel, - outerrel->cheapestpath, - innerrel->cheapestpath, - lcons(clause, NIL), - innerdisbursion); - hpath_list = lappend(hpath_list, hash_path); - } + /* check if clause is usable with these sub-rels, find inner var */ + if (intMember(left->varno, outerrelids) && + intMember(right->varno, innerrelids)) + inner = right; + else if (intMember(left->varno, innerrelids) && + intMember(right->varno, outerrelids)) + inner = left; + else + continue; /* no good for these input relations */ + + /* estimate disbursion of inner var for costing purposes */ + innerdisbursion = estimate_disbursion(root, inner); + + hash_path = create_hashjoin_path(joinrel, + outerrel->cheapestpath, + innerrel->cheapestpath, + restrictlist, + lcons(clause, NIL), + innerdisbursion); + + hpath_list = lappend(hpath_list, hash_path); } return hpath_list; @@ -600,28 +621,47 @@ estimate_disbursion(Query *root, Var *var) * Select mergejoin clauses that are usable for a particular join. * Returns a list of RestrictInfo nodes for those clauses. * - * Currently, all we need is the restrictinfo list of the joinrel. - * By definition, any mergejoinable clause in that list will work --- - * it must involve only vars in the join, or it wouldn't have been - * in the restrict list, and it must involve vars on both sides of - * the join, or it wouldn't have made it up to this level of join. - * Since we currently allow only simple Vars as the left and right - * sides of mergejoin clauses, that means the mergejoin clauses must - * be usable for this join. If we ever allow more complex expressions - * containing multiple Vars, we would need to check that each side - * of a potential joinclause uses only vars from one side of the join. + * We examine each restrictinfo clause known for the join to see + * if it is mergejoinable and involves vars from the two sub-relations + * currently of interest. + * + * Since we currently allow only plain Vars as the left and right sides + * of mergejoin clauses, this test is relatively simple. This routine + * would need to be upgraded to support more-complex expressions + * as sides of mergejoins. In theory, we could allow arbitrarily complex + * expressions in mergejoins, so long as one side uses only vars from one + * sub-relation and the other side uses only vars from the other. */ static List * -select_mergejoin_clauses(List *restrictinfo_list) +select_mergejoin_clauses(RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist) { List *result_list = NIL; + Relids outerrelids = outerrel->relids; + Relids innerrelids = innerrel->relids; List *i; - foreach(i, restrictinfo_list) + foreach(i, restrictlist) { - RestrictInfo *restrictinfo = lfirst(i); + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); + Expr *clause; + Var *left, + *right; - if (restrictinfo->mergejoinoperator != InvalidOid) + if (restrictinfo->mergejoinoperator == InvalidOid) + continue; /* not mergejoinable */ + + clause = restrictinfo->clause; + /* these must be OK, since check_mergejoinable accepted the clause */ + left = get_leftop(clause); + right = get_rightop(clause); + + if ((intMember(left->varno, outerrelids) && + intMember(right->varno, innerrelids)) || + (intMember(left->varno, innerrelids) && + intMember(right->varno, outerrelids))) result_list = lcons(restrictinfo, result_list); } diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 5fc673e5db..e872b67623 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -8,399 +8,263 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.42 2000/02/06 03:27:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.43 2000/02/07 04:40:59 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" -#ifdef HAVE_LIMITS_H -#include -#ifndef MAXINT -#define MAXINT INT_MAX -#endif -#else -#ifdef HAVE_VALUES_H -#include -#endif -#endif - #include "optimizer/cost.h" #include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/tlist.h" -static RelOptInfo *make_join_rel(RelOptInfo *outer_rel, RelOptInfo *inner_rel); -static List *new_join_tlist(List *tlist, int first_resdomno); -static void build_joinrel_restrict_and_join(RelOptInfo *joinrel, - List *joininfo_list, - Relids join_relids); + +static RelOptInfo *make_join_rel(Query *root, RelOptInfo *rel1, + RelOptInfo *rel2); + /* * make_rels_by_joins - * Find all possible joins for each of the outer join relations in - * 'old_rels'. A rel node is created for each possible join relation, - * and the resulting list of nodes is returned. If at all possible, only - * those relations for which join clauses exist are considered. If none - * of these exist for a given relation, all remaining possibilities are - * considered. + * Consider ways to produce join relations containing exactly 'level' + * base relations. (This is one step of the dynamic-programming method + * embodied in make_one_rel_by_joins.) Join rel nodes for each feasible + * combination of base rels are created and added to the front of the + * query's join_rel_list. Implementation paths are created for each + * such joinrel, too. * - * Returns a list of rel nodes corresponding to the new join relations. + * Returns nothing, but adds entries to root->join_rel_list. */ -List * -make_rels_by_joins(Query *root, List *old_rels) +void +make_rels_by_joins(Query *root, int level) { - List *join_list = NIL; List *r; - foreach(r, old_rels) + /* + * First, consider left-sided and right-sided plans, in which rels of + * exactly level-1 member relations are joined against base relations. + * We prefer to join using join clauses, but if we find a rel of level-1 + * members that has no join clauses, we will generate Cartesian-product + * joins against all base rels not already contained in it. + * + * In the first pass (level == 2), we try to join each base rel to each + * base rel that appears later in base_rel_list. (The mirror-image + * joins are handled automatically by make_join_rel.) In later passes, + * we try to join rels of size level-1 from join_rel_list to each + * base rel in base_rel_list. + * + * We assume that the rels already present in join_rel_list appear in + * decreasing order of level (number of members). This should be true + * since we always add new higher-level rels to the front of the list. + */ + if (level == 2) + r = root->base_rel_list; /* level-1 is base rels */ + else + r = root->join_rel_list; + for (; r != NIL; r = lnext(r)) { RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); - List *joined_rels; + int old_level = length(old_rel->relids); + List *other_rels; - if (!(joined_rels = make_rels_by_clause_joins(root, old_rel, - old_rel->joininfo, - NIL))) + if (old_level != level-1) + break; + + if (level == 2) + other_rels = lnext(r); /* only consider remaining base rels */ + else + other_rels = root->base_rel_list; /* consider all base rels */ + + if (old_rel->joininfo != NIL) + { + /* + * Note that if all available join clauses for this rel require + * more than one other rel, we will fail to make any joins against + * it here. That's OK; it'll be considered by "bushy plan" join + * code in a higher-level pass. + */ + make_rels_by_clause_joins(root, + old_rel, + other_rels); + } + else { /* * Oops, we have a relation that is not joined to any other * relation. Cartesian product time. */ - joined_rels = make_rels_by_clauseless_joins(old_rel, - root->base_rel_list); - joined_rels = nconc(joined_rels, - make_rels_by_clauseless_joins(old_rel, - old_rels)); + make_rels_by_clauseless_joins(root, + old_rel, + other_rels); } - - join_list = nconc(join_list, joined_rels); } - return join_list; -} - -/* - * make_rels_by_clause_joins - * Build joins between an outer relation 'old_rel' and relations - * within old_rel's joininfo nodes - * (i.e., relations that participate in join clauses that 'old_rel' - * also participates in). - * - * 'old_rel' is the relation entry for the outer relation - * 'joininfo_list' is a list of join clauses which 'old_rel' - * participates in - * 'only_relids': if not NIL, only joins against base rels mentioned in - * only_relids are allowable. - * - * Returns a list of new join relations. - */ -List * -make_rels_by_clause_joins(Query *root, RelOptInfo *old_rel, - List *joininfo_list, Relids only_relids) -{ - List *join_list = NIL; - List *i; - - foreach(i, joininfo_list) + /* + * Now, consider "bushy plans" in which relations of k base rels are + * joined to relations of level-k base rels, for 2 <= k <= level-2. + * The previous loop left r pointing to the first rel of level level-2. + * + * We only consider bushy-plan joins for pairs of rels where there is + * a suitable join clause, in order to avoid unreasonable growth of + * planning time. + */ + for (; r != NIL; r = lnext(r)) { - JoinInfo *joininfo = (JoinInfo *) lfirst(i); - Relids unjoined_relids = joininfo->unjoined_relids; - RelOptInfo *joined_rel; + RelOptInfo *old_rel = (RelOptInfo *) lfirst(r); + int old_level = length(old_rel->relids); + List *r2; - if (unjoined_relids == NIL) - continue; /* probably can't happen */ + /* We can quit once past the halfway point (make_join_rel took care + * of making the opposite-direction joins) + */ + if (old_level * 2 < level) + break; - if (length(unjoined_relids) == 1 && - (only_relids == NIL || - /* geqo only wants certain relids to be joined to old_rel */ - intMember(lfirsti(unjoined_relids), only_relids))) + if (old_rel->joininfo == NIL) + continue; /* we ignore clauseless joins here */ + + foreach(r2, lnext(r)) { - RelOptInfo *base_rel = get_base_rel(root, - lfirsti(unjoined_relids)); + RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2); + int new_level = length(new_rel->relids); - /* Left-sided join of outer rel against a single base rel */ - joined_rel = make_join_rel(old_rel, base_rel); - join_list = lappend(join_list, joined_rel); - - /* Consider right-sided plan as well */ - if (length(old_rel->relids) > 1) + if (old_level + new_level > level) + continue; /* scan down to new_rels of right size */ + if (old_level + new_level < level) + break; /* no more new_rels of right size */ + if (nonoverlap_setsi(old_rel->relids, new_rel->relids)) { - joined_rel = make_join_rel(base_rel, old_rel); - join_list = lappend(join_list, joined_rel); - } - } + List *i; - if (only_relids == NIL) /* no bushy plans for geqo */ - { - List *r; - - /* Build "bushy" plans: join old_rel against all pre-existing - * joins of rels it doesn't already contain, if there is a - * suitable join clause. - */ - foreach(r, root->join_rel_list) - { - RelOptInfo *join_rel = lfirst(r); - - Assert(length(join_rel->relids) > 1); - if (is_subseti(unjoined_relids, join_rel->relids) && - nonoverlap_setsi(old_rel->relids, join_rel->relids)) + /* OK, we can build a rel of the right level from this pair of + * rels. Do so if there is at least one usable join clause. + */ + foreach(i, old_rel->joininfo) { - joined_rel = make_join_rel(old_rel, join_rel); - join_list = lappend(join_list, joined_rel); + JoinInfo *joininfo = (JoinInfo *) lfirst(i); + + if (is_subseti(joininfo->unjoined_relids, new_rel->relids)) + { + make_join_rel(root, old_rel, new_rel); + break; + } } } } } +} - return join_list; +/* + * make_rels_by_clause_joins + * Build joins between the given relation 'old_rel' and other relations + * that are mentioned within old_rel's joininfo nodes (i.e., relations + * that participate in join clauses that 'old_rel' also participates in). + * The join rel nodes are added to root->join_rel_list. + * + * 'old_rel' is the relation entry for the relation to be joined + * 'other_rels': other rels to be considered for joining + * + * Currently, this is only used with base rels in other_rels, but it would + * work for joining to joinrels too, if the caller ensures there is no + * membership overlap between old_rel and the rels in other_rels. (We need + * no extra test for overlap for base rels, since the is_subset test can + * only succeed when other_rel is not already part of old_rel.) + * + * Returns NULL if no suitable joins were found, else the last suitable + * joinrel processed. (The only caller who checks the return value is + * geqo_eval.c, and it sets things up so there can be no more than one + * "suitable" joinrel; so we don't bother with returning a list.) + */ +RelOptInfo * +make_rels_by_clause_joins(Query *root, + RelOptInfo *old_rel, + List *other_rels) +{ + RelOptInfo *result = NULL; + List *i, + *j; + + foreach(i, old_rel->joininfo) + { + JoinInfo *joininfo = (JoinInfo *) lfirst(i); + Relids unjoined_relids = joininfo->unjoined_relids; + + foreach(j, other_rels) + { + RelOptInfo *other_rel = (RelOptInfo *) lfirst(j); + + if (is_subseti(unjoined_relids, other_rel->relids)) + result = make_join_rel(root, old_rel, other_rel); + } + } + + return result; } /* * make_rels_by_clauseless_joins - * Given an outer relation 'old_rel' and a list of inner relations - * 'inner_rels', create a join relation between 'old_rel' and each - * member of 'inner_rels' that isn't already included in 'old_rel'. + * Given a relation 'old_rel' and a list of other relations + * 'other_rels', create a join relation between 'old_rel' and each + * member of 'other_rels' that isn't already included in 'old_rel'. * - * Returns a list of new join relations. + * 'old_rel' is the relation entry for the relation to be joined + * 'other_rels': other rels to be considered for joining + * + * Currently, this is only used with base rels in other_rels, but it would + * work for joining to joinrels too. + * + * Returns NULL if no suitable joins were found, else the last suitable + * joinrel processed. (The only caller who checks the return value is + * geqo_eval.c, and it sets things up so there can be no more than one + * "suitable" joinrel; so we don't bother with returning a list.) */ -List * -make_rels_by_clauseless_joins(RelOptInfo *old_rel, List *inner_rels) +RelOptInfo * +make_rels_by_clauseless_joins(Query *root, + RelOptInfo *old_rel, + List *other_rels) { - List *join_list = NIL; + RelOptInfo *result = NULL; List *i; - foreach(i, inner_rels) + foreach(i, other_rels) { - RelOptInfo *inner_rel = (RelOptInfo *) lfirst(i); + RelOptInfo *other_rel = (RelOptInfo *) lfirst(i); - if (nonoverlap_setsi(inner_rel->relids, old_rel->relids)) - { - join_list = lappend(join_list, - make_join_rel(old_rel, inner_rel)); - } + if (nonoverlap_setsi(other_rel->relids, old_rel->relids)) + result = make_join_rel(root, old_rel, other_rel); } - return join_list; + return result; } + /* * make_join_rel - * Creates and initializes a new join relation. - * - * 'outer_rel' and 'inner_rel' are relation nodes for the relations to be - * joined - * - * Returns the new join relation node. + * Find or create a join RelOptInfo that represents the join of + * the two given rels, and add to it path information for paths + * created with the two rels as outer and inner rel. + * (The join rel may already contain paths generated from other + * pairs of rels that add up to the same set of base rels.) + * The join rel is stored in the query's join_rel_list. */ static RelOptInfo * -make_join_rel(RelOptInfo *outer_rel, RelOptInfo *inner_rel) +make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2) { - RelOptInfo *joinrel = makeNode(RelOptInfo); - List *new_outer_tlist; - List *new_inner_tlist; + RelOptInfo *joinrel; + List *restrictlist; /* - * This function uses a trick to pass inner/outer rels as two sublists. - * The list will be flattened out in update_rels_pathlist_for_joins(). + * Find or build the join RelOptInfo, and compute the restrictlist + * that goes with this particular joining. */ - joinrel->relids = lcons(outer_rel->relids, lcons(inner_rel->relids, NIL)); - joinrel->rows = 0; - joinrel->width = 0; - joinrel->targetlist = NIL; - joinrel->pathlist = NIL; - joinrel->cheapestpath = (Path *) NULL; - joinrel->pruneable = true; - joinrel->indexed = false; - joinrel->pages = 0; - joinrel->tuples = 0; - joinrel->restrictinfo = NIL; - joinrel->joininfo = NIL; - joinrel->innerjoin = NIL; + joinrel = get_join_rel(root, rel1, rel2, &restrictlist); /* - * Create a new tlist by removing irrelevant elements from both tlists - * of the outer and inner join relations and then merging the results - * together. + * We consider paths using each rel as both outer and inner. */ - new_outer_tlist = new_join_tlist(outer_rel->targetlist, 1); - new_inner_tlist = new_join_tlist(inner_rel->targetlist, - length(new_outer_tlist) + 1); - joinrel->targetlist = nconc(new_outer_tlist, new_inner_tlist); - - /* - * Construct restrict and join clause lists for the new joinrel. - * - * nconc(listCopy(x), y) is an idiom for making a new list without - * changing either input list. - */ - build_joinrel_restrict_and_join(joinrel, - nconc(listCopy(outer_rel->joininfo), - inner_rel->joininfo), - nconc(listCopy(outer_rel->relids), - inner_rel->relids)); + add_paths_to_joinrel(root, joinrel, rel1, rel2, restrictlist); + add_paths_to_joinrel(root, joinrel, rel2, rel1, restrictlist); return joinrel; } - -/* - * new_join_tlist - * Builds a join relation's target list by keeping those elements that - * will be in the final target list and any other elements that are still - * needed for future joins. For a target list entry to still be needed - * for future joins, its 'joinlist' field must not be empty after removal - * of all relids in 'other_relids'. - * - * XXX this seems to be a dead test --- we don't keep track of joinlists - * for individual targetlist entries anymore, if we ever did... - * - * 'tlist' is the target list of one of the join relations - * 'other_relids' is a list of relids contained within the other - * join relation - * 'first_resdomno' is the resdom number to use for the first created - * target list entry - * - * Returns the new target list. - */ -static List * -new_join_tlist(List *tlist, - int first_resdomno) -{ - int resdomno = first_resdomno - 1; - List *t_list = NIL; - List *i; - List *join_list = NIL; - - foreach(i, tlist) - { - TargetEntry *xtl = lfirst(i); - bool in_final_tlist; - - /* - * XXX surely this is wrong? join_list is never changed? tgl - * 2/99 - */ - in_final_tlist = (join_list == NIL); - if (in_final_tlist) - { - resdomno += 1; - t_list = lappend(t_list, - create_tl_element(get_expr(xtl), resdomno)); - } - } - - return t_list; -} - -/* - * build_joinrel_restrict_and_join - * Builds a join relation's restrictinfo and joininfo lists from the - * joininfo lists of the relations it joins. If a join clause from an - * input relation refers to base rels still not present in the joinrel, - * then it is still a join clause for the joinrel; we put it into an - * appropriate JoinInfo list for the joinrel. Otherwise, the clause is - * now a restrict clause for the joined relation, and we put it into - * the joinrel's restrictinfo list. (It will not need to be considered - * further up the join tree.) - * - * 'joininfo_list' is a list of joininfo nodes from the relations being joined - * 'join_relids' is a list of all base relids in the new join relation - * - * NB: Formerly, we made deep(!) copies of each input RestrictInfo to pass - * up to the join relation. I believe this is no longer necessary, because - * RestrictInfo nodes are no longer context-dependent. Instead, just add - * the original nodes to the lists belonging to the join relation. - */ -static void -build_joinrel_restrict_and_join(RelOptInfo *joinrel, - List *joininfo_list, - Relids join_relids) -{ - List *xjoininfo; - - foreach(xjoininfo, joininfo_list) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(xjoininfo); - Relids new_unjoined_relids; - - new_unjoined_relids = set_differencei(joininfo->unjoined_relids, - join_relids); - if (new_unjoined_relids == NIL) - { - /* - * Clauses in this JoinInfo list become restriction clauses - * for the joinrel, since they refer to no outside rels. - * - * Be careful to eliminate duplicates, since we will see the - * same clauses arriving from both input relations... - */ - joinrel->restrictinfo = - LispUnion(joinrel->restrictinfo, - joininfo->jinfo_restrictinfo); - } - else - { - /* - * These clauses are still join clauses at this level, - * so find or make the appropriate JoinInfo item for the joinrel, - * and add the clauses to it (eliminating duplicates). - */ - JoinInfo *new_joininfo; - - new_joininfo = find_joininfo_node(joinrel, new_unjoined_relids); - new_joininfo->jinfo_restrictinfo = - LispUnion(new_joininfo->jinfo_restrictinfo, - joininfo->jinfo_restrictinfo); - } - } -} - -/* - * get_cheapest_complete_rel - * Find the join relation that includes all the original - * relations, i.e. the final join result. - * - * 'join_rel_list' is a list of join relations. - * - * Returns the list of final join relations. - */ -RelOptInfo * -get_cheapest_complete_rel(List *join_rel_list) -{ - RelOptInfo *final_rel = NULL; - List *xrel; - - /* - * find the relations that have no further joins, i.e., its joininfos - * all have unjoined_relids nil. (Actually, a JoinInfo shouldn't - * ever have nil unjoined_relids, so I think this code is overly - * complex. In fact it seems wrong; shouldn't we be looking for - * rels with complete relids lists??? Seems like a cartesian-product - * case could fail because sub-relations could have nil JoinInfo lists. - * Doesn't actually fail but I don't really understand why...) - */ - foreach(xrel, join_rel_list) - { - RelOptInfo *rel = (RelOptInfo *) lfirst(xrel); - bool final = true; - List *xjoininfo; - - foreach(xjoininfo, rel->joininfo) - { - JoinInfo *joininfo = (JoinInfo *) lfirst(xjoininfo); - - if (joininfo->unjoined_relids != NIL) - { - final = false; - break; - } - } - if (final) - if (final_rel == NULL || - path_is_cheaper(rel->cheapestpath, final_rel->cheapestpath)) - final_rel = rel; - } - - return final_rel; -} diff --git a/src/backend/optimizer/path/prune.c b/src/backend/optimizer/path/prune.c deleted file mode 100644 index f2dc6d90df..0000000000 --- a/src/backend/optimizer/path/prune.c +++ /dev/null @@ -1,109 +0,0 @@ -/*------------------------------------------------------------------------- - * - * prune.c - * Routines to prune redundant paths and relations - * - * Portions Copyright (c) 1996-2000, PostgreSQL, Inc - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/prune.c,v 1.46 2000/02/06 03:27:32 tgl Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - - -#include "optimizer/cost.h" -#include "optimizer/pathnode.h" -#include "optimizer/paths.h" - -static List *merge_rel_with_same_relids(RelOptInfo *rel, List *unmerged_rels); - -/* - * merge_rels_with_same_relids - * Removes any redundant relation entries from a list of rel nodes - * 'rel_list', merging their pathlists into the first non-duplicate - * relation entry for each value of relids. - * - * Returns the resulting list. - */ -void -merge_rels_with_same_relids(List *rel_list) -{ - List *i; - - /* - * rel_list can shorten while running as duplicate relations are - * deleted. Obviously, the first relation can't be a duplicate, - * so the list head pointer won't change. - */ - foreach(i, rel_list) - { - lnext(i) = merge_rel_with_same_relids((RelOptInfo *) lfirst(i), - lnext(i)); - } -} - -/* - * merge_rel_with_same_relids - * Prunes those relations from 'unmerged_rels' that are redundant with - * 'rel'. A relation is redundant if it is built up of the same - * relations as 'rel'. Paths for the redundant relations are merged into - * the pathlist of 'rel'. - * - * Returns a list of non-redundant relations, and sets the pathlist field - * of 'rel' appropriately. - * - */ -static List * -merge_rel_with_same_relids(RelOptInfo *rel, List *unmerged_rels) -{ - List *result = NIL; - List *i; - - foreach(i, unmerged_rels) - { - RelOptInfo *unmerged_rel = (RelOptInfo *) lfirst(i); - - if (sameseti(rel->relids, unmerged_rel->relids)) - { - /* - * These rels are for the same set of base relations, - * so get the best of their pathlists. We assume it's - * ok to reassign a path to the other RelOptInfo without - * doing more than changing its parent pointer (cf. pathnode.c). - */ - rel->pathlist = add_pathlist(rel, - rel->pathlist, - unmerged_rel->pathlist); - } - else - result = lappend(result, unmerged_rel); - } - return result; -} - -/* - * rels_set_cheapest - * For each relation entry in 'rel_list' (which should contain only join - * relations), set pointers to the cheapest path and compute rel size. - */ -void -rels_set_cheapest(Query *root, List *rel_list) -{ - List *x; - - foreach(x, rel_list) - { - RelOptInfo *rel = (RelOptInfo *) lfirst(x); - JoinPath *cheapest; - - cheapest = (JoinPath *) set_cheapest(rel, rel->pathlist); - if (IsA_JoinPath(cheapest)) - set_joinrel_rows_width(root, rel, cheapest); - else - elog(ERROR, "rels_set_cheapest: non JoinPath found"); - } -} diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c index 35bcbc7e56..ab0427ef32 100644 --- a/src/backend/optimizer/path/tidpath.c +++ b/src/backend/optimizer/path/tidpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.3 2000/01/26 05:56:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.4 2000/02/07 04:40:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,7 +37,7 @@ #include "utils/lsyscache.h" static List *create_tidscan_joinpaths(RelOptInfo *); -static List *TidqualFromRestrictinfo(List *relids, List * restrictinfo); +static List *TidqualFromRestrictinfo(List *relids, List *restrictinfo); static bool isEvaluable(int varno, Node *node); static Node *TidequalClause(int varno, Expr *node); static List *TidqualFromExpr(int varno, Expr *expr); @@ -209,16 +209,17 @@ List *TidqualFromExpr(int varno, Expr *expr) return rlst; } -static -List *TidqualFromRestrictinfo(List *relids, List * restrictinfo) +static List * +TidqualFromRestrictinfo(List *relids, List *restrictinfo) { List *lst, *rlst = NIL; int varno; Node *node; Expr *expr; - if (length(relids)>1) return NIL; - varno = (int)lfirst(relids); + if (length(relids) != 1) + return NIL; + varno = lfirsti(relids); foreach (lst, restrictinfo) { node = lfirst(lst); @@ -226,9 +227,7 @@ List *TidqualFromRestrictinfo(List *relids, List * restrictinfo) expr = ((RestrictInfo *)node)->clause; rlst = TidqualFromExpr(varno, expr); if (rlst) - { break; - } } return rlst; } @@ -281,8 +280,9 @@ List * create_tidscan_paths(Query *root, RelOptInfo *rel) { List *rlst = NIL; - TidPath *pathnode = (TidPath *)0; - List *tideval = TidqualFromRestrictinfo(rel->relids, rel->restrictinfo); + TidPath *pathnode = (TidPath *) NULL; + List *tideval = TidqualFromRestrictinfo(rel->relids, + rel->baserestrictinfo); if (tideval) pathnode = create_tidscan_path(rel, tideval); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 69c0a3f65b..97a021a2dd 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.83 2000/02/03 06:12:18 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.84 2000/02/07 04:41:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -147,7 +147,7 @@ create_scan_node(Query *root, Path *best_path, List *tlist) * Extract the relevant restriction clauses from the parent relation; * the executor must apply all these restrictions during the scan. */ - scan_clauses = get_actual_clauses(best_path->parent->restrictinfo); + scan_clauses = get_actual_clauses(best_path->parent->baserestrictinfo); switch (best_path->pathtype) { @@ -203,7 +203,7 @@ create_join_node(Query *root, JoinPath *best_path, List *tlist) inner_node = create_plan(root, best_path->innerjoinpath); inner_tlist = inner_node->targetlist; - clauses = get_actual_clauses(best_path->path.parent->restrictinfo); + clauses = get_actual_clauses(best_path->joinrestrictinfo); switch (best_path->path.pathtype) { diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 7ae5f3caf9..b94cc3e4b4 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.43 2000/01/26 05:56:37 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.44 2000/02/07 04:41:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -179,7 +179,8 @@ add_restrict_and_join_to_rel(Query *root, Node *clause) */ RelOptInfo *rel = get_base_rel(root, lfirsti(relids)); - rel->restrictinfo = lcons(restrictinfo, rel->restrictinfo); + rel->baserestrictinfo = lcons(restrictinfo, + rel->baserestrictinfo); } else { diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 4c7225b0d9..a414a910fe 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.50 2000/01/26 05:56:37 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.51 2000/02/07 04:41:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -219,7 +219,7 @@ subplanner(Query *root, add_restrict_and_join_to_rels(root, qual); add_missing_rels_to_query(root); - final_rel = make_one_rel(root, root->base_rel_list); + final_rel = make_one_rel(root); if (! final_rel) { diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 98737a4802..7c3c20b855 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.58 2000/01/26 05:56:40 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.59 2000/02/07 04:41:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -76,101 +76,104 @@ set_cheapest(RelOptInfo *parent_rel, List *pathlist) /* * add_pathlist - * Construct an output path list by adding to old_paths each path in - * new_paths that is worth considering --- that is, it has either a - * better sort order (better pathkeys) or cheaper cost than any of the - * existing old paths. - * - * Unless parent_rel->pruneable is false, we also remove from the output - * pathlist any old paths that are dominated by added path(s) --- that is, - * some new path is both cheaper and at least as well ordered. - * - * Note: the list old_paths is destructively modified, and in fact is - * turned into the output list. - * - * 'parent_rel' is the relation entry to which these paths correspond. - * 'old_paths' is the list of previously accepted paths for parent_rel. - * 'new_paths' is a list of potential new paths. - * - * Returns the updated list of interesting pathnodes. + * Consider each path given in new_paths, and add it to the parent rel's + * pathlist if it seems worthy. */ -List * -add_pathlist(RelOptInfo *parent_rel, List *old_paths, List *new_paths) +void +add_pathlist(RelOptInfo *parent_rel, List *new_paths) { List *p1; foreach(p1, new_paths) { Path *new_path = (Path *) lfirst(p1); - bool accept_new = true; /* unless we find a superior old path */ - List *p2_prev = NIL; - List *p2; - /* - * Loop to check proposed new path against old paths. Note it is - * possible for more than one old path to be tossed out because - * new_path dominates it. - */ - foreach(p2, old_paths) + add_path(parent_rel, new_path); + } +} + +/* + * add_path + * Consider a potential implementation path for the specified parent rel, + * and add it to the rel's pathlist if it is worthy of consideration. + * A path is worthy if it has either a better sort order (better pathkeys) + * or cheaper cost than any of the existing old paths. + * + * Unless parent_rel->pruneable is false, we also remove from the rel's + * pathlist any old paths that are dominated by new_path --- that is, + * new_path is both cheaper and at least as well ordered. + * + * 'parent_rel' is the relation entry to which the path corresponds. + * 'new_path' is a potential path for parent_rel. + * + * Returns nothing, but modifies parent_rel->pathlist. + */ +void +add_path(RelOptInfo *parent_rel, Path *new_path) +{ + bool accept_new = true; /* unless we find a superior old path */ + List *p1_prev = NIL; + List *p1; + + /* + * Loop to check proposed new path against old paths. Note it is + * possible for more than one old path to be tossed out because + * new_path dominates it. + */ + foreach(p1, parent_rel->pathlist) + { + Path *old_path = (Path *) lfirst(p1); + bool remove_old = false; /* unless new proves superior */ + + switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys)) { - Path *old_path = (Path *) lfirst(p2); - bool remove_old = false; /* unless new proves superior */ - - switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys)) - { - case PATHKEYS_EQUAL: - if (new_path->path_cost < old_path->path_cost) - remove_old = true; /* new dominates old */ - else - accept_new = false; /* old equals or dominates new */ - break; - case PATHKEYS_BETTER1: - if (new_path->path_cost <= old_path->path_cost) - remove_old = true; /* new dominates old */ - break; - case PATHKEYS_BETTER2: - if (new_path->path_cost >= old_path->path_cost) - accept_new = false; /* old dominates new */ - break; - case PATHKEYS_DIFFERENT: - /* keep both paths, since they have different ordering */ - break; - } - - /* - * Remove current element from old_list if dominated by new, - * unless xfunc told us not to remove any paths. - */ - if (remove_old && parent_rel->pruneable) - { - if (p2_prev) - lnext(p2_prev) = lnext(p2); + case PATHKEYS_EQUAL: + if (new_path->path_cost < old_path->path_cost) + remove_old = true; /* new dominates old */ else - old_paths = lnext(p2); - } - else - p2_prev = p2; - - /* - * If we found an old path that dominates new_path, we can quit - * scanning old_paths; we will not add new_path, and we assume - * new_path cannot dominate any other elements of old_paths. - */ - if (! accept_new) + accept_new = false; /* old equals or dominates new */ + break; + case PATHKEYS_BETTER1: + if (new_path->path_cost <= old_path->path_cost) + remove_old = true; /* new dominates old */ + break; + case PATHKEYS_BETTER2: + if (new_path->path_cost >= old_path->path_cost) + accept_new = false; /* old dominates new */ + break; + case PATHKEYS_DIFFERENT: + /* keep both paths, since they have different ordering */ break; } - if (accept_new) + /* + * Remove current element from pathlist if dominated by new, + * unless xfunc told us not to remove any paths. + */ + if (remove_old && parent_rel->pruneable) { - /* Accept the path. Note that it will now be eligible to be - * compared against the additional elements of new_paths... - */ - new_path->parent = parent_rel; /* not redundant, see prune.c */ - old_paths = lcons(new_path, old_paths); + if (p1_prev) + lnext(p1_prev) = lnext(p1); + else + parent_rel->pathlist = lnext(p1); } + else + p1_prev = p1; + + /* + * If we found an old path that dominates new_path, we can quit + * scanning the pathlist; we will not add new_path, and we assume + * new_path cannot dominate any other elements of the pathlist. + */ + if (! accept_new) + break; } - return old_paths; + if (accept_new) + { + /* Accept the path */ + parent_rel->pathlist = lcons(new_path, parent_rel->pathlist); + } } @@ -271,6 +274,7 @@ create_tidscan_path(RelOptInfo *rel, List *tideval) * 'joinrel' is the join relation. * 'outer_path' is the outer path * 'inner_path' is the inner path + * 'restrict_clauses' are the RestrictInfo nodes to apply at the join * 'pathkeys' are the path keys of the new join path * * Returns the resulting path node. @@ -280,6 +284,7 @@ NestPath * create_nestloop_path(RelOptInfo *joinrel, Path *outer_path, Path *inner_path, + List *restrict_clauses, List *pathkeys) { NestPath *pathnode = makeNode(NestPath); @@ -288,6 +293,7 @@ create_nestloop_path(RelOptInfo *joinrel, pathnode->path.parent = joinrel; pathnode->outerjoinpath = outer_path; pathnode->innerjoinpath = inner_path; + pathnode->joinrestrictinfo = restrict_clauses; pathnode->path.pathkeys = pathkeys; pathnode->path.path_cost = cost_nestloop(outer_path, @@ -305,6 +311,7 @@ create_nestloop_path(RelOptInfo *joinrel, * 'joinrel' is the join relation * 'outer_path' is the outer path * 'inner_path' is the inner path + * 'restrict_clauses' are the RestrictInfo nodes to apply at the join * 'pathkeys' are the path keys of the new join path * 'mergeclauses' are the applicable join/restriction clauses * 'outersortkeys' are the sort varkeys for the outer relation @@ -315,6 +322,7 @@ MergePath * create_mergejoin_path(RelOptInfo *joinrel, Path *outer_path, Path *inner_path, + List *restrict_clauses, List *pathkeys, List *mergeclauses, List *outersortkeys, @@ -337,6 +345,7 @@ create_mergejoin_path(RelOptInfo *joinrel, pathnode->jpath.path.parent = joinrel; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; + pathnode->jpath.joinrestrictinfo = restrict_clauses; pathnode->jpath.path.pathkeys = pathkeys; pathnode->path_mergeclauses = mergeclauses; pathnode->outersortkeys = outersortkeys; @@ -356,6 +365,7 @@ create_mergejoin_path(RelOptInfo *joinrel, * 'joinrel' is the join relation * 'outer_path' is the cheapest outer path * 'inner_path' is the cheapest inner path + * 'restrict_clauses' are the RestrictInfo nodes to apply at the join * 'hashclauses' is a list of the hash join clause (always a 1-element list) * 'innerdisbursion' is an estimate of the disbursion of the inner hash key * @@ -364,6 +374,7 @@ HashPath * create_hashjoin_path(RelOptInfo *joinrel, Path *outer_path, Path *inner_path, + List *restrict_clauses, List *hashclauses, Selectivity innerdisbursion) { @@ -373,6 +384,7 @@ create_hashjoin_path(RelOptInfo *joinrel, pathnode->jpath.path.parent = joinrel; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; + pathnode->jpath.joinrestrictinfo = restrict_clauses; /* A hashjoin never has pathkeys, since its ordering is unpredictable */ pathnode->jpath.path.pathkeys = NIL; pathnode->path_hashclauses = hashclauses; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 23ee8ba811..d22daa0f63 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -1,109 +1,408 @@ /*------------------------------------------------------------------------- * * relnode.c - * Relation manipulation routines + * Relation-node lookup/construction routines * * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.22 2000/02/06 03:27:33 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/util/relnode.c,v 1.23 2000/02/07 04:41:02 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "optimizer/cost.h" #include "optimizer/internal.h" +#include "optimizer/joininfo.h" #include "optimizer/pathnode.h" #include "optimizer/plancat.h" +#include "optimizer/tlist.h" +static List *new_join_tlist(List *tlist, int first_resdomno); +static List *build_joinrel_restrictlist(RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); +static void build_joinrel_joinlist(RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel); +static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel, + List *joininfo_list); +static void subbuild_joinrel_joinlist(RelOptInfo *joinrel, + List *joininfo_list); + /* * get_base_rel - * Returns relation entry corresponding to 'relid', creating a new one if - * necessary. This is for base relations. - * + * Returns relation entry corresponding to 'relid', creating a new one + * if necessary. This is for base relations. */ RelOptInfo * get_base_rel(Query *root, int relid) { - Relids relids = lconsi(relid, NIL); + List *baserels; RelOptInfo *rel; - rel = rel_member(relids, root->base_rel_list); - if (rel == NULL) + foreach(baserels, root->base_rel_list) { - rel = makeNode(RelOptInfo); - rel->relids = relids; - rel->rows = 0; - rel->width = 0; - rel->targetlist = NIL; - rel->pathlist = NIL; - rel->cheapestpath = (Path *) NULL; - rel->pruneable = true; - rel->indexed = false; - rel->pages = 0; - rel->tuples = 0; - rel->restrictinfo = NIL; - rel->joininfo = NIL; - rel->innerjoin = NIL; + rel = (RelOptInfo *) lfirst(baserels); - root->base_rel_list = lcons(rel, root->base_rel_list); - - if (relid < 0) - { - /* - * If the relation is a materialized relation, assume - * constants for sizes. - */ - rel->pages = _NONAME_RELATION_PAGES_; - rel->tuples = _NONAME_RELATION_TUPLES_; - } - else - { - /* - * Otherwise, retrieve relation statistics from the - * system catalogs. - */ - relation_info(root, relid, - &rel->indexed, &rel->pages, &rel->tuples); - } + /* We know length(rel->relids) == 1 for all members of base_rel_list */ + if (lfirsti(rel->relids) == relid) + return rel; } + + /* No existing RelOptInfo for this base rel, so make a new one */ + rel = makeNode(RelOptInfo); + rel->relids = lconsi(relid, NIL); + rel->rows = 0; + rel->width = 0; + rel->targetlist = NIL; + rel->pathlist = NIL; + rel->cheapestpath = (Path *) NULL; + rel->pruneable = true; + rel->indexed = false; + rel->pages = 0; + rel->tuples = 0; + rel->baserestrictinfo = NIL; + rel->joininfo = NIL; + rel->innerjoin = NIL; + + if (relid < 0) + { + /* + * If the relation is a materialized relation, assume + * constants for sizes. + */ + rel->pages = _NONAME_RELATION_PAGES_; + rel->tuples = _NONAME_RELATION_TUPLES_; + } + else + { + /* + * Otherwise, retrieve relation statistics from the + * system catalogs. + */ + relation_info(root, relid, + &rel->indexed, &rel->pages, &rel->tuples); + } + + root->base_rel_list = lcons(rel, root->base_rel_list); + return rel; } /* - * get_join_rel - * Returns relation entry corresponding to 'relid' (a list of relids), - * or NULL. - */ -RelOptInfo * -get_join_rel(Query *root, Relids relid) -{ - return rel_member(relid, root->join_rel_list); -} - -/* - * rel_member - * Determines whether a relation of id 'relid' is contained within a list - * 'rels'. - * - * Returns the corresponding entry in 'rels' if it is there. + * find_join_rel + * Returns relation entry corresponding to 'relids' (a list of RT indexes), + * or NULL if none exists. This is for join relations. * + * Note: there is probably no good reason for this to be called from + * anywhere except get_join_rel, but keep it as a separate routine + * just in case. */ -RelOptInfo * -rel_member(Relids relids, List *rels) +static RelOptInfo * +find_join_rel(Query *root, Relids relids) { - List *temp; + List *joinrels; - foreach(temp, rels) + foreach(joinrels, root->join_rel_list) { - RelOptInfo *rel = (RelOptInfo *) lfirst(temp); + RelOptInfo *rel = (RelOptInfo *) lfirst(joinrels); if (sameseti(rel->relids, relids)) return rel; } + return NULL; } + +/* + * get_join_rel + * Returns relation entry corresponding to the union of two given rels, + * creating a new relation entry if none already exists. + * + * 'outer_rel' and 'inner_rel' are relation nodes for the relations to be + * joined + * 'restrictlist_ptr': result variable. If not NULL, *restrictlist_ptr + * receives the list of RestrictInfo nodes that apply to this + * particular pair of joinable relations. + * + * restrictlist_ptr makes the routine's API a little grotty, but it saves + * duplicated calculation of the restrictlist... + */ +RelOptInfo * +get_join_rel(Query *root, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel, + List **restrictlist_ptr) +{ + List *joinrelids; + RelOptInfo *joinrel; + List *restrictlist; + List *new_outer_tlist; + List *new_inner_tlist; + + /* We should never try to join two overlapping sets of rels. */ + Assert(nonoverlap_setsi(outer_rel->relids, inner_rel->relids)); + + /* + * See if we already have a joinrel for this set of base rels. + * + * nconc(listCopy(x), y) is an idiom for making a new list without + * changing either input list. + */ + joinrelids = nconc(listCopy(outer_rel->relids), inner_rel->relids); + joinrel = find_join_rel(root, joinrelids); + + if (joinrel) + { + /* + * Yes, so we only need to figure the restrictlist for this + * particular pair of component relations. + */ + if (restrictlist_ptr) + *restrictlist_ptr = build_joinrel_restrictlist(joinrel, + outer_rel, + inner_rel); + return joinrel; + } + + /* + * Nope, so make one. + */ + joinrel = makeNode(RelOptInfo); + joinrel->relids = joinrelids; + joinrel->rows = 0; + joinrel->width = 0; + joinrel->targetlist = NIL; + joinrel->pathlist = NIL; + joinrel->cheapestpath = (Path *) NULL; + joinrel->pruneable = true; + joinrel->indexed = false; + joinrel->pages = 0; + joinrel->tuples = 0; + joinrel->baserestrictinfo = NIL; + joinrel->joininfo = NIL; + joinrel->innerjoin = NIL; + + /* + * Create a new tlist by removing irrelevant elements from both tlists + * of the outer and inner join relations and then merging the results + * together. + * + * NOTE: the tlist order for a join rel will depend on which pair + * of outer and inner rels we first try to build it from. But the + * contents should be the same regardless. + * + * XXX someday: consider pruning vars from the join's targetlist + * if they are needed only to evaluate restriction clauses of this + * join, and will never be accessed at higher levels of the plantree. + */ + new_outer_tlist = new_join_tlist(outer_rel->targetlist, 1); + new_inner_tlist = new_join_tlist(inner_rel->targetlist, + length(new_outer_tlist) + 1); + joinrel->targetlist = nconc(new_outer_tlist, new_inner_tlist); + + /* + * Construct restrict and join clause lists for the new joinrel. + * (The caller might or might not need the restrictlist, but + * I need it anyway for set_joinrel_size_estimates().) + */ + restrictlist = build_joinrel_restrictlist(joinrel, outer_rel, inner_rel); + if (restrictlist_ptr) + *restrictlist_ptr = restrictlist; + build_joinrel_joinlist(joinrel, outer_rel, inner_rel); + + /* + * Set estimates of the joinrel's size. + */ + set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel, + restrictlist); + + /* + * Add the joinrel to the front of the query's joinrel list. + * (allpaths.c depends on this ordering!) + */ + root->join_rel_list = lcons(joinrel, root->join_rel_list); + + return joinrel; +} + +/* + * new_join_tlist + * Builds a join relation's target list by keeping those elements that + * will be in the final target list and any other elements that are still + * needed for future joins. For a target list entry to still be needed + * for future joins, its 'joinlist' field must not be empty after removal + * of all relids in 'other_relids'. + * + * XXX the above comment refers to code that is long dead and gone; + * we don't keep track of joinlists for individual targetlist entries + * anymore. For now, all vars present in either input tlist will be + * emitted in the join's tlist. + * + * 'tlist' is the target list of one of the join relations + * 'first_resdomno' is the resdom number to use for the first created + * target list entry + * + * Returns the new target list. + */ +static List * +new_join_tlist(List *tlist, + int first_resdomno) +{ + int resdomno = first_resdomno - 1; + List *t_list = NIL; + List *i; + + foreach(i, tlist) + { + TargetEntry *xtl = lfirst(i); + + resdomno += 1; + t_list = lappend(t_list, + create_tl_element(get_expr(xtl), resdomno)); + } + + return t_list; +} + +/* + * build_joinrel_restrictlist + * build_joinrel_joinlist + * These routines build lists of restriction and join clauses for a + * join relation from the joininfo lists of the relations it joins. + * + * These routines are separate because the restriction list must be + * built afresh for each pair of input sub-relations we consider, whereas + * the join lists need only be computed once for any join RelOptInfo. + * The join lists are fully determined by the set of rels making up the + * joinrel, so we should get the same results (up to ordering) from any + * candidate pair of sub-relations. But the restriction list is whatever + * is not handled in the sub-relations, so it depends on which + * sub-relations are considered. + * + * If a join clause from an input relation refers to base rels still not + * present in the joinrel, then it is still a join clause for the joinrel; + * we put it into an appropriate JoinInfo list for the joinrel. Otherwise, + * the clause is now a restrict clause for the joined relation, and we + * return it to the caller of build_joinrel_restrictlist() to be stored in + * join paths made from this pair of sub-relations. (It will not need to + * be considered further up the join tree.) + * + * 'joinrel' is a join relation node + * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined + * to form joinrel. + * + * build_joinrel_restrictlist() returns a list of relevant restrictinfos, + * whereas build_joinrel_joinlist() stores its results in the joinrel's + * joininfo lists. One or the other must accept each given clause! + * + * NB: Formerly, we made deep(!) copies of each input RestrictInfo to pass + * up to the join relation. I believe this is no longer necessary, because + * RestrictInfo nodes are no longer context-dependent. Instead, just include + * the original nodes in the lists made for the join relation. + */ +static List * +build_joinrel_restrictlist(RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + /* + * We must eliminate duplicates, since we will see the + * same clauses arriving from both input relations... + */ + return LispUnion(subbuild_joinrel_restrictlist(joinrel, + outer_rel->joininfo), + subbuild_joinrel_restrictlist(joinrel, + inner_rel->joininfo)); +} + +static void +build_joinrel_joinlist(RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + subbuild_joinrel_joinlist(joinrel, outer_rel->joininfo); + subbuild_joinrel_joinlist(joinrel, inner_rel->joininfo); +} + +static List * +subbuild_joinrel_restrictlist(RelOptInfo *joinrel, + List *joininfo_list) +{ + List *restrictlist = NIL; + List *xjoininfo; + + foreach(xjoininfo, joininfo_list) + { + JoinInfo *joininfo = (JoinInfo *) lfirst(xjoininfo); + Relids new_unjoined_relids; + + new_unjoined_relids = set_differencei(joininfo->unjoined_relids, + joinrel->relids); + if (new_unjoined_relids == NIL) + { + /* + * Clauses in this JoinInfo list become restriction clauses + * for the joinrel, since they refer to no outside rels. + * + * We must copy the list to avoid disturbing the input relation, + * but we can use a shallow copy. + */ + restrictlist = nconc(restrictlist, + listCopy(joininfo->jinfo_restrictinfo)); + } + else + { + /* + * These clauses are still join clauses at this level, + * so we ignore them in this routine. + */ + } + } + + return restrictlist; +} + +static void +subbuild_joinrel_joinlist(RelOptInfo *joinrel, + List *joininfo_list) +{ + List *xjoininfo; + + foreach(xjoininfo, joininfo_list) + { + JoinInfo *joininfo = (JoinInfo *) lfirst(xjoininfo); + Relids new_unjoined_relids; + + new_unjoined_relids = set_differencei(joininfo->unjoined_relids, + joinrel->relids); + if (new_unjoined_relids == NIL) + { + /* + * Clauses in this JoinInfo list become restriction clauses + * for the joinrel, since they refer to no outside rels. + * So we can ignore them in this routine. + */ + } + else + { + /* + * These clauses are still join clauses at this level, + * so find or make the appropriate JoinInfo item for the joinrel, + * and add the clauses to it (eliminating duplicates). + */ + JoinInfo *new_joininfo; + + new_joininfo = find_joininfo_node(joinrel, new_unjoined_relids); + new_joininfo->jinfo_restrictinfo = + LispUnion(new_joininfo->jinfo_restrictinfo, + joininfo->jinfo_restrictinfo); + } + } +} diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 99254740e7..529aa5cea7 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: relation.h,v 1.42 2000/01/26 05:58:17 momjian Exp $ + * $Id: relation.h,v 1.43 2000/02/07 04:41:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -54,13 +54,26 @@ typedef List *Relids; * * The presence of the remaining fields depends on the restrictions * and joins that the relation participates in: * - * restrictinfo - List of RestrictInfo nodes, containing info about each - * qualification clause in which this relation participates + * baserestrictinfo - List of RestrictInfo nodes, containing info about + * each qualification clause in which this relation + * participates (only used for base rels) * joininfo - List of JoinInfo nodes, containing info about each join * clause in which this relation participates * innerjoin - List of Path nodes that represent indices that may be used * as inner paths of nestloop joins. This field is non-null * only for base rels, since join rels have no indices. + * + * Note: Keeping a restrictinfo list in the RelOptInfo is useful only for + * base rels, because for a join rel the set of clauses that are treated as + * restrict clauses varies depending on which sub-relations we choose to join. + * (For example, in a 3-base-rel join, a clause relating rels 1 and 2 must be + * treated as a restrictclause if we join {1} and {2 3} to make {1 2 3}; but + * if we join {1 2} and {3} then that clause will be a restrictclause in {1 2} + * and should not be processed again at the level of {1 2 3}.) Therefore, + * the restrictinfo list in the join case appears in individual JoinPaths + * (field joinrestrictinfo), not in the parent relation. But it's OK for + * the RelOptInfo to store the joininfo lists, because those are the same + * for a given rel no matter how we form it. */ typedef struct RelOptInfo @@ -86,7 +99,7 @@ typedef struct RelOptInfo double tuples; /* used by various scans and joins: */ - List *restrictinfo; /* RestrictInfo structures */ + List *baserestrictinfo; /* RestrictInfo structures (if base rel) */ List *joininfo; /* JoinInfo structures */ List *innerjoin; /* potential indexscans for nestloop joins */ /* innerjoin indexscans are not in the main pathlist because they are @@ -235,6 +248,10 @@ typedef struct JoinPath Path *outerjoinpath; /* path for the outer side of the join */ Path *innerjoinpath; /* path for the inner side of the join */ + List *joinrestrictinfo; /* RestrictInfos to apply to join */ + /* See the notes for RelOptInfo to understand why joinrestrictinfo is + * needed in JoinPath, and can't be merged into the parent RelOptInfo. + */ } JoinPath; /* @@ -289,18 +306,29 @@ typedef struct HashPath * without having to evaluate the rest. The RestrictInfo node itself stores * data used by the optimizer while choosing the best query plan. * - * A restriction clause will appear in the restrictinfo list of a RelOptInfo - * that describes exactly the set of base relations referenced by the - * restriction clause. It is not possible to apply the clause at any lower - * nesting level, and there is little point in delaying its evaluation to - * higher nesting levels. (The "predicate migration" code was once intended - * to push restriction clauses up and down the plan tree, but it's dead code - * and is unlikely to be resurrected in the foreseeable future.) + * If a restriction clause references a single base relation, it will appear + * in the baserestrictinfo list of the RelOptInfo for that base rel. * - * If a restriction clause references more than one base rel, it will also - * appear in the JoinInfo list of every RelOptInfo that describes a strict + * If a restriction clause references more than one base rel, it will + * appear in the JoinInfo lists of every RelOptInfo that describes a strict * subset of the base rels mentioned in the clause. The JoinInfo lists are * used to drive join tree building by selecting plausible join candidates. + * The clause cannot actually be applied until we have built a join rel + * containing all the base rels it references, however. + * + * When we construct a join rel that describes exactly the set of base rels + * referenced in a multi-relation restriction clause, we place that clause + * into the joinrestrictinfo lists of paths for the join rel. It will be + * applied at that join level, and will not propagate any further up the + * join tree. (Note: the "predicate migration" code was once intended to + * push restriction clauses up and down the plan tree based on evaluation + * costs, but it's dead code and is unlikely to be resurrected in the + * foreseeable future.) + * + * Note that in the presence of more than two rels, a multi-rel restriction + * might reach different heights in the join tree depending on the join + * sequence we use. So, these clauses cannot be associated directly with + * the join RelOptInfo, but must be kept track of on a per-join-path basis. * * In general, the referenced clause might be arbitrarily complex. The * kinds of clauses we can handle as indexscan quals, mergejoin clauses, diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 0a6e78d676..79153c01d8 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: cost.h,v 1.28 2000/01/26 05:58:20 momjian Exp $ + * $Id: cost.h,v 1.29 2000/02/07 04:41:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -55,9 +55,11 @@ extern Cost cost_mergejoin(Path *outer_path, Path *inner_path, List *outersortkeys, List *innersortkeys); extern Cost cost_hashjoin(Path *outer_path, Path *inner_path, Selectivity innerdisbursion); -extern void set_rel_rows_width(Query *root, RelOptInfo *rel); -extern void set_joinrel_rows_width(Query *root, RelOptInfo *rel, - JoinPath *joinpath); +extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel); +extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel, + List *restrictlist); /* * prototypes for clausesel.c diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index a83e743781..eefb2553b3 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: pathnode.h,v 1.24 2000/01/26 05:58:20 momjian Exp $ + * $Id: pathnode.h,v 1.25 2000/02/07 04:41:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,8 +21,8 @@ */ extern bool path_is_cheaper(Path *path1, Path *path2); extern Path *set_cheapest(RelOptInfo *parent_rel, List *pathlist); -extern List *add_pathlist(RelOptInfo *parent_rel, List *old_paths, - List *new_paths); +extern void add_path(RelOptInfo *parent_rel, Path *new_path); +extern void add_pathlist(RelOptInfo *parent_rel, List *new_paths); extern Path *create_seqscan_path(RelOptInfo *rel); extern IndexPath *create_index_path(Query *root, RelOptInfo *rel, @@ -31,25 +31,34 @@ extern IndexPath *create_index_path(Query *root, RelOptInfo *rel, extern TidPath *create_tidscan_path(RelOptInfo *rel, List *tideval); extern NestPath *create_nestloop_path(RelOptInfo *joinrel, - Path *outer_path, Path *inner_path, + Path *outer_path, + Path *inner_path, + List *restrict_clauses, List *pathkeys); -extern MergePath *create_mergejoin_path(RelOptInfo *joinrel, Path *outer_path, - Path *inner_path, List *pathkeys, +extern MergePath *create_mergejoin_path(RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *restrict_clauses, + List *pathkeys, List *mergeclauses, List *outersortkeys, List *innersortkeys); -extern HashPath *create_hashjoin_path(RelOptInfo *joinrel, Path *outer_path, - Path *inner_path, List *hashclauses, +extern HashPath *create_hashjoin_path(RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *restrict_clauses, + List *hashclauses, Selectivity innerdisbursion); /* - * prototypes for rel.c + * prototypes for relnode.c */ -extern RelOptInfo *rel_member(Relids relid, List *rels); extern RelOptInfo *get_base_rel(Query *root, int relid); -extern RelOptInfo *get_join_rel(Query *root, Relids relid); +extern RelOptInfo *get_join_rel(Query *root, RelOptInfo *outer_rel, + RelOptInfo *inner_rel, + List **restrictlist_ptr); /* * prototypes for indexnode.h diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index bcecd4c923..256aac90d7 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: paths.h,v 1.41 2000/02/06 03:27:35 tgl Exp $ + * $Id: paths.h,v 1.42 2000/02/07 04:41:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -27,15 +27,15 @@ extern bool enable_geqo; extern int geqo_rels; -extern RelOptInfo *make_one_rel(Query *root, List *rels); +extern RelOptInfo *make_one_rel(Query *root); /* * indxpath.c * routines to generate index paths */ extern List *create_index_paths(Query *root, RelOptInfo *rel, List *indices, - List *restrictinfo_list, - List *joininfo_list); + List *restrictinfo_list, + List *joininfo_list); extern Oid indexable_operator(Expr *clause, Oid opclass, Oid relam, bool indexkey_on_left); extern List *extract_or_indexqual_conditions(RelOptInfo *rel, @@ -60,7 +60,22 @@ extern List *create_tidscan_paths(Query *root, RelOptInfo *rel); * joinpath.c * routines to create join paths */ -extern void update_rels_pathlist_for_joins(Query *root, List *joinrels); +extern void add_paths_to_joinrel(Query *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist); + +/* + * joinrels.c + * routines to determine which relations to join + */ +extern void make_rels_by_joins(Query *root, int level); +extern RelOptInfo *make_rels_by_clause_joins(Query *root, + RelOptInfo *old_rel, + List *other_rels); +extern RelOptInfo *make_rels_by_clauseless_joins(Query *root, + RelOptInfo *old_rel, + List *other_rels); /* * pathkeys.c @@ -90,22 +105,4 @@ extern List *find_mergeclauses_for_pathkeys(List *pathkeys, extern List *make_pathkeys_for_mergeclauses(List *mergeclauses, List *tlist); -/* - * joinrels.c - * routines to determine which relations to join - */ -extern List *make_rels_by_joins(Query *root, List *old_rels); -extern List *make_rels_by_clause_joins(Query *root, RelOptInfo *old_rel, - List *joininfo_list, Relids only_relids); -extern List *make_rels_by_clauseless_joins(RelOptInfo *old_rel, - List *inner_rels); -extern RelOptInfo *get_cheapest_complete_rel(List *join_rel_list); - -/* - * prune.c - */ -extern void merge_rels_with_same_relids(List *rel_list); -extern void rels_set_cheapest(Query *root, List *rel_list); -extern List *del_rels_all_bushy_inactive(List *old_rels); - #endif /* PATHS_H */