From 47f18ec702cf4c369a7e082e411ed43514911841 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 13 Aug 1999 01:17:16 +0000 Subject: [PATCH] Update comments about pathkeys. --- src/backend/optimizer/path/pathkeys.c | 91 ++++++++++++++++++--------- 1 file changed, 63 insertions(+), 28 deletions(-) diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index cf9f87faf7..c0782c5665 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.12 1999/07/16 04:59:15 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.13 1999/08/13 01:17:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,18 +33,24 @@ static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist, * order of the result generated by the Path. * * In single/base relation RelOptInfo's, the Path's represent various ways - * of generating the relation and the resulting ordering of the tuples. + * of scanning the relation and the resulting ordering of the tuples. * Sequential scan Paths have NIL pathkeys, indicating no known ordering. - * Index scans have Path.pathkeys that represent the chosen index. - * A single-key index pathkeys would be { {tab1_indexkey1} }. For a - * multi-key index pathkeys would be { {tab1_indexkey1}, {tab1_indexkey2} }, - * indicating major sort by indexkey1 and minor sort by indexkey2. + * Index scans have Path.pathkeys that represent the chosen index's ordering, + * if any. A single-key index would create a pathkey with a single sublist, + * e.g. ( (tab1_indexkey1) ). A multi-key index generates a sublist per key, + * e.g. ( (tab1_indexkey1) (tab1_indexkey2) ) which shows major sort by + * indexkey1 and minor sort by indexkey2. + * + * Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since + * we can say nothing about the overall order of its result. Also, an index + * scan on an unordered type of index generates no useful pathkeys. However, + * we can always create a pathkey by doing an explicit sort. * * Multi-relation RelOptInfo Path's are more complicated. Mergejoins are * only performed with equijoins ("="). Because of this, the multi-relation * path actually has more than one primary Var key. For example, a - * mergejoin Path of "tab1.col1 = tab2.col1" would generate a pathkeys of - * { {tab1.col1, tab2.col1} }, indicating that the major sort order of the + * mergejoin Path of "tab1.col1 = tab2.col1" would generate pathkeys of + * ( (tab1.col1 tab2.col1) ), indicating that the major sort order of the * Path can be taken to be *either* tab1.col1 or tab2.col1. * They are equal, so they are both primary sort keys. This allows future * joins to use either Var as a pre-sorted key to prevent upper Mergejoins @@ -53,21 +59,30 @@ static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist, * Note that while the order of the top list is meaningful (primary vs. * secondary sort key), the order of each sublist is arbitrary. * - * For multi-key sorts, if the outer is sorted by a multi-key index, the - * multi-key index remains after the join. If the inner has a multi-key - * sort, only the primary key of the inner is added to the result. - * Mergejoins only join on the primary key. Currently, non-primary keys - * in the pathkeys List are of limited value. + * We can actually keep all of the keys of the outer path of a merge or + * nestloop join, since the ordering of the outer path will be reflected + * in the result. We add to each pathkey sublist any inner vars that are + * equijoined to any of the outer vars in the sublist. In the nestloop + * case we have to be careful to consider only equijoin operators; the + * nestloop's join clauses might include non-equijoin operators. + * (Currently, we do this by considering only mergejoinable operators while + * making the pathkeys, since we have no separate marking for operators that + * are equijoins but aren't mergejoinable.) * * Although Hashjoins also work only with equijoin operators, it is *not* * safe to consider the output of a Hashjoin to be sorted in any particular * order --- not even the outer path's order. This is true because the - * executor might have to split the join into multiple batches. + * executor might have to split the join into multiple batches. Therefore + * a Hashjoin is always given NIL pathkeys. * - * NestJoin does not perform sorting, and allows non-equijoins, so it does - * not allow useful pathkeys. (But couldn't we use the outer path's order?) + * Notice that pathkeys only say *what* is being ordered, and not *how* + * it is ordered. The actual sort ordering is indicated by a separate + * data structure, the PathOrder. The PathOrder provides a sort operator + * OID for each of the sublists of the path key. This is fairly bogus, + * since in cross-datatype cases we really want to keep track of more than + * one sort operator... * - * -- bjm + * -- bjm & tgl *-------------------- */ @@ -328,17 +343,32 @@ make_pathkeys_from_joinkeys(List *joinkeys, /* * new_join_pathkeys - * Find the path keys for a join relation by finding all vars in the list - * of join clauses 'joinclauses' such that: - * (1) the var corresponding to the outer join relation is a - * key on the outer path - * (2) the var appears in the target list of the join relation - * In other words, add to each outer path key the inner path keys that - * are required for qualification. + * Build the path keys for a join relation constructed by mergejoin or + * nestloop join. These keys should include all the path key vars of the + * outer path (since the join will retain the ordering of the outer path) + * plus any vars of the inner path that are mergejoined to the outer vars. + * + * Per the discussion at the top of this file, mergejoined inner vars + * can be considered path keys of the result, just the same as the outer + * vars they were joined with. + * + * We can also use inner path vars as pathkeys of a nestloop join, but we + * must be careful that we only consider equijoin clauses and not general + * join clauses. For example, "t1.a < t2.b" might be a join clause of a + * nestloop, but it doesn't result in b acquiring the ordering of a! + * joinpath.c handles that problem by only passing this routine clauses + * that are marked mergejoinable, even if a nestloop join is being built. + * Therefore we only have 't1.a = t2.b' style clauses, and can expect that + * the inner var will acquire the outer's ordering no matter which join + * method is actually used. + * + * All vars in the result are copied from the join relation's tlist, not from + * the given pathkeys or the join clauses. (Is that necessary? I suspect + * not --- tgl) * * 'outer_pathkeys' is the list of the outer path's path keys * 'join_rel_tlist' is the target list of the join relation - * 'joinclauses' is the list of restricting join clauses + * 'joinclauses' is the list of mergejoinable join clauses * * Returns the list of new path keys. * @@ -358,8 +388,13 @@ new_join_pathkeys(List *outer_pathkeys, new_pathkey = new_join_pathkey(outer_pathkey, join_rel_tlist, joinclauses); - if (new_pathkey != NIL) - final_pathkeys = lappend(final_pathkeys, new_pathkey); + /* if we can find no sortable vars for the n'th sort key, + * then we're done generating pathkeys; can't expect to order + * subsequent vars. Not clear that this can really happen. + */ + if (new_pathkey == NIL) + break; + final_pathkeys = lappend(final_pathkeys, new_pathkey); } return final_pathkeys; } @@ -372,7 +407,7 @@ new_join_pathkeys(List *outer_pathkeys, * at the top of this file). * * Note that each returned pathkey is the var node found in - * 'join_rel_tlist' rather than the joinclause var node. + * 'join_rel_tlist' rather than the input pathkey or joinclause var node. * (Is this important?) Also, we return a fully copied list * that does not share any subnodes with existing data structures. * (Is that important, either?)