Revert "Optimize order of GROUP BY keys".

This reverts commit db0d67db24 and
several follow-on fixes.  The idea of making a cost-based choice
of the order of the sorting columns is not fundamentally unsound,
but it requires cost information and data statistics that we don't
really have.  For example, relying on procost to distinguish the
relative costs of different sort comparators is pretty pointless
so long as most such comparator functions are labeled with cost 1.0.
Moreover, estimating the number of comparisons done by Quicksort
requires more than just an estimate of the number of distinct values
in the input: you also need some idea of the sizes of the larger
groups, if you want an estimate that's good to better than a factor of
three or so.  That's data that's often unknown or not very reliable.
Worse, to arrive at estimates of the number of calls made to the
lower-order-column comparison functions, the code needs to make
estimates of the numbers of distinct values of multiple columns,
which are necessarily even less trustworthy than per-column stats.
Even if all the inputs are perfectly reliable, the cost algorithm
as-implemented cannot offer useful information about how to order
sorting columns beyond the point at which the average group size
is estimated to drop to 1.

Close inspection of the code added by db0d67db2 shows that there
are also multiple small bugs.  These could have been fixed, but
there's not much point if we don't trust the estimates to be
accurate in-principle.

Finally, the changes in cost_sort's behavior made for very large
changes (often a factor of 2 or so) in the cost estimates for all
sorting operations, not only those for multi-column GROUP BY.
That naturally changes plan choices in many situations, and there's
precious little evidence to show that the changes are for the better.
Given the above doubts about whether the new estimates are really
trustworthy, it's hard to summon much confidence that these changes
are better on the average.

Since we're hard up against the release deadline for v15, let's
revert these changes for now.  We can always try again later.

Note: in v15, I left T_PathKeyInfo in place in nodes.h even though
it's unreferenced.  Removing it would be an ABI break, and it seems
a bit late in the release cycle for that.

Discussion: https://postgr.es/m/TYAPR01MB586665EB5FB2C3807E893941F5579@TYAPR01MB5866.jpnprd01.prod.outlook.com
This commit is contained in:
Tom Lane 2022-10-03 10:56:16 -04:00
parent b507a7a19b
commit 443df6e2db
24 changed files with 458 additions and 1872 deletions

View File

@ -2840,13 +2840,16 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
-- GROUP BY clause in various forms, cardinal, alias and constant expression
explain (verbose, costs off)
select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Foreign Scan
QUERY PLAN
---------------------------------------------------------------------------------------
Sort
Output: (count(c2)), c2, 5, 7.0, 9
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5 ORDER BY c2 ASC NULLS LAST
(4 rows)
Sort Key: ft1.c2
-> Foreign Scan
Output: (count(c2)), c2, 5, 7.0, 9
Relations: Aggregate on (public.ft1)
Remote SQL: SELECT count(c2), c2, 5, 7.0, 9 FROM "S 1"."T 1" GROUP BY 2, 3, 5
(7 rows)
select count(c2) w, c2 x, 5 y, 7.0 z from ft1 group by 2, y, 9.0::int order by 2;
w | x | y | z

View File

@ -5055,20 +5055,6 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
</listitem>
</varlistentry>
<varlistentry id="guc-enable-groupby-reordering" xreflabel="enable_group_by_reordering">
<term><varname>enable_group_by_reordering</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>enable_group_by_reordering</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Enables or disables reordering of keys in a <literal>GROUP BY</literal>
clause. The default is <literal>on</literal>.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-enable-hashagg" xreflabel="enable_hashagg">
<term><varname>enable_hashagg</varname> (<type>boolean</type>)
<indexterm>

View File

@ -1796,327 +1796,6 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
rterm->pathtarget->width);
}
/*
* is_fake_var
* Workaround for generate_append_tlist() which generates fake Vars with
* varno == 0, that will cause a fail of estimate_num_group() call
*
* XXX Ummm, why would estimate_num_group fail with this?
*/
static bool
is_fake_var(Expr *expr)
{
if (IsA(expr, RelabelType))
expr = (Expr *) ((RelabelType *) expr)->arg;
return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
}
/*
* get_width_cost_multiplier
* Returns relative complexity of comparing two values based on its width.
* The idea behind is that the comparison becomes more expensive the longer the
* value is. Return value is in cpu_operator_cost units.
*/
static double
get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
{
double width = -1.0; /* fake value */
if (IsA(expr, RelabelType))
expr = (Expr *) ((RelabelType *) expr)->arg;
/* Try to find actual stat in corresponding relation */
if (IsA(expr, Var))
{
Var *var = (Var *) expr;
if (var->varno > 0 && var->varno < root->simple_rel_array_size)
{
RelOptInfo *rel = root->simple_rel_array[var->varno];
if (rel != NULL &&
var->varattno >= rel->min_attr &&
var->varattno <= rel->max_attr)
{
int ndx = var->varattno - rel->min_attr;
if (rel->attr_widths[ndx] > 0)
width = rel->attr_widths[ndx];
}
}
}
/* Didn't find any actual stats, try using type width instead. */
if (width < 0.0)
{
Node *node = (Node *) expr;
width = get_typavgwidth(exprType(node), exprTypmod(node));
}
/*
* Values are passed as Datum type, so comparisons can't be cheaper than
* comparing a Datum value.
*
* FIXME I find this reasoning questionable. We may pass int2, and
* comparing it is probably a bit cheaper than comparing a bigint.
*/
if (width <= sizeof(Datum))
return 1.0;
/*
* We consider the cost of a comparison not to be directly proportional to
* width of the argument, because widths of the arguments could be
* slightly different (we only know the average width for the whole
* column). So we use log16(width) as an estimate.
*/
return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
}
/*
* compute_cpu_sort_cost
* compute CPU cost of sort (i.e. in-memory)
*
* The main thing we need to calculate to estimate sort CPU costs is the number
* of calls to the comparator functions. The difficulty is that for multi-column
* sorts there may be different data types involved (for some of which the calls
* may be much more expensive). Furthermore, columns may have a very different
* number of distinct values - the higher the number, the fewer comparisons will
* be needed for the following columns.
*
* The algorithm is incremental - we add pathkeys one by one, and at each step we
* estimate the number of necessary comparisons (based on the number of distinct
* groups in the current pathkey prefix and the new pathkey), and the comparison
* costs (which is data type specific).
*
* Estimation of the number of comparisons is based on ideas from:
*
* "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
* [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
*
* In term of that paper, let N - number of tuples, Xi - number of identical
* tuples with value Ki, then the estimate of number of comparisons is:
*
* log(N! / (X1! * X2! * ..)) ~ sum(Xi * log(N/Xi))
*
* We assume all Xi the same because now we don't have any estimation of
* group sizes, we have only know the estimate of number of groups (distinct
* values). In that case, formula becomes:
*
* N * log(NumberOfGroups)
*
* For multi-column sorts we need to estimate the number of comparisons for
* each individual column - for example with columns (c1, c2, ..., ck) we
* can estimate that number of comparisons on ck is roughly
*
* ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
*
* Let k be a column number, Gk - number of groups defined by k columns, and Fk
* the cost of the comparison is
*
* N * sum( Fk * log(Gk) )
*
* Note: We also consider column width, not just the comparator cost.
*
* NOTE: some callers currently pass NIL for pathkeys because they
* can't conveniently supply the sort keys. In this case, it will fallback to
* simple comparison cost estimate.
*/
static Cost
compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
Cost comparison_cost, double tuples, double output_tuples,
bool heapSort)
{
Cost per_tuple_cost = 0.0;
ListCell *lc;
List *pathkeyExprs = NIL;
double tuplesPerPrevGroup = tuples;
double totalFuncCost = 1.0;
bool has_fake_var = false;
int i = 0;
Oid prev_datatype = InvalidOid;
List *cache_varinfos = NIL;
/* fallback if pathkeys is unknown */
if (list_length(pathkeys) == 0)
{
/*
* If we'll use a bounded heap-sort keeping just K tuples in memory,
* for a total number of tuple comparisons of N log2 K; but the
* constant factor is a bit higher than for quicksort. Tweak it so
* that the cost curve is continuous at the crossover point.
*/
output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
/* add cost provided by caller */
per_tuple_cost += comparison_cost;
return per_tuple_cost * tuples;
}
/*
* Computing total cost of sorting takes into account the per-column
* comparison function cost. We try to compute the needed number of
* comparisons per column.
*/
foreach(lc, pathkeys)
{
PathKey *pathkey = (PathKey *) lfirst(lc);
EquivalenceMember *em;
double nGroups,
correctedNGroups;
Cost funcCost = 1.0;
/*
* We believe that equivalence members aren't very different, so, to
* estimate cost we consider just the first member.
*/
em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
if (em->em_datatype != InvalidOid)
{
/* do not lookup funcCost if the data type is the same */
if (prev_datatype != em->em_datatype)
{
Oid sortop;
QualCost cost;
sortop = get_opfamily_member(pathkey->pk_opfamily,
em->em_datatype, em->em_datatype,
pathkey->pk_strategy);
cost.startup = 0;
cost.per_tuple = 0;
add_function_cost(root, get_opcode(sortop), NULL, &cost);
/*
* add_function_cost returns the product of cpu_operator_cost
* and procost, but we need just procost, co undo that.
*/
funcCost = cost.per_tuple / cpu_operator_cost;
prev_datatype = em->em_datatype;
}
}
/* factor in the width of the values in this column */
funcCost *= get_width_cost_multiplier(root, em->em_expr);
/* now we have per-key cost, so add to the running total */
totalFuncCost += funcCost;
/* remember if we have found a fake Var in pathkeys */
has_fake_var |= is_fake_var(em->em_expr);
pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
/*
* We need to calculate the number of comparisons for this column,
* which requires knowing the group size. So we estimate the number of
* groups by calling estimate_num_groups_incremental(), which
* estimates the group size for "new" pathkeys.
*
* Note: estimate_num_groups_incremental does not handle fake Vars, so
* use a default estimate otherwise.
*/
if (!has_fake_var)
nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
tuplesPerPrevGroup, NULL, NULL,
&cache_varinfos,
list_length(pathkeyExprs) - 1);
else if (tuples > 4.0)
/*
* Use geometric mean as estimation if there are no stats.
*
* We don't use DEFAULT_NUM_DISTINCT here, because that's used for
* a single column, but here we're dealing with multiple columns.
*/
nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
else
nGroups = tuples;
/*
* Presorted keys are not considered in the cost above, but we still
* do have to compare them in the qsort comparator. So make sure to
* factor in the cost in that case.
*/
if (i >= nPresortedKeys)
{
if (heapSort)
{
/*
* have to keep at least one group, and a multiple of group
* size
*/
correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup);
}
else
/* all groups in the input */
correctedNGroups = nGroups;
correctedNGroups = Max(1.0, ceil(correctedNGroups));
per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
}
i++;
/*
* Uniform distributions with all groups being of the same size are
* the best case, with nice smooth behavior. Real-world distributions
* tend not to be uniform, though, and we don't have any reliable
* easy-to-use information. As a basic defense against skewed
* distributions, we use a 1.5 factor to make the expected group a bit
* larger, but we need to be careful not to make the group larger than
* in the preceding step.
*/
tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
ceil(1.5 * tuplesPerPrevGroup / nGroups));
/*
* Once we get single-row group, it means tuples in the group are
* unique and we can skip all remaining columns.
*/
if (tuplesPerPrevGroup <= 1.0)
break;
}
list_free(pathkeyExprs);
/* per_tuple_cost is in cpu_operator_cost units */
per_tuple_cost *= cpu_operator_cost;
/*
* Accordingly to "Introduction to algorithms", Thomas H. Cormen, Charles
* E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0, quicksort
* estimation formula has additional term proportional to number of tuples
* (see Chapter 8.2 and Theorem 4.1). That affects cases with a low number
* of tuples, approximately less than 1e4. We could implement it as an
* additional multiplier under the logarithm, but we use a bit more
* complex formula which takes into account the number of unique tuples
* and it's not clear how to combine the multiplier with the number of
* groups. Estimate it as 10 cpu_operator_cost units.
*/
per_tuple_cost += 10 * cpu_operator_cost;
per_tuple_cost += comparison_cost;
return tuples * per_tuple_cost;
}
/*
* simple wrapper just to estimate best sort path
*/
Cost
cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
double tuples)
{
return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
0, tuples, tuples, false);
}
/*
* cost_tuplesort
* Determines and returns the cost of sorting a relation using tuplesort,
@ -2133,7 +1812,7 @@ cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
* number of initial runs formed and M is the merge order used by tuplesort.c.
* Since the average initial run should be about sort_mem, we have
* disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
* and cpu cost (computed by compute_cpu_sort_cost()).
* cpu = comparison_cost * t * log2(t)
*
* If the sort is bounded (i.e., only the first k result tuples are needed)
* and k tuples can fit into sort_mem, we use a heap method that keeps only
@ -2152,11 +1831,9 @@ cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
* 'comparison_cost' is the extra cost per comparison, if any
* 'sort_mem' is the number of kilobytes of work memory allowed for the sort
* 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
* 'startup_cost' is expected to be 0 at input. If there is "input cost" it should
* be added by caller later
*/
static void
cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
cost_tuplesort(Cost *startup_cost, Cost *run_cost,
double tuples, int width,
Cost comparison_cost, int sort_mem,
double limit_tuples)
@ -2173,6 +1850,9 @@ cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_
if (tuples < 2.0)
tuples = 2.0;
/* Include the default cost-per-comparison */
comparison_cost += 2.0 * cpu_operator_cost;
/* Do we have a useful LIMIT? */
if (limit_tuples > 0 && limit_tuples < tuples)
{
@ -2196,10 +1876,12 @@ cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_
double log_runs;
double npageaccesses;
/* CPU costs */
*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
comparison_cost, tuples,
tuples, false);
/*
* CPU costs
*
* Assume about N log2 N comparisons
*/
*startup_cost = comparison_cost * tuples * LOG2(tuples);
/* Disk costs */
@ -2215,17 +1897,18 @@ cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_
}
else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
{
/* We'll use a bounded heap-sort keeping just K tuples in memory. */
*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
comparison_cost, tuples,
output_tuples, true);
/*
* We'll use a bounded heap-sort keeping just K tuples in memory, for
* a total number of tuple comparisons of N log2 K; but the constant
* factor is a bit higher than for quicksort. Tweak it so that the
* cost curve is continuous at the crossover point.
*/
*startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
}
else
{
/* We'll use plain quicksort on all the input tuples */
*startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
comparison_cost, tuples,
tuples, false);
*startup_cost = comparison_cost * tuples * LOG2(tuples);
}
/*
@ -2258,8 +1941,8 @@ cost_incremental_sort(Path *path,
double input_tuples, int width, Cost comparison_cost, int sort_mem,
double limit_tuples)
{
Cost startup_cost,
run_cost,
Cost startup_cost = 0,
run_cost = 0,
input_run_cost = input_total_cost - input_startup_cost;
double group_tuples,
input_groups;
@ -2344,7 +2027,7 @@ cost_incremental_sort(Path *path,
* pessimistic about incremental sort performance and increase its average
* group size by half.
*/
cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
cost_tuplesort(&group_startup_cost, &group_run_cost,
1.5 * group_tuples, width, comparison_cost, sort_mem,
limit_tuples);
@ -2352,7 +2035,7 @@ cost_incremental_sort(Path *path,
* Startup cost of incremental sort is the startup cost of its first group
* plus the cost of its input.
*/
startup_cost = group_startup_cost
startup_cost += group_startup_cost
+ input_startup_cost + group_input_run_cost;
/*
@ -2361,7 +2044,7 @@ cost_incremental_sort(Path *path,
* group, plus the total cost to process the remaining groups, plus the
* remaining cost of input.
*/
run_cost = group_run_cost
run_cost += group_run_cost
+ (group_run_cost + group_startup_cost) * (input_groups - 1)
+ group_input_run_cost * (input_groups - 1);
@ -2401,7 +2084,7 @@ cost_sort(Path *path, PlannerInfo *root,
Cost startup_cost;
Cost run_cost;
cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
cost_tuplesort(&startup_cost, &run_cost,
tuples, width,
comparison_cost, sort_mem,
limit_tuples);
@ -2499,7 +2182,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
* Determines and returns the cost of an Append node.
*/
void
cost_append(AppendPath *apath, PlannerInfo *root)
cost_append(AppendPath *apath)
{
ListCell *l;
@ -2567,7 +2250,7 @@ cost_append(AppendPath *apath, PlannerInfo *root)
* any child.
*/
cost_sort(&sort_path,
root,
NULL, /* doesn't currently need root */
pathkeys,
subpath->total_cost,
subpath->rows,

View File

@ -681,18 +681,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
if (opcintype == cur_em->em_datatype &&
equal(expr, cur_em->em_expr))
{
/*
* Match!
*
* Copy the sortref if it wasn't set yet. That may happen if
* the ec was constructed from WHERE clause, i.e. it doesn't
* have a target reference at all.
*/
if (cur_ec->ec_sortref == 0 && sortref > 0)
cur_ec->ec_sortref = sortref;
return cur_ec;
}
return cur_ec; /* Match! */
}
}

View File

@ -17,24 +17,17 @@
*/
#include "postgres.h"
#include <float.h>
#include "miscadmin.h"
#include "access/stratnum.h"
#include "catalog/pg_opfamily.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/plannodes.h"
#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "partitioning/partbounds.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
/* Consider reordering of GROUP BY keys? */
bool enable_group_by_reordering = true;
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@ -341,527 +334,6 @@ pathkeys_contained_in(List *keys1, List *keys2)
return false;
}
/*
* group_keys_reorder_by_pathkeys
* Reorder GROUP BY keys to match pathkeys of input path.
*
* Function returns new lists (pathkeys and clauses), original GROUP BY lists
* stay untouched.
*
* Returns the number of GROUP BY keys with a matching pathkey.
*/
int
group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
List **group_clauses)
{
List *new_group_pathkeys = NIL,
*new_group_clauses = NIL;
ListCell *lc;
int n;
if (pathkeys == NIL || *group_pathkeys == NIL)
return 0;
/*
* Walk the pathkeys (determining ordering of the input path) and see if
* there's a matching GROUP BY key. If we find one, we append it to the
* list, and do the same for the clauses.
*
* Once we find the first pathkey without a matching GROUP BY key, the
* rest of the pathkeys are useless and can't be used to evaluate the
* grouping, so we abort the loop and ignore the remaining pathkeys.
*
* XXX Pathkeys are built in a way to allow simply comparing pointers.
*/
foreach(lc, pathkeys)
{
PathKey *pathkey = (PathKey *) lfirst(lc);
SortGroupClause *sgc;
/* abort on first mismatch */
if (!list_member_ptr(*group_pathkeys, pathkey))
break;
new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
*group_clauses);
new_group_clauses = lappend(new_group_clauses, sgc);
}
/* remember the number of pathkeys with a matching GROUP BY key */
n = list_length(new_group_pathkeys);
/* append the remaining group pathkeys (will be treated as not sorted) */
*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
*group_pathkeys);
*group_clauses = list_concat_unique_ptr(new_group_clauses,
*group_clauses);
return n;
}
/*
* Used to generate all permutations of a pathkey list.
*/
typedef struct PathkeyMutatorState
{
List *elemsList;
ListCell **elemCells;
void **elems;
int *positions;
int mutatorNColumns;
int count;
} PathkeyMutatorState;
/*
* PathkeyMutatorInit
* Initialize state of the permutation generator.
*
* We want to generate permutations of elements in the "elems" list. We may want
* to skip some number of elements at the beginning (when treating as presorted)
* or at the end (we only permute a limited number of group keys).
*
* The list is decomposed into elements, and we also keep pointers to individual
* cells. This allows us to build the permuted list quickly and cheaply, without
* creating any copies.
*/
static void
PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end)
{
int i;
int n = end - start;
ListCell *lc;
memset(state, 0, sizeof(*state));
state->mutatorNColumns = n;
state->elemsList = list_copy(elems);
state->elems = palloc(sizeof(void *) * n);
state->elemCells = palloc(sizeof(ListCell *) * n);
state->positions = palloc(sizeof(int) * n);
i = 0;
for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start))
{
state->elemCells[i] = lc;
state->elems[i] = lfirst(lc);
state->positions[i] = i + 1;
i++;
if (i >= n)
break;
}
}
/* Swap two elements of an array. */
static void
PathkeyMutatorSwap(int *a, int i, int j)
{
int s = a[i];
a[i] = a[j];
a[j] = s;
}
/*
* Generate the next permutation of elements.
*/
static bool
PathkeyMutatorNextSet(int *a, int n)
{
int j,
k,
l,
r;
j = n - 2;
while (j >= 0 && a[j] >= a[j + 1])
j--;
if (j < 0)
return false;
k = n - 1;
while (k >= 0 && a[j] >= a[k])
k--;
PathkeyMutatorSwap(a, j, k);
l = j + 1;
r = n - 1;
while (l < r)
PathkeyMutatorSwap(a, l++, r--);
return true;
}
/*
* PathkeyMutatorNext
* Generate the next permutation of list of elements.
*
* Returns the next permutation (as a list of elements) or NIL if there are no
* more permutations.
*/
static List *
PathkeyMutatorNext(PathkeyMutatorState *state)
{
int i;
state->count++;
/* first permutation is original list */
if (state->count == 1)
return state->elemsList;
/* when there are no more permutations, return NIL */
if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns))
{
pfree(state->elems);
pfree(state->elemCells);
pfree(state->positions);
list_free(state->elemsList);
return NIL;
}
/* update the list cells to point to the right elements */
for (i = 0; i < state->mutatorNColumns; i++)
lfirst(state->elemCells[i]) =
(void *) state->elems[state->positions[i] - 1];
return state->elemsList;
}
/*
* Cost of comparing pathkeys.
*/
typedef struct PathkeySortCost
{
Cost cost;
PathKey *pathkey;
} PathkeySortCost;
static int
pathkey_sort_cost_comparator(const void *_a, const void *_b)
{
const PathkeySortCost *a = (PathkeySortCost *) _a;
const PathkeySortCost *b = (PathkeySortCost *) _b;
if (a->cost < b->cost)
return -1;
else if (a->cost == b->cost)
return 0;
return 1;
}
/*
* get_cheapest_group_keys_order
* Reorders the group pathkeys / clauses to minimize the comparison cost.
*
* Given the list of pathkeys in '*group_pathkeys', we try to arrange these
* in an order that minimizes the sort costs that will be incurred by the
* GROUP BY. The costs mainly depend on the cost of the sort comparator
* function(s) and the number of distinct values in each column of the GROUP
* BY clause (*group_clauses). Sorting on subsequent columns is only required
* for tiebreak situations where two values sort equally.
*
* In case the input is partially sorted, only the remaining pathkeys are
* considered. 'n_preordered' denotes how many of the leading *group_pathkeys
* the input is presorted by.
*
* Returns true and sets *group_pathkeys and *group_clauses to the newly
* ordered versions of the lists that were passed in via these parameters.
* If no reordering was deemed necessary then we return false, in which case
* the *group_pathkeys and *group_clauses lists are left untouched. The
* original *group_pathkeys and *group_clauses parameter values are never
* destructively modified in place.
*/
static bool
get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
List **group_pathkeys, List **group_clauses,
int n_preordered)
{
List *new_group_pathkeys = NIL,
*new_group_clauses = NIL,
*var_group_pathkeys;
ListCell *cell;
PathkeyMutatorState mstate;
double cheapest_sort_cost = DBL_MAX;
int nFreeKeys;
int nToPermute;
/* If there are less than 2 unsorted pathkeys, we're done. */
if (list_length(*group_pathkeys) - n_preordered < 2)
return false;
/*
* We could exhaustively cost all possible orderings of the pathkeys, but
* for a large number of pathkeys it might be prohibitively expensive. So
* we try to apply simple cheap heuristics first - we sort the pathkeys by
* sort cost (as if the pathkey was sorted independently) and then check
* only the four cheapest pathkeys. The remaining pathkeys are kept
* ordered by cost.
*
* XXX This is a very simple heuristics, but likely to work fine for most
* cases (because the number of GROUP BY clauses tends to be lower than
* 4). But it ignores how the number of distinct values in each pathkey
* affects the following steps. It might be better to use "more expensive"
* pathkey first if it has many distinct values, because it then limits
* the number of comparisons for the remaining pathkeys. But evaluating
* that is likely quite the expensive.
*/
nFreeKeys = list_length(*group_pathkeys) - n_preordered;
nToPermute = 4;
if (nFreeKeys > nToPermute)
{
PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys);
PathkeySortCost *cost = costs;
/*
* Estimate cost for sorting individual pathkeys skipping the
* pre-ordered pathkeys.
*/
for_each_from(cell, *group_pathkeys, n_preordered)
{
PathKey *pathkey = (PathKey *) lfirst(cell);
List *to_cost = list_make1(pathkey);
cost->pathkey = pathkey;
cost->cost = cost_sort_estimate(root, to_cost, 0, nrows);
cost++;
list_free(to_cost);
}
/* sort the pathkeys by sort cost in ascending order */
qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator);
/*
* Rebuild the list of pathkeys - first the preordered ones, then the
* rest ordered by cost.
*/
new_group_pathkeys = list_copy_head(*group_pathkeys, n_preordered);
for (int i = 0; i < nFreeKeys; i++)
new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey);
pfree(costs);
}
else
{
/* Copy the list, so that we can free the new list by list_free. */
new_group_pathkeys = list_copy(*group_pathkeys);
nToPermute = nFreeKeys;
}
Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys));
/*
* Generate pathkey lists with permutations of the first nToPermute
* pathkeys.
*
* XXX We simply calculate sort cost for each individual pathkey list, but
* there's room for two dynamic programming optimizations here. Firstly,
* we may pass the current "best" cost to cost_sort_estimate so that it
* can "abort" if the estimated pathkeys list exceeds it. Secondly, it
* could pass the return information about the position when it exceeded
* the cost, and we could skip all permutations with the same prefix.
*
* Imagine we've already found ordering with cost C1, and we're evaluating
* another ordering - cost_sort_estimate() calculates cost by adding the
* pathkeys one by one (more or less), and the cost only grows. If at any
* point it exceeds C1, it can't possibly be "better" so we can discard
* it. But we also know that we can discard all ordering with the same
* prefix, because if we're estimating (a,b,c,d) and we exceed C1 at (a,b)
* then the same thing will happen for any ordering with this prefix.
*/
PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, n_preordered + nToPermute);
while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL)
{
Cost cost;
cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows);
if (cost < cheapest_sort_cost)
{
list_free(new_group_pathkeys);
new_group_pathkeys = list_copy(var_group_pathkeys);
cheapest_sort_cost = cost;
}
}
/* Reorder the group clauses according to the reordered pathkeys. */
foreach(cell, new_group_pathkeys)
{
PathKey *pathkey = (PathKey *) lfirst(cell);
new_group_clauses = lappend(new_group_clauses,
get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
*group_clauses));
}
/* Just append the rest GROUP BY clauses */
new_group_clauses = list_concat_unique_ptr(new_group_clauses,
*group_clauses);
*group_pathkeys = new_group_pathkeys;
*group_clauses = new_group_clauses;
return true;
}
/*
* get_useful_group_keys_orderings
* Determine which orderings of GROUP BY keys are potentially interesting.
*
* Returns list of PathKeyInfo items, each representing an interesting ordering
* of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
*
* The function considers (and keeps) multiple group by orderings:
*
* - the original ordering, as specified by the GROUP BY clause
*
* - GROUP BY keys reordered to minimize the sort cost
*
* - GROUP BY keys reordered to match path ordering (as much as possible), with
* the tail reordered to minimize the sort cost
*
* - GROUP BY keys to match target ORDER BY clause (as much as possible), with
* the tail reordered to minimize the sort cost
*
* There are other potentially interesting orderings (e.g. it might be best to
* match the first ORDER BY key, order the remaining keys differently and then
* rely on the incremental sort to fix this), but we ignore those for now. To
* make this work we'd have to pretty much generate all possible permutations.
*/
List *
get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
List *path_pathkeys,
List *group_pathkeys, List *group_clauses)
{
Query *parse = root->parse;
List *infos = NIL;
PathKeyInfo *info;
int n_preordered = 0;
List *pathkeys = group_pathkeys;
List *clauses = group_clauses;
/* always return at least the original pathkeys/clauses */
info = makeNode(PathKeyInfo);
info->pathkeys = pathkeys;
info->clauses = clauses;
infos = lappend(infos, info);
/*
* Should we try generating alternative orderings of the group keys? If
* not, we produce only the order specified in the query, i.e. the
* optimization is effectively disabled.
*/
if (!enable_group_by_reordering)
return infos;
/* for grouping sets we can't do any reordering */
if (parse->groupingSets)
return infos;
/*
* Try reordering pathkeys to minimize the sort cost, ignoring both the
* target ordering (ORDER BY) and ordering of the input path.
*/
if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
n_preordered))
{
info = makeNode(PathKeyInfo);
info->pathkeys = pathkeys;
info->clauses = clauses;
infos = lappend(infos, info);
}
/*
* If the path is sorted in some way, try reordering the group keys to
* match as much of the ordering as possible - we get this sort for free
* (mostly).
*
* We must not do this when there are no grouping sets, because those use
* more complex logic to decide the ordering.
*
* XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
* more a complement, because it allows benefiting from incremental sort
* as much as possible.
*
* XXX This does nothing if (n_preordered == 0). We shouldn't create the
* info in this case.
*/
if (path_pathkeys)
{
n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys,
&pathkeys,
&clauses);
/* reorder the tail to minimize sort cost */
get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
n_preordered);
/*
* reorder the tail to minimize sort cost
*
* XXX Ignore the return value - there may be nothing to reorder, in
* which case get_cheapest_group_keys_order returns false. But we
* still want to keep the keys reordered to path_pathkeys.
*/
info = makeNode(PathKeyInfo);
info->pathkeys = pathkeys;
info->clauses = clauses;
infos = lappend(infos, info);
}
/*
* Try reordering pathkeys to minimize the sort cost (this time consider
* the ORDER BY clause, but only if set debug_group_by_match_order_by).
*/
if (root->sort_pathkeys)
{
n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
&pathkeys,
&clauses);
/*
* reorder the tail to minimize sort cost
*
* XXX Ignore the return value - there may be nothing to reorder, in
* which case get_cheapest_group_keys_order returns false. But we
* still want to keep the keys reordered to sort_pathkeys.
*/
get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses,
n_preordered);
/* keep the group keys reordered to match ordering of input path */
info = makeNode(PathKeyInfo);
info->pathkeys = pathkeys;
info->clauses = clauses;
infos = lappend(infos, info);
}
return infos;
}
/*
* pathkeys_count_contained_in
* Same as pathkeys_contained_in, but also sets length of longest
@ -2390,54 +1862,6 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
return n_common_pathkeys;
}
/*
* pathkeys_useful_for_grouping
* Count the number of pathkeys that are useful for grouping (instead of
* explicit sort)
*
* Group pathkeys could be reordered to benefit from the ordering. The
* ordering may not be "complete" and may require incremental sort, but that's
* fine. So we simply count prefix pathkeys with a matching group key, and
* stop once we find the first pathkey without a match.
*
* So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
* pathkeys are useful for grouping, and we might do incremental sort to get
* path ordered by (a,b,e).
*
* This logic is necessary to retain paths with ordering not matching grouping
* keys directly, without the reordering.
*
* Returns the length of pathkey prefix with matching group keys.
*/
static int
pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
{
ListCell *key;
int n = 0;
/* no special ordering requested for grouping */
if (root->group_pathkeys == NIL)
return 0;
/* unordered path */
if (pathkeys == NIL)
return 0;
/* walk the pathkeys and search for matching group key */
foreach(key, pathkeys)
{
PathKey *pathkey = (PathKey *) lfirst(key);
/* no matching group key, we're done */
if (!list_member_ptr(root->group_pathkeys, pathkey))
break;
n++;
}
return n;
}
/*
* truncate_useless_pathkeys
* Shorten the given pathkey list to just the useful pathkeys.
@ -2452,9 +1876,6 @@ truncate_useless_pathkeys(PlannerInfo *root,
nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
if (nuseful2 > nuseful)
nuseful = nuseful2;
@ -2490,8 +1911,6 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
{
if (rel->joininfo != NIL || rel->has_eclass_joins)
return true; /* might be able to use pathkeys for merging */
if (root->group_pathkeys != NIL)
return true; /* might be able to use pathkeys for grouping */
if (root->query_pathkeys != NIL)
return true; /* might be able to use them for ordering */
return false; /* definitely useless */

View File

@ -2756,9 +2756,8 @@ remove_useless_groupby_columns(PlannerInfo *root)
*
* In principle it might be interesting to consider other orderings of the
* GROUP BY elements, which could match the sort ordering of other
* possible plans (eg an indexscan) and thereby reduce cost. However, we
* don't yet have sufficient information to do that here, so that's left until
* later in planning. See get_useful_group_keys_orderings().
* possible plans (eg an indexscan) and thereby reduce cost. We don't
* bother with that, though. Hashed grouping will frequently win anyway.
*
* Note: we need no comparable processing of the distinctClause because
* the parser already enforced that that matches ORDER BY.
@ -6232,122 +6231,24 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
*/
foreach(lc, input_rel->pathlist)
{
ListCell *lc2;
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
bool is_sorted;
int presorted_keys;
List *pathkey_orderings = NIL;
is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
path->pathkeys,
&presorted_keys);
List *group_pathkeys = root->group_pathkeys;
List *group_clauses = parse->groupClause;
/* generate alternative group orderings that might be useful */
pathkey_orderings = get_useful_group_keys_orderings(root,
path->rows,
path->pathkeys,
group_pathkeys,
group_clauses);
Assert(list_length(pathkey_orderings) > 0);
/* process all potentially interesting grouping reorderings */
foreach(lc2, pathkey_orderings)
if (path == cheapest_path || is_sorted)
{
bool is_sorted;
int presorted_keys = 0;
PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
/* restore the path (we replace it in the loop) */
path = path_original;
is_sorted = pathkeys_count_contained_in(info->pathkeys,
path->pathkeys,
&presorted_keys);
if (path == cheapest_path || is_sorted)
{
/* Sort the cheapest-total path if it isn't already sorted */
if (!is_sorted)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
info->pathkeys,
-1.0);
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
consider_groupingsets_paths(root, grouped_rel,
path, true, can_hash,
gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
/*
* We have aggregation, possibly with plain GROUP BY.
* Make an AggPath.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
info->clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
info->clauses,
havingQual,
agg_costs,
dNumGroups));
}
else if (group_clauses)
{
/*
* We have GROUP BY without aggregation or grouping
* sets. Make a GroupPath.
*/
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
info->clauses,
havingQual,
dNumGroups));
}
else
{
/* Other cases should have been handled above */
Assert(false);
}
}
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental
* sort is enabled.
*/
if (is_sorted || !enable_incremental_sort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, no point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1
* because then presorted_keys > 0 would imply is_sorted was
* true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
grouped_rel,
path,
info->pathkeys,
presorted_keys,
-1.0);
/* Sort the cheapest-total path if it isn't already sorted */
if (!is_sorted)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
/* Now decide what to stick atop it */
if (parse->groupingSets)
@ -6367,9 +6268,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
grouped_rel,
path,
grouped_rel->reltarget,
info->clauses ? AGG_SORTED : AGG_PLAIN,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
info->clauses,
parse->groupClause,
havingQual,
agg_costs,
dNumGroups));
@ -6384,7 +6285,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
create_group_path(root,
grouped_rel,
path,
info->clauses,
parse->groupClause,
havingQual,
dNumGroups));
}
@ -6394,6 +6295,79 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
Assert(false);
}
}
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental sort
* is enabled.
*/
if (is_sorted || !enable_incremental_sort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, no point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1 because
* then presorted_keys > 0 would imply is_sorted was true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
consider_groupingsets_paths(root, grouped_rel,
path, true, can_hash,
gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
/*
* We have aggregation, possibly with plain GROUP BY. Make an
* AggPath.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
parse->groupClause,
havingQual,
agg_costs,
dNumGroups));
}
else if (parse->groupClause)
{
/*
* We have GROUP BY without aggregation or grouping sets. Make
* a GroupPath.
*/
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
parse->groupClause,
havingQual,
dNumGroups));
}
else
{
/* Other cases should have been handled above */
Assert(false);
}
}
/*
@ -6404,130 +6378,100 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
{
foreach(lc, partially_grouped_rel->pathlist)
{
ListCell *lc2;
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
bool is_sorted;
int presorted_keys;
List *pathkey_orderings = NIL;
is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
path->pathkeys,
&presorted_keys);
List *group_pathkeys = root->group_pathkeys;
List *group_clauses = parse->groupClause;
/* generate alternative group orderings that might be useful */
pathkey_orderings = get_useful_group_keys_orderings(root,
path->rows,
path->pathkeys,
group_pathkeys,
group_clauses);
Assert(list_length(pathkey_orderings) > 0);
/* process all potentially interesting grouping reorderings */
foreach(lc2, pathkey_orderings)
/*
* Insert a Sort node, if required. But there's no point in
* sorting anything but the cheapest path.
*/
if (!is_sorted)
{
bool is_sorted;
int presorted_keys = 0;
PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
/* restore the path (we replace it in the loop) */
path = path_original;
is_sorted = pathkeys_count_contained_in(info->pathkeys,
path->pathkeys,
&presorted_keys);
/*
* Insert a Sort node, if required. But there's no point
* in sorting anything but the cheapest path.
*/
if (!is_sorted)
{
if (path != partially_grouped_rel->cheapest_total_path)
continue;
path = (Path *) create_sort_path(root,
grouped_rel,
path,
info->pathkeys,
-1.0);
}
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
info->clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
info->clauses,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
info->clauses,
havingQual,
dNumGroups));
/*
* Now we may consider incremental sort on this path, but
* only when the path is not already sorted and when
* incremental sort is enabled.
*/
if (is_sorted || !enable_incremental_sort)
if (path != partially_grouped_rel->cheapest_total_path)
continue;
/*
* Restore the input path (we might have added Sort on
* top).
*/
path = path_original;
/*
* no shared prefix, not point in building incremental
* sort
*/
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1
* because then presorted_keys > 0 would imply is_sorted
* was true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
grouped_rel,
path,
info->pathkeys,
presorted_keys,
-1.0);
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
info->clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
info->clauses,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
info->clauses,
havingQual,
dNumGroups));
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
}
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
parse->groupClause,
havingQual,
dNumGroups));
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental
* sort is enabled.
*/
if (is_sorted || !enable_incremental_sort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, not point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1
* because then presorted_keys > 0 would imply is_sorted was
* true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
parse->groupClause,
havingQual,
dNumGroups));
}
}
}
@ -6730,71 +6674,41 @@ create_partial_grouping_paths(PlannerInfo *root,
*/
foreach(lc, input_rel->pathlist)
{
ListCell *lc2;
Path *path = (Path *) lfirst(lc);
Path *path_save = path;
bool is_sorted;
List *pathkey_orderings = NIL;
List *group_pathkeys = root->group_pathkeys;
List *group_clauses = parse->groupClause;
/* generate alternative group orderings that might be useful */
pathkey_orderings = get_useful_group_keys_orderings(root,
path->rows,
path->pathkeys,
group_pathkeys,
group_clauses);
Assert(list_length(pathkey_orderings) > 0);
/* process all potentially interesting grouping reorderings */
foreach(lc2, pathkey_orderings)
is_sorted = pathkeys_contained_in(root->group_pathkeys,
path->pathkeys);
if (path == cheapest_total_path || is_sorted)
{
bool is_sorted;
int presorted_keys = 0;
PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
path = (Path *) create_sort_path(root,
partially_grouped_rel,
path,
root->group_pathkeys,
-1.0);
/* restore the path (we replace it in the loop) */
path = path_save;
is_sorted = pathkeys_count_contained_in(info->pathkeys,
path->pathkeys,
&presorted_keys);
if (path == cheapest_total_path || is_sorted)
{
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
{
path = (Path *) create_sort_path(root,
partially_grouped_rel,
path,
info->pathkeys,
-1.0);
}
if (parse->hasAggs)
add_path(partially_grouped_rel, (Path *)
create_agg_path(root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
info->clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
info->clauses,
NIL,
agg_partial_costs,
dNumPartialGroups));
else
add_path(partially_grouped_rel, (Path *)
create_group_path(root,
partially_grouped_rel,
path,
info->clauses,
NIL,
dNumPartialGroups));
}
if (parse->hasAggs)
add_path(partially_grouped_rel, (Path *)
create_agg_path(root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
agg_partial_costs,
dNumPartialGroups));
else
add_path(partially_grouped_rel, (Path *)
create_group_path(root,
partially_grouped_rel,
path,
parse->groupClause,
NIL,
dNumPartialGroups));
}
}
@ -6804,8 +6718,6 @@ create_partial_grouping_paths(PlannerInfo *root,
* We can also skip the entire loop when we only have a single-item
* group_pathkeys because then we can't possibly have a presorted
* prefix of the list without having the list be fully sorted.
*
* XXX Shouldn't this also consider the group-key-reordering?
*/
if (enable_incremental_sort && list_length(root->group_pathkeys) > 1)
{
@ -6863,101 +6775,24 @@ create_partial_grouping_paths(PlannerInfo *root,
/* Similar to above logic, but for partial paths. */
foreach(lc, input_rel->partial_pathlist)
{
ListCell *lc2;
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
bool is_sorted;
int presorted_keys;
List *pathkey_orderings = NIL;
is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
path->pathkeys,
&presorted_keys);
List *group_pathkeys = root->group_pathkeys;
List *group_clauses = parse->groupClause;
/* generate alternative group orderings that might be useful */
pathkey_orderings = get_useful_group_keys_orderings(root,
path->rows,
path->pathkeys,
group_pathkeys,
group_clauses);
Assert(list_length(pathkey_orderings) > 0);
/* process all potentially interesting grouping reorderings */
foreach(lc2, pathkey_orderings)
if (path == cheapest_partial_path || is_sorted)
{
bool is_sorted;
int presorted_keys = 0;
PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
/* restore the path (we replace it in the loop) */
path = path_original;
is_sorted = pathkeys_count_contained_in(info->pathkeys,
path->pathkeys,
&presorted_keys);
if (path == cheapest_partial_path || is_sorted)
{
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
{
path = (Path *) create_sort_path(root,
partially_grouped_rel,
path,
info->pathkeys,
-1.0);
}
if (parse->hasAggs)
add_partial_path(partially_grouped_rel, (Path *)
create_agg_path(root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
info->clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
info->clauses,
NIL,
agg_partial_costs,
dNumPartialPartialGroups));
else
add_partial_path(partially_grouped_rel, (Path *)
create_group_path(root,
partially_grouped_rel,
path,
info->clauses,
NIL,
dNumPartialPartialGroups));
}
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental
* sort is enabled.
*/
if (is_sorted || !enable_incremental_sort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, not point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1
* because then presorted_keys > 0 would imply is_sorted was
* true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
partially_grouped_rel,
path,
info->pathkeys,
presorted_keys,
-1.0);
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
path = (Path *) create_sort_path(root,
partially_grouped_rel,
path,
root->group_pathkeys,
-1.0);
if (parse->hasAggs)
add_partial_path(partially_grouped_rel, (Path *)
@ -6965,9 +6800,9 @@ create_partial_grouping_paths(PlannerInfo *root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
info->clauses ? AGG_SORTED : AGG_PLAIN,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
info->clauses,
parse->groupClause,
NIL,
agg_partial_costs,
dNumPartialPartialGroups));
@ -6976,10 +6811,59 @@ create_partial_grouping_paths(PlannerInfo *root,
create_group_path(root,
partially_grouped_rel,
path,
info->clauses,
parse->groupClause,
NIL,
dNumPartialPartialGroups));
}
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental sort
* is enabled.
*/
if (is_sorted || !enable_incremental_sort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, not point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1 because
* then presorted_keys > 0 would imply is_sorted was true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
partially_grouped_rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
if (parse->hasAggs)
add_partial_path(partially_grouped_rel, (Path *)
create_agg_path(root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
agg_partial_costs,
dNumPartialPartialGroups));
else
add_partial_path(partially_grouped_rel, (Path *)
create_group_path(root,
partially_grouped_rel,
path,
parse->groupClause,
NIL,
dNumPartialPartialGroups));
}
}

View File

@ -1342,7 +1342,7 @@ create_append_path(PlannerInfo *root,
pathnode->path.pathkeys = child->pathkeys;
}
else
cost_append(pathnode, root);
cost_append(pathnode);
/* If the caller provided a row estimate, override the computed value. */
if (rows >= 0)

View File

@ -3368,28 +3368,11 @@ double
estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
List **pgset, EstimationInfo *estinfo)
{
return estimate_num_groups_incremental(root, groupExprs,
input_rows, pgset, estinfo,
NULL, 0);
}
/*
* estimate_num_groups_incremental
* An estimate_num_groups variant, optimized for cases that are adding the
* expressions incrementally (e.g. one by one).
*/
double
estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
double input_rows,
List **pgset, EstimationInfo *estinfo,
List **cache_varinfos, int prevNExprs)
{
List *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
List *varinfos = NIL;
double srf_multiplier = 1.0;
double numdistinct;
ListCell *l;
int i,
j;
int i;
/* Zero the estinfo output parameter, if non-NULL */
if (estinfo != NULL)
@ -3420,7 +3403,7 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
*/
numdistinct = 1.0;
i = j = 0;
i = 0;
foreach(l, groupExprs)
{
Node *groupexpr = (Node *) lfirst(l);
@ -3429,14 +3412,6 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
List *varshere;
ListCell *l2;
/* was done on previous call */
if (cache_varinfos && j++ < prevNExprs)
{
if (pgset)
i++; /* to keep in sync with lines below */
continue;
}
/* is expression in this grouping set? */
if (pgset && !list_member_int(*pgset, i++))
continue;
@ -3506,11 +3481,7 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
if (varshere == NIL)
{
if (contain_volatile_functions(groupexpr))
{
if (cache_varinfos)
*cache_varinfos = varinfos;
return input_rows;
}
continue;
}
@ -3527,9 +3498,6 @@ estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
}
}
if (cache_varinfos)
*cache_varinfos = varinfos;
/*
* If now no Vars, we must have an all-constant or all-boolean GROUP BY
* list.

View File

@ -1214,16 +1214,6 @@ static struct config_bool ConfigureNamesBool[] =
true,
NULL, NULL, NULL
},
{
{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables reordering of GROUP BY keys."),
NULL,
GUC_EXPLAIN
},
&enable_group_by_reordering,
true,
NULL, NULL, NULL
},
{
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
gettext_noop("Enables genetic query optimization."),

View File

@ -388,7 +388,6 @@
#enable_seqscan = on
#enable_sort = on
#enable_tidscan = on
#enable_group_by_reordering = on
# - Planner Cost Constants -

View File

@ -1071,16 +1071,6 @@ typedef struct PathKey
bool pk_nulls_first; /* do NULLs come before normal values? */
} PathKey;
/*
* Combines information about pathkeys and the associated clauses.
*/
typedef struct PathKeyInfo
{
NodeTag type;
List *pathkeys;
List *clauses;
} PathKeyInfo;
/*
* VolatileFunctionStatus -- allows nodes to cache their
* contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet

View File

@ -114,9 +114,7 @@ extern void cost_incremental_sort(Path *path,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples, int width, Cost comparison_cost, int sort_mem,
double limit_tuples);
extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
int nPresortedKeys, double tuples);
extern void cost_append(AppendPath *path, PlannerInfo *root);
extern void cost_append(AppendPath *path);
extern void cost_merge_append(Path *path, PlannerInfo *root,
List *pathkeys, int n_streams,
Cost input_startup_cost, Cost input_total_cost,

View File

@ -24,7 +24,6 @@ extern PGDLLIMPORT bool enable_geqo;
extern PGDLLIMPORT int geqo_threshold;
extern PGDLLIMPORT int min_parallel_table_scan_size;
extern PGDLLIMPORT int min_parallel_index_scan_size;
extern PGDLLIMPORT bool enable_group_by_reordering;
/* Hook for plugins to get control in set_rel_pathlist() */
typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@ -203,12 +202,6 @@ typedef enum
extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
extern bool pathkeys_contained_in(List *keys1, List *keys2);
extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
extern int group_keys_reorder_by_pathkeys(List *pathkeys,
List **group_pathkeys,
List **group_clauses);
extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
List *path_pathkeys,
List *group_pathkeys, List *group_clauses);
extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
Relids required_outer,
CostSelector cost_criterion,

View File

@ -214,11 +214,6 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
double input_rows, List **pgset,
EstimationInfo *estinfo);
extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
double input_rows, List **pgset,
EstimationInfo *estinfo,
List **cache_varinfos, int prevNExprs);
extern void estimate_hash_bucket_stats(PlannerInfo *root,
Node *hashkey, double nbuckets,
Selectivity *mcv_freq,

View File

@ -1210,8 +1210,7 @@ explain (costs off)
select distinct min(f1), max(f1) from minmaxtest;
QUERY PLAN
---------------------------------------------------------------------------------------------
HashAggregate
Group Key: $0, $1
Unique
InitPlan 1 (returns $0)
-> Limit
-> Merge Append
@ -1234,8 +1233,10 @@ explain (costs off)
-> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
Index Cond: (f1 IS NOT NULL)
-> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
-> Result
(25 rows)
-> Sort
Sort Key: ($0), ($1)
-> Result
(26 rows)
select distinct min(f1), max(f1) from minmaxtest;
min | max
@ -2447,241 +2448,6 @@ SELECT balk(hundred) FROM tenk1;
(1 row)
ROLLBACK;
-- GROUP BY optimization by reorder columns
SELECT
i AS id,
i/2 AS p,
format('%60s', i%2) AS v,
i/4 AS c,
i/8 AS d,
(random() * (10000/8))::int as e --the same as d but no correlation with p
INTO btg
FROM
generate_series(1, 10000) i;
VACUUM btg;
ANALYZE btg;
-- GROUP BY optimization by reorder columns by frequency
SET enable_hashagg=off;
SET max_parallel_workers= 0;
SET max_parallel_workers_per_gather = 0;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, v;
QUERY PLAN
-----------------------------
GroupAggregate
Group Key: p, v
-> Sort
Sort Key: p, v
-> Seq Scan on btg
(5 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p;
QUERY PLAN
-----------------------------
GroupAggregate
Group Key: p, v
-> Sort
Sort Key: p, v
-> Seq Scan on btg
(5 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, c;
QUERY PLAN
-----------------------------
GroupAggregate
Group Key: p, c, v
-> Sort
Sort Key: p, c, v
-> Seq Scan on btg
(5 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
QUERY PLAN
-----------------------------
GroupAggregate
Group Key: v, p, c
-> Sort
Sort Key: v, p, c
-> Seq Scan on btg
(5 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, d, c;
QUERY PLAN
------------------------------
GroupAggregate
Group Key: p, d, c, v
-> Sort
Sort Key: p, d, c, v
-> Seq Scan on btg
(5 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
QUERY PLAN
------------------------------
GroupAggregate
Group Key: v, p, d, c
-> Sort
Sort Key: v, p, d, c
-> Seq Scan on btg
(5 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
QUERY PLAN
------------------------------
GroupAggregate
Group Key: p, v, d, c
-> Sort
Sort Key: p, v, d, c
-> Seq Scan on btg
(5 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, d, e;
QUERY PLAN
-----------------------------
GroupAggregate
Group Key: p, d, e
-> Sort
Sort Key: p, d, e
-> Seq Scan on btg
(5 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, e, d;
QUERY PLAN
-----------------------------
GroupAggregate
Group Key: p, e, d
-> Sort
Sort Key: p, e, d
-> Seq Scan on btg
(5 rows)
CREATE STATISTICS btg_dep ON d, e, p FROM btg;
ANALYZE btg;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, d, e;
QUERY PLAN
-----------------------------
GroupAggregate
Group Key: p, d, e
-> Sort
Sort Key: p, d, e
-> Seq Scan on btg
(5 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, e, d;
QUERY PLAN
-----------------------------
GroupAggregate
Group Key: p, e, d
-> Sort
Sort Key: p, e, d
-> Seq Scan on btg
(5 rows)
-- GROUP BY optimization by reorder columns by index scan
CREATE INDEX ON btg(p, v);
SET enable_seqscan=off;
SET enable_bitmapscan=off;
VACUUM btg;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, v;
QUERY PLAN
------------------------------------------------
GroupAggregate
Group Key: p, v
-> Index Only Scan using btg_p_v_idx on btg
(3 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
QUERY PLAN
------------------------------------------------
GroupAggregate
Group Key: p, v
-> Index Only Scan using btg_p_v_idx on btg
(3 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p;
QUERY PLAN
------------------------------------------------
GroupAggregate
Group Key: p, v
-> Index Only Scan using btg_p_v_idx on btg
(3 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
QUERY PLAN
------------------------------------------------
GroupAggregate
Group Key: p, v
-> Index Only Scan using btg_p_v_idx on btg
(3 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, c;
QUERY PLAN
-------------------------------------------------
GroupAggregate
Group Key: p, c, v
-> Incremental Sort
Sort Key: p, c, v
Presorted Key: p
-> Index Scan using btg_p_v_idx on btg
(6 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
QUERY PLAN
-------------------------------------------------
GroupAggregate
Group Key: p, v, c
-> Incremental Sort
Sort Key: p, v, c
Presorted Key: p, v
-> Index Scan using btg_p_v_idx on btg
(6 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, c, p, d;
QUERY PLAN
-------------------------------------------------
GroupAggregate
Group Key: p, c, d, v
-> Incremental Sort
Sort Key: p, c, d, v
Presorted Key: p
-> Index Scan using btg_p_v_idx on btg
(6 rows)
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
QUERY PLAN
-------------------------------------------------
GroupAggregate
Group Key: p, v, c, d
-> Incremental Sort
Sort Key: p, v, c, d
Presorted Key: p, v
-> Index Scan using btg_p_v_idx on btg
(6 rows)
DROP TABLE btg;
RESET enable_hashagg;
RESET max_parallel_workers;
RESET max_parallel_workers_per_gather;
RESET enable_seqscan;
RESET enable_bitmapscan;
-- Secondly test the case of a parallel aggregate combiner function
-- returning NULL. For that use normal transition function, but a
-- combiner function returning NULL.

View File

@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
create table t (a int, b int, c int);
insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
create index on t (a);
analyze t;
set enable_incremental_sort = off;

View File

@ -1984,8 +1984,8 @@ USING (name);
------+----+----
bb | 12 | 13
cc | 22 | 23
ee | 42 |
dd | | 33
ee | 42 |
(4 rows)
-- Cases with non-nullable expressions in subquery results;
@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
------+------+------+------+------
bb | 12 | 2 | 13 | 3
cc | 22 | 2 | 23 | 3
ee | 42 | 2 | |
dd | | | 33 | 3
ee | 42 | 2 | |
(4 rows)
SELECT * FROM
@ -4645,20 +4645,18 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
explain (costs off)
select d.* from d left join (select distinct * from b) s
on d.a = s.id;
QUERY PLAN
---------------------------------------------
Merge Left Join
Merge Cond: (d.a = s.id)
QUERY PLAN
--------------------------------------
Merge Right Join
Merge Cond: (b.id = d.a)
-> Unique
-> Sort
Sort Key: b.id, b.c_id
-> Seq Scan on b
-> Sort
Sort Key: d.a
-> Seq Scan on d
-> Sort
Sort Key: s.id
-> Subquery Scan on s
-> HashAggregate
Group Key: b.id, b.c_id
-> Seq Scan on b
(11 rows)
(9 rows)
-- check join removal works when uniqueness of the join condition is enforced
-- by a UNION
@ -6368,39 +6366,44 @@ select * from j1 natural join j2;
explain (verbose, costs off)
select * from j1
inner join (select distinct id from j3) j3 on j1.id = j3.id;
QUERY PLAN
-----------------------------------
QUERY PLAN
-----------------------------------------
Nested Loop
Output: j1.id, j3.id
Inner Unique: true
Join Filter: (j1.id = j3.id)
-> HashAggregate
-> Unique
Output: j3.id
Group Key: j3.id
-> Seq Scan on public.j3
-> Sort
Output: j3.id
Sort Key: j3.id
-> Seq Scan on public.j3
Output: j3.id
-> Seq Scan on public.j1
Output: j1.id
(11 rows)
(13 rows)
-- ensure group by clause allows the inner to become unique
explain (verbose, costs off)
select * from j1
inner join (select id from j3 group by id) j3 on j1.id = j3.id;
QUERY PLAN
-----------------------------------
QUERY PLAN
-----------------------------------------
Nested Loop
Output: j1.id, j3.id
Inner Unique: true
Join Filter: (j1.id = j3.id)
-> HashAggregate
-> Group
Output: j3.id
Group Key: j3.id
-> Seq Scan on public.j3
-> Sort
Output: j3.id
Sort Key: j3.id
-> Seq Scan on public.j3
Output: j3.id
-> Seq Scan on public.j1
Output: j1.id
(11 rows)
(14 rows)
drop table j1;
drop table j2;

View File

@ -1460,15 +1460,18 @@ WHEN MATCHED AND t.a < 10 THEN
explain_merge
--------------------------------------------------------------------
Merge on ex_mtarget t (actual rows=0 loops=1)
-> Hash Join (actual rows=0 loops=1)
Hash Cond: (s.a = t.a)
-> Seq Scan on ex_msource s (actual rows=1 loops=1)
-> Hash (actual rows=0 loops=1)
Buckets: xxx Batches: xxx Memory Usage: xxx
-> Merge Join (actual rows=0 loops=1)
Merge Cond: (t.a = s.a)
-> Sort (actual rows=0 loops=1)
Sort Key: t.a
Sort Method: quicksort Memory: xxx
-> Seq Scan on ex_mtarget t (actual rows=0 loops=1)
Filter: (a < '-1000'::integer)
Rows Removed by Filter: 54
(9 rows)
-> Sort (never executed)
Sort Key: s.a
-> Seq Scan on ex_msource s (never executed)
(12 rows)
DROP TABLE ex_msource, ex_mtarget;
DROP FUNCTION explain_merge(text);

View File

@ -949,12 +949,12 @@ SET parallel_setup_cost = 0;
-- is not partial agg safe.
EXPLAIN (COSTS OFF)
SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
QUERY PLAN
--------------------------------------------------------------------------------------------
Gather Merge
Workers Planned: 2
-> Sort
Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
QUERY PLAN
--------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-> Gather
Workers Planned: 2
-> Parallel Append
-> GroupAggregate
Group Key: pagg_tab_ml.a
@ -1381,26 +1381,28 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
-- When GROUP BY clause does not match; partial aggregation is performed for each partition.
EXPLAIN (COSTS OFF)
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
QUERY PLAN
-------------------------------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
-> Finalize HashAggregate
-> Finalize GroupAggregate
Group Key: pagg_tab_para.y
Filter: (avg(pagg_tab_para.x) < '12'::numeric)
-> Gather
-> Gather Merge
Workers Planned: 2
-> Parallel Append
-> Partial HashAggregate
Group Key: pagg_tab_para.y
-> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-> Partial HashAggregate
Group Key: pagg_tab_para_1.y
-> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-> Partial HashAggregate
Group Key: pagg_tab_para_2.y
-> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
(17 rows)
-> Sort
Sort Key: pagg_tab_para.y
-> Parallel Append
-> Partial HashAggregate
Group Key: pagg_tab_para.y
-> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
-> Partial HashAggregate
Group Key: pagg_tab_para_1.y
-> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
-> Partial HashAggregate
Group Key: pagg_tab_para_2.y
-> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
(19 rows)
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
y | sum | avg | count

View File

@ -466,41 +466,52 @@ EXPLAIN (COSTS OFF)
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
WHERE a BETWEEN 490 AND 510
GROUP BY 1, 2 ORDER BY 1, 2;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
Group
Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-> Sort
-> Merge Append
Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-> Append
-> Merge Full Join
Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-> Sort
Sort Key: prt1_1.a, prt1_1.b
-> Seq Scan on prt1_p1 prt1_1
-> Sort
Sort Key: p2_1.a, p2_1.b
-> Seq Scan on prt2_p1 p2_1
-> Merge Full Join
Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-> Sort
Sort Key: prt1_2.a, prt1_2.b
-> Seq Scan on prt1_p2 prt1_2
-> Sort
Sort Key: p2_2.a, p2_2.b
-> Seq Scan on prt2_p2 p2_2
-> Merge Full Join
Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a))
Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
-> Sort
Sort Key: prt1_3.b, prt1_3.a
-> Seq Scan on prt1_p3 prt1_3
-> Sort
Sort Key: p2_3.b, p2_3.a
-> Seq Scan on prt2_p3 p2_3
(32 rows)
-> Group
Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-> Sort
Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
-> Merge Full Join
Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
-> Sort
Sort Key: prt1.a, prt1.b
-> Seq Scan on prt1_p1 prt1
-> Sort
Sort Key: p2.a, p2.b
-> Seq Scan on prt2_p1 p2
-> Group
Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-> Sort
Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
-> Merge Full Join
Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
-> Sort
Sort Key: prt1_1.a, prt1_1.b
-> Seq Scan on prt1_p2 prt1_1
-> Sort
Sort Key: p2_1.a, p2_1.b
-> Seq Scan on prt2_p2 p2_1
-> Group
Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-> Sort
Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
-> Merge Full Join
Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
-> Sort
Sort Key: prt1_2.a, prt1_2.b
-> Seq Scan on prt1_p3 prt1_2
-> Sort
Sort Key: p2_2.a, p2_2.b
-> Seq Scan on prt2_p3 p2_2
(43 rows)
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
WHERE a BETWEEN 490 AND 510

View File

@ -114,7 +114,6 @@ select name, setting from pg_settings where name like 'enable%';
enable_async_append | on
enable_bitmapscan | on
enable_gathermerge | on
enable_group_by_reordering | on
enable_hashagg | on
enable_hashjoin | on
enable_incremental_sort | on
@ -132,7 +131,7 @@ select name, setting from pg_settings where name like 'enable%';
enable_seqscan | on
enable_sort | on
enable_tidscan | on
(21 rows)
(20 rows)
-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
-- more-or-less working. We can't test their contents in any great detail

View File

@ -1303,22 +1303,24 @@ select distinct q1 from
union all
select distinct * from int8_tbl i82) ss
where q2 = q2;
QUERY PLAN
----------------------------------------------------
HashAggregate
Group Key: "*SELECT* 1".q1
-> Append
QUERY PLAN
----------------------------------------------------------
Unique
-> Merge Append
Sort Key: "*SELECT* 1".q1
-> Subquery Scan on "*SELECT* 1"
-> HashAggregate
Group Key: i81.q1, i81.q2
-> Seq Scan on int8_tbl i81
Filter: (q2 IS NOT NULL)
-> Unique
-> Sort
Sort Key: i81.q1, i81.q2
-> Seq Scan on int8_tbl i81
Filter: (q2 IS NOT NULL)
-> Subquery Scan on "*SELECT* 2"
-> HashAggregate
Group Key: i82.q1, i82.q2
-> Seq Scan on int8_tbl i82
Filter: (q2 IS NOT NULL)
(13 rows)
-> Unique
-> Sort
Sort Key: i82.q1, i82.q2
-> Seq Scan on int8_tbl i82
Filter: (q2 IS NOT NULL)
(15 rows)
select distinct q1 from
(select distinct * from int8_tbl i81
@ -1337,22 +1339,24 @@ select distinct q1 from
union all
select distinct * from int8_tbl i82) ss
where -q1 = q2;
QUERY PLAN
--------------------------------------------------
HashAggregate
Group Key: "*SELECT* 1".q1
-> Append
QUERY PLAN
--------------------------------------------------------
Unique
-> Merge Append
Sort Key: "*SELECT* 1".q1
-> Subquery Scan on "*SELECT* 1"
-> HashAggregate
Group Key: i81.q1, i81.q2
-> Seq Scan on int8_tbl i81
Filter: ((- q1) = q2)
-> Unique
-> Sort
Sort Key: i81.q1, i81.q2
-> Seq Scan on int8_tbl i81
Filter: ((- q1) = q2)
-> Subquery Scan on "*SELECT* 2"
-> HashAggregate
Group Key: i82.q1, i82.q2
-> Seq Scan on int8_tbl i82
Filter: ((- q1) = q2)
(13 rows)
-> Unique
-> Sort
Sort Key: i82.q1, i82.q2
-> Seq Scan on int8_tbl i82
Filter: ((- q1) = q2)
(15 rows)
select distinct q1 from
(select distinct * from int8_tbl i81

View File

@ -1025,105 +1025,6 @@ SELECT balk(hundred) FROM tenk1;
ROLLBACK;
-- GROUP BY optimization by reorder columns
SELECT
i AS id,
i/2 AS p,
format('%60s', i%2) AS v,
i/4 AS c,
i/8 AS d,
(random() * (10000/8))::int as e --the same as d but no correlation with p
INTO btg
FROM
generate_series(1, 10000) i;
VACUUM btg;
ANALYZE btg;
-- GROUP BY optimization by reorder columns by frequency
SET enable_hashagg=off;
SET max_parallel_workers= 0;
SET max_parallel_workers_per_gather = 0;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, v;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, c;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, d, c;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, d, e;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, e, d;
CREATE STATISTICS btg_dep ON d, e, p FROM btg;
ANALYZE btg;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, d, e;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, e, d;
-- GROUP BY optimization by reorder columns by index scan
CREATE INDEX ON btg(p, v);
SET enable_seqscan=off;
SET enable_bitmapscan=off;
VACUUM btg;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, v;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, c;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, c, p, d;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v;
DROP TABLE btg;
RESET enable_hashagg;
RESET max_parallel_workers;
RESET max_parallel_workers_per_gather;
RESET enable_seqscan;
RESET enable_bitmapscan;
-- Secondly test the case of a parallel aggregate combiner function
-- returning NULL. For that use normal transition function, but a
-- combiner function returning NULL.

View File

@ -213,7 +213,7 @@ set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
create table t (a int, b int, c int);
insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
create index on t (a);
analyze t;