/*-------------------------------------------------------------------------
*
* outfuncs.c
* Output functions for Postgres tree nodes.
*
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/nodes/outfuncs.c
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
* have an output function defined here (as well as an input function
* in readfuncs.c). In addition, plan nodes should have input and
* output functions so that they can be sent to parallel workers.
* For use in debugging, we also provide output functions for nodes
* that appear in raw parsetrees and planner Paths. These nodes however need
* not have input functions.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include "lib/stringinfo.h"
#include "nodes/extensible.h"
#include "nodes/plannodes.h"
#include "nodes/relation.h"
#include "utils/datum.h"
#include "utils/rel.h"
/*
* Macros to simplify output of different kinds of fields. Use these
* wherever possible to reduce the chance for silly typos. Note that these
* hard-wire conventions about the names of the local variables in an Out
* routine.
*/
/* Write the label for the node type */
#define WRITE_NODE_TYPE(nodelabel) \
appendStringInfoString(str, nodelabel)
/* Write an integer field (anything written as ":fldname %d") */
#define WRITE_INT_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname)
/* Write an unsigned integer field (anything written as ":fldname %u") */
#define WRITE_UINT_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname)
/* Write an OID field (don't hard-wire assumption that OID is same as uint) */
#define WRITE_OID_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname)
/* Write a long-integer field */
#define WRITE_LONG_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %ld", node->fldname)
/* Write a char field (ie, one ascii character) */
#define WRITE_CHAR_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %c", node->fldname)
/* Write an enumerated-type field as an integer code */
#define WRITE_ENUM_FIELD(fldname, enumtype) \
appendStringInfo(str, " :" CppAsString(fldname) " %d", \
(int) node->fldname)
/* Write a float field --- caller must give format to define precision */
#define WRITE_FLOAT_FIELD(fldname,format) \
appendStringInfo(str, " :" CppAsString(fldname) " " format, node->fldname)
/* Write a boolean field */
#define WRITE_BOOL_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %s", \
booltostr(node->fldname))
/* Write a character-string (possibly NULL) field */
#define WRITE_STRING_FIELD(fldname) \
(appendStringInfo(str, " :" CppAsString(fldname) " "), \
_outToken(str, node->fldname))
/* Write a parse location field (actually same as INT case) */
#define WRITE_LOCATION_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %d", node->fldname)
/* Write a Node field */
#define WRITE_NODE_FIELD(fldname) \
(appendStringInfo(str, " :" CppAsString(fldname) " "), \
outNode(str, node->fldname))
/* Write a bitmapset field */
#define WRITE_BITMAPSET_FIELD(fldname) \
(appendStringInfo(str, " :" CppAsString(fldname) " "), \
_outBitmapset(str, node->fldname))
#define booltostr(x) ((x) ? "true" : "false")
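/*
 * For illustration only: with the macros above, an output routine for a
 * hypothetical node type "Foo" having an int field, a string field and a
 * child node could be written as
 *
 *    static void
 *    _outFoo(StringInfo str, const Foo *node)
 *    {
 *        WRITE_NODE_TYPE("FOO");
 *        WRITE_INT_FIELD(bar);
 *        WRITE_STRING_FIELD(name);
 *        WRITE_NODE_FIELD(child);
 *    }
 *
 * Note that the macros depend on the local variables being named "str"
 * and "node", as mentioned above.
 */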
/*
* _outToken
* Convert an ordinary string (eg, an identifier) into a form that
* will be decoded back to a plain token by read.c's functions.
*
* If a null or empty string is given, it is encoded as "<>".
*/
static void
_outToken(StringInfo str, const char *s)
{
if (s == NULL || *s == '\0')
{
appendStringInfoString(str, "<>");
return;
}
/*
* Look for characters or patterns that are treated specially by read.c
* (either in pg_strtok() or in nodeRead()), and therefore need a
* protective backslash.
*/
/* These characters only need to be quoted at the start of the string */
if (*s == '<' ||
*s == '"' ||
isdigit((unsigned char) *s) ||
((*s == '+' || *s == '-') &&
(isdigit((unsigned char) s[1]) || s[1] == '.')))
appendStringInfoChar(str, '\\');
while (*s)
{
/* These chars must be backslashed anywhere in the string */
if (*s == ' ' || *s == '\n' || *s == '\t' ||
*s == '(' || *s == ')' || *s == '{' || *s == '}' ||
*s == '\\')
appendStringInfoChar(str, '\\');
appendStringInfoChar(str, *s++);
}
}
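/*
 * A rough sketch of the escaping performed above (illustrative, not
 * exhaustive):
 *
 *    _outToken(str, NULL)     appends    <>
 *    _outToken(str, "foo")    appends    foo
 *    _outToken(str, "a b")    appends    a\ b
 *    _outToken(str, "123")    appends    \123
 *    _outToken(str, "(x)")    appends    \(x\)
 *
 * so that read.c's functions see each of these as a single token and
 * decode it back to the original string.
 */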
/* for use by extensions which define extensible nodes */
void
outToken(StringInfo str, const char *s)
{
_outToken(str, s);
}
static void
_outList(StringInfo str, const List *node)
{
const ListCell *lc;
appendStringInfoChar(str, '(');
if (IsA(node, IntList))
appendStringInfoChar(str, 'i');
else if (IsA(node, OidList))
appendStringInfoChar(str, 'o');
foreach(lc, node)
{
/*
* For the sake of backward compatibility, we emit a slightly
* different whitespace format for lists of nodes vs. other types of
* lists. XXX: is this necessary?
*/
if (IsA(node, List))
{
outNode(str, lfirst(lc));
if (lnext(lc))
appendStringInfoChar(str, ' ');
}
else if (IsA(node, IntList))
appendStringInfo(str, " %d", lfirst_int(lc));
else if (IsA(node, OidList))
appendStringInfo(str, " %u", lfirst_oid(lc));
else
elog(ERROR, "unrecognized list node type: %d",
(int) node->type);
}
appendStringInfoChar(str, ')');
}
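/*
 * Sketch of the resulting list syntax (for illustration): a list of nodes
 * is emitted as "(node node ...)", an integer list as "(i int int ...)",
 * and an OID list as "(o oid oid ...)".  For example, an IntList holding
 * 1, 2 and 3 comes out as "(i 1 2 3)".
 */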
/*
* _outBitmapset -
* converts a bitmap set of integers
*
* Note: the output format is "(b int int ...)", similar to an integer List.
*/
static void
_outBitmapset(StringInfo str, const Bitmapset *bms)
{
int x;
appendStringInfoChar(str, '(');
appendStringInfoChar(str, 'b');
x = -1;
while ((x = bms_next_member(bms, x)) >= 0)
appendStringInfo(str, " %d", x);
appendStringInfoChar(str, ')');
}
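/*
 * For example (illustration only), a Bitmapset with members 2, 5 and 7 is
 * emitted as "(b 2 5 7)"; an empty or NULL set is emitted as "(b)".
 */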
/* for use by extensions which define extensible nodes */
void
outBitmapset(StringInfo str, const Bitmapset *bms)
{
_outBitmapset(str, bms);
}
/*
* Print the value of a Datum given its type.
*/
void
outDatum(StringInfo str, Datum value, int typlen, bool typbyval)
{
Size length,
i;
char *s;
length = datumGetSize(value, typbyval, typlen);
if (typbyval)
{
s = (char *) (&value);
appendStringInfo(str, "%u [ ", (unsigned int) length);
for (i = 0; i < (Size) sizeof(Datum); i++)
appendStringInfo(str, "%d ", (int) (s[i]));
appendStringInfoChar(str, ']');
}
else
{
s = (char *) DatumGetPointer(value);
if (!PointerIsValid(s))
appendStringInfoString(str, "0 [ ]");
else
{
appendStringInfo(str, "%u [ ", (unsigned int) length);
for (i = 0; i < length; i++)
appendStringInfo(str, "%d ", (int) (s[i]));
appendStringInfoChar(str, ']');
}
}
}
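/*
 * Illustrative sketch (not exhaustive): for a pass-by-value int4 Datum
 * holding 42 on a 32-bit little-endian build, outDatum emits
 * "4 [ 42 0 0 0 ]" (the datum's size followed by the raw bytes of the
 * Datum); for a pass-by-reference type whose pointer is NULL it emits
 * "0 [ ]".
 */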
/*
* Stuff from plannodes.h
*/
static void
_outPlannedStmt(StringInfo str, const PlannedStmt *node)
{
WRITE_NODE_TYPE("PLANNEDSTMT");
WRITE_ENUM_FIELD(commandType, CmdType);
WRITE_UINT_FIELD(queryId);
WRITE_BOOL_FIELD(hasReturning);
WRITE_BOOL_FIELD(hasModifyingCTE);
WRITE_BOOL_FIELD(canSetTag);
WRITE_BOOL_FIELD(transientPlan);
WRITE_BOOL_FIELD(dependsOnRole);
WRITE_BOOL_FIELD(parallelModeNeeded);
WRITE_NODE_FIELD(planTree);
WRITE_NODE_FIELD(rtable);
WRITE_NODE_FIELD(resultRelations);
WRITE_NODE_FIELD(utilityStmt);
WRITE_NODE_FIELD(subplans);
WRITE_BITMAPSET_FIELD(rewindPlanIDs);
WRITE_NODE_FIELD(rowMarks);
WRITE_NODE_FIELD(relationOids);
WRITE_NODE_FIELD(invalItems);
WRITE_INT_FIELD(nParamExec);
}
/*
* print the basic stuff of all nodes that inherit from Plan
*/
static void
_outPlanInfo(StringInfo str, const Plan *node)
{
WRITE_FLOAT_FIELD(startup_cost, "%.2f");
WRITE_FLOAT_FIELD(total_cost, "%.2f");
WRITE_FLOAT_FIELD(plan_rows, "%.0f");
WRITE_INT_FIELD(plan_width);
WRITE_BOOL_FIELD(parallel_aware);
WRITE_INT_FIELD(plan_node_id);
WRITE_NODE_FIELD(targetlist);
WRITE_NODE_FIELD(qual);
WRITE_NODE_FIELD(lefttree);
WRITE_NODE_FIELD(righttree);
WRITE_NODE_FIELD(initPlan);
WRITE_BITMAPSET_FIELD(extParam);
WRITE_BITMAPSET_FIELD(allParam);
}
/*
* print the basic stuff of all nodes that inherit from Scan
*/
static void
_outScanInfo(StringInfo str, const Scan *node)
{
_outPlanInfo(str, (const Plan *) node);
WRITE_UINT_FIELD(scanrelid);
}
/*
* print the basic stuff of all nodes that inherit from Join
*/
static void
_outJoinPlanInfo(StringInfo str, const Join *node)
{
_outPlanInfo(str, (const Plan *) node);
WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_NODE_FIELD(joinqual);
}
static void
_outPlan(StringInfo str, const Plan *node)
{
WRITE_NODE_TYPE("PLAN");
_outPlanInfo(str, (const Plan *) node);
}
static void
_outResult(StringInfo str, const Result *node)
{
WRITE_NODE_TYPE("RESULT");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(resconstantqual);
}
static void
_outModifyTable(StringInfo str, const ModifyTable *node)
{
WRITE_NODE_TYPE("MODIFYTABLE");
_outPlanInfo(str, (const Plan *) node);
WRITE_ENUM_FIELD(operation, CmdType);
WRITE_BOOL_FIELD(canSetTag);
WRITE_UINT_FIELD(nominalRelation);
WRITE_NODE_FIELD(resultRelations);
WRITE_INT_FIELD(resultRelIndex);
WRITE_NODE_FIELD(plans);
WRITE_NODE_FIELD(withCheckOptionLists);
WRITE_NODE_FIELD(returningLists);
WRITE_NODE_FIELD(fdwPrivLists);
WRITE_BITMAPSET_FIELD(fdwDirectModifyPlans);
WRITE_NODE_FIELD(rowMarks);
WRITE_INT_FIELD(epqParam);
WRITE_ENUM_FIELD(onConflictAction, OnConflictAction);
WRITE_NODE_FIELD(arbiterIndexes);
WRITE_NODE_FIELD(onConflictSet);
WRITE_NODE_FIELD(onConflictWhere);
WRITE_UINT_FIELD(exclRelRTI);
WRITE_NODE_FIELD(exclRelTlist);
}
static void
_outAppend(StringInfo str, const Append *node)
{
WRITE_NODE_TYPE("APPEND");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(appendplans);
}
static void
_outMergeAppend(StringInfo str, const MergeAppend *node)
{
int i;
WRITE_NODE_TYPE("MERGEAPPEND");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(mergeplans);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :sortColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->sortColIdx[i]);
appendStringInfoString(str, " :sortOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->sortOperators[i]);
appendStringInfoString(str, " :collations");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->collations[i]);
appendStringInfoString(str, " :nullsFirst");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %s", booltostr(node->nullsFirst[i]));
}
static void
_outRecursiveUnion(StringInfo str, const RecursiveUnion *node)
{
int i;
WRITE_NODE_TYPE("RECURSIVEUNION");
_outPlanInfo(str, (const Plan *) node);
WRITE_INT_FIELD(wtParam);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :dupColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->dupColIdx[i]);
appendStringInfoString(str, " :dupOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->dupOperators[i]);
WRITE_LONG_FIELD(numGroups);
}
static void
_outBitmapAnd(StringInfo str, const BitmapAnd *node)
{
WRITE_NODE_TYPE("BITMAPAND");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(bitmapplans);
}
static void
_outBitmapOr(StringInfo str, const BitmapOr *node)
{
WRITE_NODE_TYPE("BITMAPOR");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(bitmapplans);
}
static void
_outGather(StringInfo str, const Gather *node)
{
WRITE_NODE_TYPE("GATHER");
_outPlanInfo(str, (const Plan *) node);
WRITE_INT_FIELD(num_workers);
WRITE_BOOL_FIELD(single_copy);
WRITE_BOOL_FIELD(invisible);
}
static void
_outScan(StringInfo str, const Scan *node)
{
WRITE_NODE_TYPE("SCAN");
_outScanInfo(str, node);
}
static void
_outSeqScan(StringInfo str, const SeqScan *node)
{
WRITE_NODE_TYPE("SEQSCAN");
_outScanInfo(str, (const Scan *) node);
}
static void
_outSampleScan(StringInfo str, const SampleScan *node)
{
WRITE_NODE_TYPE("SAMPLESCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(tablesample);
}
static void
_outIndexScan(StringInfo str, const IndexScan *node)
{
WRITE_NODE_TYPE("INDEXSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_OID_FIELD(indexid);
WRITE_NODE_FIELD(indexqual);
WRITE_NODE_FIELD(indexqualorig);
WRITE_NODE_FIELD(indexorderby);
WRITE_NODE_FIELD(indexorderbyorig);
WRITE_NODE_FIELD(indexorderbyops);
WRITE_ENUM_FIELD(indexorderdir, ScanDirection);
}
static void
_outIndexOnlyScan(StringInfo str, const IndexOnlyScan *node)
{
WRITE_NODE_TYPE("INDEXONLYSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_OID_FIELD(indexid);
WRITE_NODE_FIELD(indexqual);
WRITE_NODE_FIELD(indexorderby);
WRITE_NODE_FIELD(indextlist);
WRITE_ENUM_FIELD(indexorderdir, ScanDirection);
}
static void
_outBitmapIndexScan(StringInfo str, const BitmapIndexScan *node)
{
WRITE_NODE_TYPE("BITMAPINDEXSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_OID_FIELD(indexid);
WRITE_NODE_FIELD(indexqual);
WRITE_NODE_FIELD(indexqualorig);
}
static void
_outBitmapHeapScan(StringInfo str, const BitmapHeapScan *node)
{
WRITE_NODE_TYPE("BITMAPHEAPSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(bitmapqualorig);
}
static void
_outTidScan(StringInfo str, const TidScan *node)
{
WRITE_NODE_TYPE("TIDSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(tidquals);
}
static void
_outSubqueryScan(StringInfo str, const SubqueryScan *node)
{
WRITE_NODE_TYPE("SUBQUERYSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(subplan);
}
static void
_outFunctionScan(StringInfo str, const FunctionScan *node)
{
WRITE_NODE_TYPE("FUNCTIONSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(functions);
WRITE_BOOL_FIELD(funcordinality);
}
static void
_outValuesScan(StringInfo str, const ValuesScan *node)
{
WRITE_NODE_TYPE("VALUESSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_NODE_FIELD(values_lists);
}
static void
_outCteScan(StringInfo str, const CteScan *node)
{
WRITE_NODE_TYPE("CTESCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_INT_FIELD(ctePlanId);
WRITE_INT_FIELD(cteParam);
}
static void
_outWorkTableScan(StringInfo str, const WorkTableScan *node)
{
WRITE_NODE_TYPE("WORKTABLESCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_INT_FIELD(wtParam);
}
static void
_outForeignScan(StringInfo str, const ForeignScan *node)
{
WRITE_NODE_TYPE("FOREIGNSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_ENUM_FIELD(operation, CmdType);
WRITE_OID_FIELD(fs_server);
WRITE_NODE_FIELD(fdw_exprs);
WRITE_NODE_FIELD(fdw_private);
WRITE_NODE_FIELD(fdw_scan_tlist);
WRITE_NODE_FIELD(fdw_recheck_quals);
WRITE_BITMAPSET_FIELD(fs_relids);
WRITE_BOOL_FIELD(fsSystemCol);
}
static void
_outCustomScan(StringInfo str, const CustomScan *node)
{
WRITE_NODE_TYPE("CUSTOMSCAN");
_outScanInfo(str, (const Scan *) node);
WRITE_UINT_FIELD(flags);
WRITE_NODE_FIELD(custom_plans);
WRITE_NODE_FIELD(custom_exprs);
WRITE_NODE_FIELD(custom_private);
WRITE_NODE_FIELD(custom_scan_tlist);
WRITE_BITMAPSET_FIELD(custom_relids);
/* CustomName is a key to lookup CustomScanMethods */
appendStringInfoString(str, " :methods ");
_outToken(str, node->methods->CustomName);
}
static void
_outJoin(StringInfo str, const Join *node)
{
WRITE_NODE_TYPE("JOIN");
_outJoinPlanInfo(str, (const Join *) node);
}
static void
_outNestLoop(StringInfo str, const NestLoop *node)
{
WRITE_NODE_TYPE("NESTLOOP");
_outJoinPlanInfo(str, (const Join *) node);
WRITE_NODE_FIELD(nestParams);
}
static void
_outMergeJoin(StringInfo str, const MergeJoin *node)
{
int numCols;
int i;
WRITE_NODE_TYPE("MERGEJOIN");
_outJoinPlanInfo(str, (const Join *) node);
WRITE_NODE_FIELD(mergeclauses);
numCols = list_length(node->mergeclauses);
appendStringInfoString(str, " :mergeFamilies");
for (i = 0; i < numCols; i++)
appendStringInfo(str, " %u", node->mergeFamilies[i]);
appendStringInfoString(str, " :mergeCollations");
for (i = 0; i < numCols; i++)
appendStringInfo(str, " %u", node->mergeCollations[i]);
appendStringInfoString(str, " :mergeStrategies");
for (i = 0; i < numCols; i++)
appendStringInfo(str, " %d", node->mergeStrategies[i]);
appendStringInfoString(str, " :mergeNullsFirst");
for (i = 0; i < numCols; i++)
appendStringInfo(str, " %s", booltostr(node->mergeNullsFirst[i]));
}
static void
_outHashJoin(StringInfo str, const HashJoin *node)
{
WRITE_NODE_TYPE("HASHJOIN");
_outJoinPlanInfo(str, (const Join *) node);
WRITE_NODE_FIELD(hashclauses);
}
static void
_outAgg(StringInfo str, const Agg *node)
{
int i;
WRITE_NODE_TYPE("AGG");
_outPlanInfo(str, (const Plan *) node);
WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
WRITE_ENUM_FIELD(aggsplit, AggSplit);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :grpColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->grpColIdx[i]);
appendStringInfoString(str, " :grpOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->grpOperators[i]);
WRITE_LONG_FIELD(numGroups);
WRITE_BITMAPSET_FIELD(aggParams);
WRITE_NODE_FIELD(groupingSets);
WRITE_NODE_FIELD(chain);
}
static void
_outWindowAgg(StringInfo str, const WindowAgg *node)
{
int i;
WRITE_NODE_TYPE("WINDOWAGG");
_outPlanInfo(str, (const Plan *) node);
WRITE_UINT_FIELD(winref);
WRITE_INT_FIELD(partNumCols);
appendStringInfoString(str, " :partColIdx");
for (i = 0; i < node->partNumCols; i++)
appendStringInfo(str, " %d", node->partColIdx[i]);
appendStringInfoString(str, " :partOperations");
for (i = 0; i < node->partNumCols; i++)
appendStringInfo(str, " %u", node->partOperators[i]);
WRITE_INT_FIELD(ordNumCols);
appendStringInfoString(str, " :ordColIdx");
for (i = 0; i < node->ordNumCols; i++)
appendStringInfo(str, " %d", node->ordColIdx[i]);
appendStringInfoString(str, " :ordOperations");
for (i = 0; i < node->ordNumCols; i++)
appendStringInfo(str, " %u", node->ordOperators[i]);
WRITE_INT_FIELD(frameOptions);
WRITE_NODE_FIELD(startOffset);
WRITE_NODE_FIELD(endOffset);
}
static void
_outGroup(StringInfo str, const Group *node)
{
int i;
WRITE_NODE_TYPE("GROUP");
_outPlanInfo(str, (const Plan *) node);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :grpColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->grpColIdx[i]);
appendStringInfoString(str, " :grpOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->grpOperators[i]);
}
static void
_outMaterial(StringInfo str, const Material *node)
{
WRITE_NODE_TYPE("MATERIAL");
_outPlanInfo(str, (const Plan *) node);
}
static void
_outSort(StringInfo str, const Sort *node)
{
int i;
WRITE_NODE_TYPE("SORT");
_outPlanInfo(str, (const Plan *) node);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :sortColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->sortColIdx[i]);
appendStringInfoString(str, " :sortOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->sortOperators[i]);
appendStringInfoString(str, " :collations");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->collations[i]);
appendStringInfoString(str, " :nullsFirst");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %s", booltostr(node->nullsFirst[i]));
}
static void
_outUnique(StringInfo str, const Unique *node)
{
int i;
WRITE_NODE_TYPE("UNIQUE");
_outPlanInfo(str, (const Plan *) node);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :uniqColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->uniqColIdx[i]);
appendStringInfoString(str, " :uniqOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->uniqOperators[i]);
}
static void
_outHash(StringInfo str, const Hash *node)
{
WRITE_NODE_TYPE("HASH");
_outPlanInfo(str, (const Plan *) node);
WRITE_OID_FIELD(skewTable);
WRITE_INT_FIELD(skewColumn);
WRITE_BOOL_FIELD(skewInherit);
WRITE_OID_FIELD(skewColType);
WRITE_INT_FIELD(skewColTypmod);
}
static void
_outSetOp(StringInfo str, const SetOp *node)
{
int i;
WRITE_NODE_TYPE("SETOP");
_outPlanInfo(str, (const Plan *) node);
WRITE_ENUM_FIELD(cmd, SetOpCmd);
WRITE_ENUM_FIELD(strategy, SetOpStrategy);
WRITE_INT_FIELD(numCols);
appendStringInfoString(str, " :dupColIdx");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %d", node->dupColIdx[i]);
appendStringInfoString(str, " :dupOperators");
for (i = 0; i < node->numCols; i++)
appendStringInfo(str, " %u", node->dupOperators[i]);
WRITE_INT_FIELD(flagColIdx);
WRITE_INT_FIELD(firstFlag);
WRITE_LONG_FIELD(numGroups);
}
static void
_outLockRows(StringInfo str, const LockRows *node)
{
WRITE_NODE_TYPE("LOCKROWS");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(rowMarks);
WRITE_INT_FIELD(epqParam);
}
static void
_outLimit(StringInfo str, const Limit *node)
{
WRITE_NODE_TYPE("LIMIT");
_outPlanInfo(str, (const Plan *) node);
WRITE_NODE_FIELD(limitOffset);
WRITE_NODE_FIELD(limitCount);
}
static void
_outNestLoopParam(StringInfo str, const NestLoopParam *node)
{
WRITE_NODE_TYPE("NESTLOOPPARAM");
WRITE_INT_FIELD(paramno);
WRITE_NODE_FIELD(paramval);
}
static void
_outPlanRowMark(StringInfo str, const PlanRowMark *node)
{
WRITE_NODE_TYPE("PLANROWMARK");
WRITE_UINT_FIELD(rti);
WRITE_UINT_FIELD(prti);
WRITE_UINT_FIELD(rowmarkId);
WRITE_ENUM_FIELD(markType, RowMarkType);
WRITE_INT_FIELD(allMarkTypes);
WRITE_ENUM_FIELD(strength, LockClauseStrength);
WRITE_ENUM_FIELD(waitPolicy, LockWaitPolicy);
WRITE_BOOL_FIELD(isParent);
}
static void
_outPlanInvalItem(StringInfo str, const PlanInvalItem *node)
{
WRITE_NODE_TYPE("PLANINVALITEM");
WRITE_INT_FIELD(cacheId);
WRITE_UINT_FIELD(hashValue);
}
/*****************************************************************************
*
* Stuff from primnodes.h.
*
*****************************************************************************/
static void
_outAlias(StringInfo str, const Alias *node)
{
WRITE_NODE_TYPE("ALIAS");
WRITE_STRING_FIELD(aliasname);
WRITE_NODE_FIELD(colnames);
}
static void
_outRangeVar(StringInfo str, const RangeVar *node)
{
WRITE_NODE_TYPE("RANGEVAR");
/*
* we deliberately ignore catalogname here, since it is presently not
* semantically meaningful
*/
WRITE_STRING_FIELD(schemaname);
WRITE_STRING_FIELD(relname);
WRITE_ENUM_FIELD(inhOpt, InhOption);
WRITE_CHAR_FIELD(relpersistence);
WRITE_NODE_FIELD(alias);
WRITE_LOCATION_FIELD(location);
}
static void
_outIntoClause(StringInfo str, const IntoClause *node)
{
WRITE_NODE_TYPE("INTOCLAUSE");
WRITE_NODE_FIELD(rel);
WRITE_NODE_FIELD(colNames);
WRITE_NODE_FIELD(options);
WRITE_ENUM_FIELD(onCommit, OnCommitAction);
WRITE_STRING_FIELD(tableSpaceName);
WRITE_NODE_FIELD(viewQuery);
WRITE_BOOL_FIELD(skipData);
}
static void
_outVar(StringInfo str, const Var *node)
{
WRITE_NODE_TYPE("VAR");
WRITE_UINT_FIELD(varno);
WRITE_INT_FIELD(varattno);
WRITE_OID_FIELD(vartype);
WRITE_INT_FIELD(vartypmod);
WRITE_OID_FIELD(varcollid);
WRITE_UINT_FIELD(varlevelsup);
WRITE_UINT_FIELD(varnoold);
WRITE_INT_FIELD(varoattno);
WRITE_LOCATION_FIELD(location);
}
static void
_outConst(StringInfo str, const Const *node)
{
WRITE_NODE_TYPE("CONST");
WRITE_OID_FIELD(consttype);
WRITE_INT_FIELD(consttypmod);
WRITE_OID_FIELD(constcollid);
WRITE_INT_FIELD(constlen);
WRITE_BOOL_FIELD(constbyval);
WRITE_BOOL_FIELD(constisnull);
WRITE_LOCATION_FIELD(location);
appendStringInfoString(str, " :constvalue ");
if (node->constisnull)
appendStringInfoString(str, "<>");
else
outDatum(str, node->constvalue, node->constlen, node->constbyval);
}
static void
_outParam(StringInfo str, const Param *node)
{
WRITE_NODE_TYPE("PARAM");
WRITE_ENUM_FIELD(paramkind, ParamKind);
WRITE_INT_FIELD(paramid);
WRITE_OID_FIELD(paramtype);
WRITE_INT_FIELD(paramtypmod);
WRITE_OID_FIELD(paramcollid);
WRITE_LOCATION_FIELD(location);
}
static void
_outAggref(StringInfo str, const Aggref *node)
{
WRITE_NODE_TYPE("AGGREF");
WRITE_OID_FIELD(aggfnoid);
WRITE_OID_FIELD(aggtype);
WRITE_OID_FIELD(aggcollid);
WRITE_OID_FIELD(inputcollid);
WRITE_OID_FIELD(aggtranstype);
WRITE_NODE_FIELD(aggargtypes);
WRITE_NODE_FIELD(aggdirectargs);
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(aggorder);
WRITE_NODE_FIELD(aggdistinct);
WRITE_NODE_FIELD(aggfilter);
WRITE_BOOL_FIELD(aggstar);
WRITE_BOOL_FIELD(aggvariadic);
WRITE_CHAR_FIELD(aggkind);
WRITE_UINT_FIELD(agglevelsup);
WRITE_ENUM_FIELD(aggsplit, AggSplit);
WRITE_LOCATION_FIELD(location);
}
static void
_outGroupingFunc(StringInfo str, const GroupingFunc *node)
{
WRITE_NODE_TYPE("GROUPINGFUNC");
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(refs);
WRITE_NODE_FIELD(cols);
WRITE_UINT_FIELD(agglevelsup);
WRITE_LOCATION_FIELD(location);
}
static void
_outWindowFunc(StringInfo str, const WindowFunc *node)
{
WRITE_NODE_TYPE("WINDOWFUNC");
WRITE_OID_FIELD(winfnoid);
WRITE_OID_FIELD(wintype);
WRITE_OID_FIELD(wincollid);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(aggfilter);
WRITE_UINT_FIELD(winref);
WRITE_BOOL_FIELD(winstar);
WRITE_BOOL_FIELD(winagg);
WRITE_LOCATION_FIELD(location);
}
static void
_outArrayRef(StringInfo str, const ArrayRef *node)
{
WRITE_NODE_TYPE("ARRAYREF");
WRITE_OID_FIELD(refarraytype);
WRITE_OID_FIELD(refelemtype);
WRITE_INT_FIELD(reftypmod);
WRITE_OID_FIELD(refcollid);
WRITE_NODE_FIELD(refupperindexpr);
WRITE_NODE_FIELD(reflowerindexpr);
WRITE_NODE_FIELD(refexpr);
WRITE_NODE_FIELD(refassgnexpr);
}
static void
_outFuncExpr(StringInfo str, const FuncExpr *node)
{
WRITE_NODE_TYPE("FUNCEXPR");
WRITE_OID_FIELD(funcid);
WRITE_OID_FIELD(funcresulttype);
WRITE_BOOL_FIELD(funcretset);
WRITE_BOOL_FIELD(funcvariadic);
WRITE_ENUM_FIELD(funcformat, CoercionForm);
WRITE_OID_FIELD(funccollid);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outNamedArgExpr(StringInfo str, const NamedArgExpr *node)
{
WRITE_NODE_TYPE("NAMEDARGEXPR");
WRITE_NODE_FIELD(arg);
WRITE_STRING_FIELD(name);
WRITE_INT_FIELD(argnumber);
WRITE_LOCATION_FIELD(location);
}
static void
_outOpExpr(StringInfo str, const OpExpr *node)
{
WRITE_NODE_TYPE("OPEXPR");
WRITE_OID_FIELD(opno);
WRITE_OID_FIELD(opfuncid);
WRITE_OID_FIELD(opresulttype);
WRITE_BOOL_FIELD(opretset);
WRITE_OID_FIELD(opcollid);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outDistinctExpr(StringInfo str, const DistinctExpr *node)
{
WRITE_NODE_TYPE("DISTINCTEXPR");
WRITE_OID_FIELD(opno);
WRITE_OID_FIELD(opfuncid);
WRITE_OID_FIELD(opresulttype);
WRITE_BOOL_FIELD(opretset);
WRITE_OID_FIELD(opcollid);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outNullIfExpr(StringInfo str, const NullIfExpr *node)
{
WRITE_NODE_TYPE("NULLIFEXPR");
WRITE_OID_FIELD(opno);
WRITE_OID_FIELD(opfuncid);
WRITE_OID_FIELD(opresulttype);
WRITE_BOOL_FIELD(opretset);
WRITE_OID_FIELD(opcollid);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outScalarArrayOpExpr(StringInfo str, const ScalarArrayOpExpr *node)
{
WRITE_NODE_TYPE("SCALARARRAYOPEXPR");
WRITE_OID_FIELD(opno);
WRITE_OID_FIELD(opfuncid);
WRITE_BOOL_FIELD(useOr);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outBoolExpr(StringInfo str, const BoolExpr *node)
{
char *opstr = NULL;
WRITE_NODE_TYPE("BOOLEXPR");
/* do-it-yourself enum representation */
switch (node->boolop)
{
case AND_EXPR:
opstr = "and";
break;
case OR_EXPR:
opstr = "or";
break;
case NOT_EXPR:
opstr = "not";
break;
}
appendStringInfoString(str, " :boolop ");
_outToken(str, opstr);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
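/*
 * Illustrative sketch only: because boolop is emitted as a token rather than
 * an integer enum code, an AND of two operator clauses comes out roughly as
 *   :boolop and :args ({OPEXPR ...} {OPEXPR ...}) :location -1
 * (the location value here is hypothetical).
 */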
static void
_outSubLink(StringInfo str, const SubLink *node)
{
WRITE_NODE_TYPE("SUBLINK");
WRITE_ENUM_FIELD(subLinkType, SubLinkType);
WRITE_INT_FIELD(subLinkId);
WRITE_NODE_FIELD(testexpr);
WRITE_NODE_FIELD(operName);
WRITE_NODE_FIELD(subselect);
WRITE_LOCATION_FIELD(location);
}
static void
_outSubPlan(StringInfo str, const SubPlan *node)
{
WRITE_NODE_TYPE("SUBPLAN");
WRITE_ENUM_FIELD(subLinkType, SubLinkType);
WRITE_NODE_FIELD(testexpr);
WRITE_NODE_FIELD(paramIds);
WRITE_INT_FIELD(plan_id);
WRITE_STRING_FIELD(plan_name);
WRITE_OID_FIELD(firstColType);
WRITE_INT_FIELD(firstColTypmod);
WRITE_OID_FIELD(firstColCollation);
WRITE_BOOL_FIELD(useHashTable);
WRITE_BOOL_FIELD(unknownEqFalse);
WRITE_NODE_FIELD(setParam);
WRITE_NODE_FIELD(parParam);
WRITE_NODE_FIELD(args);
WRITE_FLOAT_FIELD(startup_cost, "%.2f");
WRITE_FLOAT_FIELD(per_call_cost, "%.2f");
}
static void
_outAlternativeSubPlan(StringInfo str, const AlternativeSubPlan *node)
{
WRITE_NODE_TYPE("ALTERNATIVESUBPLAN");
WRITE_NODE_FIELD(subplans);
}
static void
_outFieldSelect(StringInfo str, const FieldSelect *node)
{
WRITE_NODE_TYPE("FIELDSELECT");
WRITE_NODE_FIELD(arg);
WRITE_INT_FIELD(fieldnum);
WRITE_OID_FIELD(resulttype);
WRITE_INT_FIELD(resulttypmod);
WRITE_OID_FIELD(resultcollid);
}
static void
_outFieldStore(StringInfo str, const FieldStore *node)
{
WRITE_NODE_TYPE("FIELDSTORE");
WRITE_NODE_FIELD(arg);
WRITE_NODE_FIELD(newvals);
WRITE_NODE_FIELD(fieldnums);
WRITE_OID_FIELD(resulttype);
}
static void
_outRelabelType(StringInfo str, const RelabelType *node)
{
WRITE_NODE_TYPE("RELABELTYPE");
WRITE_NODE_FIELD(arg);
WRITE_OID_FIELD(resulttype);
WRITE_INT_FIELD(resulttypmod);
WRITE_OID_FIELD(resultcollid);
WRITE_ENUM_FIELD(relabelformat, CoercionForm);
WRITE_LOCATION_FIELD(location);
}
static void
_outCoerceViaIO(StringInfo str, const CoerceViaIO *node)
{
WRITE_NODE_TYPE("COERCEVIAIO");
WRITE_NODE_FIELD(arg);
WRITE_OID_FIELD(resulttype);
WRITE_OID_FIELD(resultcollid);
WRITE_ENUM_FIELD(coerceformat, CoercionForm);
WRITE_LOCATION_FIELD(location);
}
static void
_outArrayCoerceExpr(StringInfo str, const ArrayCoerceExpr *node)
{
WRITE_NODE_TYPE("ARRAYCOERCEEXPR");
WRITE_NODE_FIELD(arg);
WRITE_OID_FIELD(elemfuncid);
WRITE_OID_FIELD(resulttype);
WRITE_INT_FIELD(resulttypmod);
WRITE_OID_FIELD(resultcollid);
WRITE_BOOL_FIELD(isExplicit);
WRITE_ENUM_FIELD(coerceformat, CoercionForm);
WRITE_LOCATION_FIELD(location);
}
static void
_outConvertRowtypeExpr(StringInfo str, const ConvertRowtypeExpr *node)
{
WRITE_NODE_TYPE("CONVERTROWTYPEEXPR");
WRITE_NODE_FIELD(arg);
WRITE_OID_FIELD(resulttype);
WRITE_ENUM_FIELD(convertformat, CoercionForm);
WRITE_LOCATION_FIELD(location);
}
static void
_outCollateExpr(StringInfo str, const CollateExpr *node)
{
WRITE_NODE_TYPE("COLLATE");
WRITE_NODE_FIELD(arg);
WRITE_OID_FIELD(collOid);
WRITE_LOCATION_FIELD(location);
}
static void
_outCaseExpr(StringInfo str, const CaseExpr *node)
{
WRITE_NODE_TYPE("CASE");
WRITE_OID_FIELD(casetype);
WRITE_OID_FIELD(casecollid);
WRITE_NODE_FIELD(arg);
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(defresult);
WRITE_LOCATION_FIELD(location);
}
static void
_outCaseWhen(StringInfo str, const CaseWhen *node)
{
WRITE_NODE_TYPE("WHEN");
WRITE_NODE_FIELD(expr);
WRITE_NODE_FIELD(result);
WRITE_LOCATION_FIELD(location);
}
static void
_outCaseTestExpr(StringInfo str, const CaseTestExpr *node)
{
WRITE_NODE_TYPE("CASETESTEXPR");
WRITE_OID_FIELD(typeId);
WRITE_INT_FIELD(typeMod);
WRITE_OID_FIELD(collation);
}
static void
_outArrayExpr(StringInfo str, const ArrayExpr *node)
{
WRITE_NODE_TYPE("ARRAY");
WRITE_OID_FIELD(array_typeid);
WRITE_OID_FIELD(array_collid);
WRITE_OID_FIELD(element_typeid);
WRITE_NODE_FIELD(elements);
WRITE_BOOL_FIELD(multidims);
WRITE_LOCATION_FIELD(location);
}
static void
_outRowExpr(StringInfo str, const RowExpr *node)
{
WRITE_NODE_TYPE("ROW");
WRITE_NODE_FIELD(args);
WRITE_OID_FIELD(row_typeid);
WRITE_ENUM_FIELD(row_format, CoercionForm);
WRITE_NODE_FIELD(colnames);
WRITE_LOCATION_FIELD(location);
}
static void
_outRowCompareExpr(StringInfo str, const RowCompareExpr *node)
{
WRITE_NODE_TYPE("ROWCOMPARE");
WRITE_ENUM_FIELD(rctype, RowCompareType);
WRITE_NODE_FIELD(opnos);
WRITE_NODE_FIELD(opfamilies);
WRITE_NODE_FIELD(inputcollids);
WRITE_NODE_FIELD(largs);
WRITE_NODE_FIELD(rargs);
}
static void
_outCoalesceExpr(StringInfo str, const CoalesceExpr *node)
{
WRITE_NODE_TYPE("COALESCE");
WRITE_OID_FIELD(coalescetype);
WRITE_OID_FIELD(coalescecollid);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outMinMaxExpr(StringInfo str, const MinMaxExpr *node)
{
WRITE_NODE_TYPE("MINMAX");
WRITE_OID_FIELD(minmaxtype);
WRITE_OID_FIELD(minmaxcollid);
WRITE_OID_FIELD(inputcollid);
WRITE_ENUM_FIELD(op, MinMaxOp);
WRITE_NODE_FIELD(args);
WRITE_LOCATION_FIELD(location);
}
static void
_outSQLValueFunction(StringInfo str, const SQLValueFunction *node)
{
WRITE_NODE_TYPE("SQLVALUEFUNCTION");
WRITE_ENUM_FIELD(op, SQLValueFunctionOp);
WRITE_OID_FIELD(type);
WRITE_INT_FIELD(typmod);
WRITE_LOCATION_FIELD(location);
}
static void
_outXmlExpr(StringInfo str, const XmlExpr *node)
{
WRITE_NODE_TYPE("XMLEXPR");
WRITE_ENUM_FIELD(op, XmlExprOp);
WRITE_STRING_FIELD(name);
WRITE_NODE_FIELD(named_args);
WRITE_NODE_FIELD(arg_names);
WRITE_NODE_FIELD(args);
WRITE_ENUM_FIELD(xmloption, XmlOptionType);
WRITE_OID_FIELD(type);
WRITE_INT_FIELD(typmod);
WRITE_LOCATION_FIELD(location);
}
static void
_outNullTest(StringInfo str, const NullTest *node)
{
WRITE_NODE_TYPE("NULLTEST");
WRITE_NODE_FIELD(arg);
WRITE_ENUM_FIELD(nulltesttype, NullTestType);
WRITE_BOOL_FIELD(argisrow);
WRITE_LOCATION_FIELD(location);
}
static void
_outBooleanTest(StringInfo str, const BooleanTest *node)
{
WRITE_NODE_TYPE("BOOLEANTEST");
WRITE_NODE_FIELD(arg);
WRITE_ENUM_FIELD(booltesttype, BoolTestType);
WRITE_LOCATION_FIELD(location);
}
static void
_outCoerceToDomain(StringInfo str, const CoerceToDomain *node)
{
WRITE_NODE_TYPE("COERCETODOMAIN");
WRITE_NODE_FIELD(arg);
WRITE_OID_FIELD(resulttype);
WRITE_INT_FIELD(resulttypmod);
WRITE_OID_FIELD(resultcollid);
WRITE_ENUM_FIELD(coercionformat, CoercionForm);
WRITE_LOCATION_FIELD(location);
}
static void
_outCoerceToDomainValue(StringInfo str, const CoerceToDomainValue *node)
{
WRITE_NODE_TYPE("COERCETODOMAINVALUE");
WRITE_OID_FIELD(typeId);
WRITE_INT_FIELD(typeMod);
WRITE_OID_FIELD(collation);
WRITE_LOCATION_FIELD(location);
}
static void
_outSetToDefault(StringInfo str, const SetToDefault *node)
{
WRITE_NODE_TYPE("SETTODEFAULT");
WRITE_OID_FIELD(typeId);
WRITE_INT_FIELD(typeMod);
WRITE_OID_FIELD(collation);
WRITE_LOCATION_FIELD(location);
}
static void
_outCurrentOfExpr(StringInfo str, const CurrentOfExpr *node)
{
WRITE_NODE_TYPE("CURRENTOFEXPR");
WRITE_UINT_FIELD(cvarno);
WRITE_STRING_FIELD(cursor_name);
WRITE_INT_FIELD(cursor_param);
}
static void
_outInferenceElem(StringInfo str, const InferenceElem *node)
{
WRITE_NODE_TYPE("INFERENCEELEM");
WRITE_NODE_FIELD(expr);
WRITE_OID_FIELD(infercollid);
WRITE_OID_FIELD(inferopclass);
}
static void
_outTargetEntry(StringInfo str, const TargetEntry *node)
{
WRITE_NODE_TYPE("TARGETENTRY");
WRITE_NODE_FIELD(expr);
WRITE_INT_FIELD(resno);
WRITE_STRING_FIELD(resname);
WRITE_UINT_FIELD(ressortgroupref);
WRITE_OID_FIELD(resorigtbl);
WRITE_INT_FIELD(resorigcol);
WRITE_BOOL_FIELD(resjunk);
}
static void
_outRangeTblRef(StringInfo str, const RangeTblRef *node)
{
WRITE_NODE_TYPE("RANGETBLREF");
WRITE_INT_FIELD(rtindex);
}
static void
_outJoinExpr(StringInfo str, const JoinExpr *node)
{
WRITE_NODE_TYPE("JOINEXPR");
WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_BOOL_FIELD(isNatural);
WRITE_NODE_FIELD(larg);
WRITE_NODE_FIELD(rarg);
WRITE_NODE_FIELD(usingClause);
WRITE_NODE_FIELD(quals);
WRITE_NODE_FIELD(alias);
WRITE_INT_FIELD(rtindex);
}
static void
_outFromExpr(StringInfo str, const FromExpr *node)
{
WRITE_NODE_TYPE("FROMEXPR");
WRITE_NODE_FIELD(fromlist);
WRITE_NODE_FIELD(quals);
}
static void
_outOnConflictExpr(StringInfo str, const OnConflictExpr *node)
{
WRITE_NODE_TYPE("ONCONFLICTEXPR");
WRITE_ENUM_FIELD(action, OnConflictAction);
WRITE_NODE_FIELD(arbiterElems);
WRITE_NODE_FIELD(arbiterWhere);
WRITE_OID_FIELD(constraint);
WRITE_NODE_FIELD(onConflictSet);
WRITE_NODE_FIELD(onConflictWhere);
WRITE_INT_FIELD(exclRelIndex);
WRITE_NODE_FIELD(exclRelTlist);
}
/*****************************************************************************
*
* Stuff from relation.h.
*
*****************************************************************************/
/*
* print the basic stuff of all nodes that inherit from Path
*
* Note we do NOT print the parent, else we'd be in infinite recursion.
* We can print the parent's relids for identification purposes, though.
* We print the pathtarget only if it's not the default one for the rel.
* We also do not print the whole of param_info, since it's printed by
* _outRelOptInfo; it's sufficient and less cluttering to print just the
* required outer relids.
*/
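/*
 * Illustrative sketch (not part of any stored-rule format contract): with the
 * WRITE_* conventions used here, a simple unparameterized Path on a relation
 * with relid 1 and no pathkeys would serialize roughly as
 *
 *   :pathtype 123 :parent_relids (b 1) :required_outer (b)
 *   :parallel_aware false :parallel_safe true :parallel_workers 0
 *   :rows 100 :startup_cost 0.00 :total_cost 10.00 :pathkeys <>
 *
 * where the pathtype code and the row/cost numbers are hypothetical, and
 * pathtarget is omitted because it matches the parent rel's default.
 */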
static void
_outPathInfo(StringInfo str, const Path *node)
{
WRITE_ENUM_FIELD(pathtype, NodeTag);
appendStringInfoString(str, " :parent_relids ");
_outBitmapset(str, node->parent->relids);
if (node->pathtarget != node->parent->reltarget)
WRITE_NODE_FIELD(pathtarget);
appendStringInfoString(str, " :required_outer ");
if (node->param_info)
_outBitmapset(str, node->param_info->ppi_req_outer);
else
_outBitmapset(str, NULL);
WRITE_BOOL_FIELD(parallel_aware);
WRITE_BOOL_FIELD(parallel_safe);
WRITE_INT_FIELD(parallel_workers);
WRITE_FLOAT_FIELD(rows, "%.0f");
WRITE_FLOAT_FIELD(startup_cost, "%.2f");
WRITE_FLOAT_FIELD(total_cost, "%.2f");
WRITE_NODE_FIELD(pathkeys);
}
/*
* print the basic stuff of all nodes that inherit from JoinPath
*/
static void
_outJoinPathInfo(StringInfo str, const JoinPath *node)
{
_outPathInfo(str, (const Path *) node);
WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_NODE_FIELD(outerjoinpath);
WRITE_NODE_FIELD(innerjoinpath);
WRITE_NODE_FIELD(joinrestrictinfo);
}
static void
_outPath(StringInfo str, const Path *node)
{
WRITE_NODE_TYPE("PATH");
_outPathInfo(str, (const Path *) node);
}
static void
_outIndexPath(StringInfo str, const IndexPath *node)
{
WRITE_NODE_TYPE("INDEXPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(indexinfo);
WRITE_NODE_FIELD(indexclauses);
WRITE_NODE_FIELD(indexquals);
WRITE_NODE_FIELD(indexqualcols);
WRITE_NODE_FIELD(indexorderbys);
WRITE_NODE_FIELD(indexorderbycols);
WRITE_ENUM_FIELD(indexscandir, ScanDirection);
WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
WRITE_FLOAT_FIELD(indexselectivity, "%.4f");
}
static void
_outBitmapHeapPath(StringInfo str, const BitmapHeapPath *node)
{
WRITE_NODE_TYPE("BITMAPHEAPPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(bitmapqual);
}
static void
_outBitmapAndPath(StringInfo str, const BitmapAndPath *node)
{
WRITE_NODE_TYPE("BITMAPANDPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(bitmapquals);
WRITE_FLOAT_FIELD(bitmapselectivity, "%.4f");
}
static void
_outBitmapOrPath(StringInfo str, const BitmapOrPath *node)
{
WRITE_NODE_TYPE("BITMAPORPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(bitmapquals);
WRITE_FLOAT_FIELD(bitmapselectivity, "%.4f");
}
static void
_outTidPath(StringInfo str, const TidPath *node)
{
WRITE_NODE_TYPE("TIDPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(tidquals);
}
static void
_outSubqueryScanPath(StringInfo str, const SubqueryScanPath *node)
{
WRITE_NODE_TYPE("SUBQUERYSCANPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
}
static void
_outForeignPath(StringInfo str, const ForeignPath *node)
{
WRITE_NODE_TYPE("FOREIGNPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(fdw_outerpath);
WRITE_NODE_FIELD(fdw_private);
}
static void
_outCustomPath(StringInfo str, const CustomPath *node)
{
WRITE_NODE_TYPE("CUSTOMPATH");
_outPathInfo(str, (const Path *) node);
WRITE_UINT_FIELD(flags);
WRITE_NODE_FIELD(custom_paths);
WRITE_NODE_FIELD(custom_private);
appendStringInfoString(str, " :methods ");
_outToken(str, node->methods->CustomName);
}
static void
_outAppendPath(StringInfo str, const AppendPath *node)
{
WRITE_NODE_TYPE("APPENDPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpaths);
}
static void
_outMergeAppendPath(StringInfo str, const MergeAppendPath *node)
{
WRITE_NODE_TYPE("MERGEAPPENDPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpaths);
WRITE_FLOAT_FIELD(limit_tuples, "%.0f");
}
static void
_outResultPath(StringInfo str, const ResultPath *node)
{
WRITE_NODE_TYPE("RESULTPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(quals);
}
static void
_outMaterialPath(StringInfo str, const MaterialPath *node)
{
WRITE_NODE_TYPE("MATERIALPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
}
static void
_outUniquePath(StringInfo str, const UniquePath *node)
{
WRITE_NODE_TYPE("UNIQUEPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_ENUM_FIELD(umethod, UniquePathMethod);
WRITE_NODE_FIELD(in_operators);
WRITE_NODE_FIELD(uniq_exprs);
}
static void
_outGatherPath(StringInfo str, const GatherPath *node)
{
WRITE_NODE_TYPE("GATHERPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_BOOL_FIELD(single_copy);
}
static void
_outProjectionPath(StringInfo str, const ProjectionPath *node)
{
WRITE_NODE_TYPE("PROJECTIONPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_BOOL_FIELD(dummypp);
}
static void
_outSortPath(StringInfo str, const SortPath *node)
{
WRITE_NODE_TYPE("SORTPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
}
static void
_outGroupPath(StringInfo str, const GroupPath *node)
{
WRITE_NODE_TYPE("GROUPPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_NODE_FIELD(groupClause);
WRITE_NODE_FIELD(qual);
}
static void
_outUpperUniquePath(StringInfo str, const UpperUniquePath *node)
{
WRITE_NODE_TYPE("UPPERUNIQUEPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_INT_FIELD(numkeys);
}
static void
_outAggPath(StringInfo str, const AggPath *node)
{
WRITE_NODE_TYPE("AGGPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_ENUM_FIELD(aggstrategy, AggStrategy);
WRITE_ENUM_FIELD(aggsplit, AggSplit);
WRITE_FLOAT_FIELD(numGroups, "%.0f");
WRITE_NODE_FIELD(groupClause);
WRITE_NODE_FIELD(qual);
}
static void
_outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
{
WRITE_NODE_TYPE("GROUPINGSETSPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_NODE_FIELD(rollup_groupclauses);
WRITE_NODE_FIELD(rollup_lists);
WRITE_NODE_FIELD(qual);
}
static void
_outMinMaxAggPath(StringInfo str, const MinMaxAggPath *node)
{
WRITE_NODE_TYPE("MINMAXAGGPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(mmaggregates);
WRITE_NODE_FIELD(quals);
}
static void
_outWindowAggPath(StringInfo str, const WindowAggPath *node)
{
WRITE_NODE_TYPE("WINDOWAGGPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_NODE_FIELD(winclause);
WRITE_NODE_FIELD(winpathkeys);
}
static void
_outSetOpPath(StringInfo str, const SetOpPath *node)
{
WRITE_NODE_TYPE("SETOPPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_ENUM_FIELD(cmd, SetOpCmd);
WRITE_ENUM_FIELD(strategy, SetOpStrategy);
WRITE_NODE_FIELD(distinctList);
WRITE_INT_FIELD(flagColIdx);
WRITE_INT_FIELD(firstFlag);
WRITE_FLOAT_FIELD(numGroups, "%.0f");
}
static void
_outRecursiveUnionPath(StringInfo str, const RecursiveUnionPath *node)
{
WRITE_NODE_TYPE("RECURSIVEUNIONPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(leftpath);
WRITE_NODE_FIELD(rightpath);
WRITE_NODE_FIELD(distinctList);
WRITE_INT_FIELD(wtParam);
WRITE_FLOAT_FIELD(numGroups, "%.0f");
}
static void
_outLockRowsPath(StringInfo str, const LockRowsPath *node)
{
WRITE_NODE_TYPE("LOCKROWSPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_NODE_FIELD(rowMarks);
WRITE_INT_FIELD(epqParam);
}
static void
_outModifyTablePath(StringInfo str, const ModifyTablePath *node)
{
WRITE_NODE_TYPE("MODIFYTABLEPATH");
_outPathInfo(str, (const Path *) node);
WRITE_ENUM_FIELD(operation, CmdType);
WRITE_BOOL_FIELD(canSetTag);
WRITE_UINT_FIELD(nominalRelation);
WRITE_NODE_FIELD(resultRelations);
WRITE_NODE_FIELD(subpaths);
WRITE_NODE_FIELD(subroots);
WRITE_NODE_FIELD(withCheckOptionLists);
WRITE_NODE_FIELD(returningLists);
WRITE_NODE_FIELD(rowMarks);
WRITE_NODE_FIELD(onconflict);
WRITE_INT_FIELD(epqParam);
}
static void
_outLimitPath(StringInfo str, const LimitPath *node)
{
WRITE_NODE_TYPE("LIMITPATH");
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(subpath);
WRITE_NODE_FIELD(limitOffset);
WRITE_NODE_FIELD(limitCount);
}
static void
_outNestPath(StringInfo str, const NestPath *node)
{
WRITE_NODE_TYPE("NESTPATH");
_outJoinPathInfo(str, (const JoinPath *) node);
}
static void
_outMergePath(StringInfo str, const MergePath *node)
{
WRITE_NODE_TYPE("MERGEPATH");
_outJoinPathInfo(str, (const JoinPath *) node);
WRITE_NODE_FIELD(path_mergeclauses);
WRITE_NODE_FIELD(outersortkeys);
WRITE_NODE_FIELD(innersortkeys);
WRITE_BOOL_FIELD(materialize_inner);
}
static void
_outHashPath(StringInfo str, const HashPath *node)
{
WRITE_NODE_TYPE("HASHPATH");
_outJoinPathInfo(str, (const JoinPath *) node);
WRITE_NODE_FIELD(path_hashclauses);
WRITE_INT_FIELD(num_batches);
}
static void
_outPlannerGlobal(StringInfo str, const PlannerGlobal *node)
{
WRITE_NODE_TYPE("PLANNERGLOBAL");
/* NB: this isn't a complete set of fields */
WRITE_NODE_FIELD(subplans);
WRITE_BITMAPSET_FIELD(rewindPlanIDs);
WRITE_NODE_FIELD(finalrtable);
WRITE_NODE_FIELD(finalrowmarks);
WRITE_NODE_FIELD(resultRelations);
WRITE_NODE_FIELD(relationOids);
WRITE_NODE_FIELD(invalItems);
WRITE_INT_FIELD(nParamExec);
WRITE_UINT_FIELD(lastPHId);
WRITE_UINT_FIELD(lastRowMarkId);
WRITE_INT_FIELD(lastPlanNodeId);
WRITE_BOOL_FIELD(transientPlan);
WRITE_BOOL_FIELD(dependsOnRole);
WRITE_BOOL_FIELD(parallelModeOK);
WRITE_BOOL_FIELD(parallelModeNeeded);
WRITE_CHAR_FIELD(maxParallelHazard);
}

static void
_outPlannerInfo(StringInfo str, const PlannerInfo *node)
{
WRITE_NODE_TYPE("PLANNERINFO");
/* NB: this isn't a complete set of fields */
WRITE_NODE_FIELD(parse);
WRITE_NODE_FIELD(glob);
WRITE_UINT_FIELD(query_level);
WRITE_NODE_FIELD(plan_params);
WRITE_BITMAPSET_FIELD(outer_params);
WRITE_BITMAPSET_FIELD(all_baserels);
WRITE_BITMAPSET_FIELD(nullable_baserels);
WRITE_NODE_FIELD(join_rel_list);
WRITE_INT_FIELD(join_cur_level);
WRITE_NODE_FIELD(init_plans);
WRITE_NODE_FIELD(cte_plan_ids);
WRITE_NODE_FIELD(multiexpr_params);
WRITE_NODE_FIELD(eq_classes);
WRITE_NODE_FIELD(canon_pathkeys);
WRITE_NODE_FIELD(left_join_clauses);
WRITE_NODE_FIELD(right_join_clauses);
WRITE_NODE_FIELD(full_join_clauses);
WRITE_NODE_FIELD(join_info_list);
WRITE_NODE_FIELD(append_rel_list);
WRITE_NODE_FIELD(rowMarks);
WRITE_NODE_FIELD(placeholder_list);
WRITE_NODE_FIELD(fkey_list);
WRITE_NODE_FIELD(query_pathkeys);
WRITE_NODE_FIELD(group_pathkeys);
WRITE_NODE_FIELD(window_pathkeys);
WRITE_NODE_FIELD(distinct_pathkeys);
WRITE_NODE_FIELD(sort_pathkeys);
WRITE_NODE_FIELD(processed_tlist);
WRITE_NODE_FIELD(minmax_aggs);
WRITE_FLOAT_FIELD(total_table_pages, "%.0f");
WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");
WRITE_FLOAT_FIELD(limit_tuples, "%.0f");
WRITE_BOOL_FIELD(hasInheritedTarget);
WRITE_BOOL_FIELD(hasJoinRTEs);
WRITE_BOOL_FIELD(hasLateralRTEs);
WRITE_BOOL_FIELD(hasDeletedRTEs);
WRITE_BOOL_FIELD(hasHavingQual);
WRITE_BOOL_FIELD(hasPseudoConstantQuals);
WRITE_BOOL_FIELD(hasRecursion);
WRITE_INT_FIELD(wt_param_id);
WRITE_BITMAPSET_FIELD(curOuterRels);
WRITE_NODE_FIELD(curOuterParams);
}

static void
_outRelOptInfo(StringInfo str, const RelOptInfo *node)
{
WRITE_NODE_TYPE("RELOPTINFO");
/* NB: this isn't a complete set of fields */
WRITE_ENUM_FIELD(reloptkind, RelOptKind);
WRITE_BITMAPSET_FIELD(relids);
WRITE_FLOAT_FIELD(rows, "%.0f");
WRITE_BOOL_FIELD(consider_startup);
WRITE_BOOL_FIELD(consider_param_startup);
WRITE_BOOL_FIELD(consider_parallel);
WRITE_NODE_FIELD(reltarget);
WRITE_NODE_FIELD(pathlist);
WRITE_NODE_FIELD(ppilist);
WRITE_NODE_FIELD(partial_pathlist);
WRITE_NODE_FIELD(cheapest_startup_path);
WRITE_NODE_FIELD(cheapest_total_path);
WRITE_NODE_FIELD(cheapest_unique_path);
WRITE_NODE_FIELD(cheapest_parameterized_paths);
WRITE_BITMAPSET_FIELD(direct_lateral_relids);
WRITE_BITMAPSET_FIELD(lateral_relids);
WRITE_UINT_FIELD(relid);
WRITE_OID_FIELD(reltablespace);
WRITE_ENUM_FIELD(rtekind, RTEKind);
WRITE_INT_FIELD(min_attr);
WRITE_INT_FIELD(max_attr);
WRITE_NODE_FIELD(lateral_vars);
WRITE_BITMAPSET_FIELD(lateral_referencers);
WRITE_NODE_FIELD(indexlist);
WRITE_UINT_FIELD(pages);
WRITE_FLOAT_FIELD(tuples, "%.0f");
WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
WRITE_NODE_FIELD(subroot);
WRITE_NODE_FIELD(subplan_params);
WRITE_INT_FIELD(rel_parallel_workers);
WRITE_OID_FIELD(serverid);
WRITE_OID_FIELD(userid);
WRITE_BOOL_FIELD(useridiscurrent);
/* we don't try to print fdwroutine or fdw_private */
WRITE_NODE_FIELD(baserestrictinfo);
WRITE_NODE_FIELD(joininfo);
WRITE_BOOL_FIELD(has_eclass_joins);
}

static void
_outIndexOptInfo(StringInfo str, const IndexOptInfo *node)
{
WRITE_NODE_TYPE("INDEXOPTINFO");
/* NB: this isn't a complete set of fields */
WRITE_OID_FIELD(indexoid);
/* Do NOT print rel field, else infinite recursion */
WRITE_UINT_FIELD(pages);
WRITE_FLOAT_FIELD(tuples, "%.0f");
WRITE_INT_FIELD(tree_height);
WRITE_INT_FIELD(ncolumns);
/* array fields aren't really worth the trouble to print */
WRITE_OID_FIELD(relam);
/* indexprs is redundant since we print indextlist */
WRITE_NODE_FIELD(indpred);
WRITE_NODE_FIELD(indextlist);
WRITE_NODE_FIELD(indrestrictinfo);
WRITE_BOOL_FIELD(predOK);
WRITE_BOOL_FIELD(unique);
WRITE_BOOL_FIELD(immediate);
WRITE_BOOL_FIELD(hypothetical);
/* we don't bother with fields copied from the index AM's API struct */
}

static void
_outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node)
{
int i;
WRITE_NODE_TYPE("FOREIGNKEYOPTINFO");
WRITE_UINT_FIELD(con_relid);
WRITE_UINT_FIELD(ref_relid);
WRITE_INT_FIELD(nkeys);
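/*
 * The constraint's per-column arrays (conkey, confkey, conpfeqop) have no
 * WRITE_xxx macro, so emit them by hand, one entry per key column.
 */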
appendStringInfoString(str, " :conkey");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %d", node->conkey[i]);
appendStringInfoString(str, " :confkey");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %d", node->confkey[i]);
appendStringInfoString(str, " :conpfeqop");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %u", node->conpfeqop[i]);
WRITE_INT_FIELD(nmatched_ec);
WRITE_INT_FIELD(nmatched_rcols);
WRITE_INT_FIELD(nmatched_ri);
/* for compactness, just print the number of matches per column: */
appendStringInfoString(str, " :eclass");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %d", (node->eclass[i] != NULL));
appendStringInfoString(str, " :rinfos");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %d", list_length(node->rinfos[i]));
}

static void
_outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
{
/*
* To simplify reading, we just chase up to the topmost merged EC and
* print that, without bothering to show the merge-ees separately.
*/
while (node->ec_merged)
node = node->ec_merged;
WRITE_NODE_TYPE("EQUIVALENCECLASS");
WRITE_NODE_FIELD(ec_opfamilies);
WRITE_OID_FIELD(ec_collation);
WRITE_NODE_FIELD(ec_members);
WRITE_NODE_FIELD(ec_sources);
WRITE_NODE_FIELD(ec_derives);
WRITE_BITMAPSET_FIELD(ec_relids);
WRITE_BOOL_FIELD(ec_has_const);
WRITE_BOOL_FIELD(ec_has_volatile);
WRITE_BOOL_FIELD(ec_below_outer_join);
WRITE_BOOL_FIELD(ec_broken);
WRITE_UINT_FIELD(ec_sortref);
}

static void
_outEquivalenceMember(StringInfo str, const EquivalenceMember *node)
{
WRITE_NODE_TYPE("EQUIVALENCEMEMBER");
WRITE_NODE_FIELD(em_expr);
WRITE_BITMAPSET_FIELD(em_relids);
WRITE_BITMAPSET_FIELD(em_nullable_relids);
WRITE_BOOL_FIELD(em_is_const);
WRITE_BOOL_FIELD(em_is_child);
WRITE_OID_FIELD(em_datatype);
}

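/*
 * Illustrative only: with made-up field values, the function below emits
 * something like
 *   {PATHKEY :pk_eclass {EQUIVALENCECLASS ...} :pk_opfamily 1994
 *    :pk_strategy 1 :pk_nulls_first false}
 * where outNode() supplies the surrounding braces.
 */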
static void
_outPathKey(StringInfo str, const PathKey *node)
{
WRITE_NODE_TYPE("PATHKEY");
WRITE_NODE_FIELD(pk_eclass);
WRITE_OID_FIELD(pk_opfamily);
WRITE_INT_FIELD(pk_strategy);
WRITE_BOOL_FIELD(pk_nulls_first);
}

static void
_outPathTarget(StringInfo str, const PathTarget *node)
{
WRITE_NODE_TYPE("PATHTARGET");
WRITE_NODE_FIELD(exprs);
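/*
 * sortgrouprefs[] is a plain array parallel to the exprs list (or NULL),
 * so there is no WRITE_xxx macro for it; print it element by element.
 */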
if (node->sortgrouprefs)
{
int i;
appendStringInfoString(str, " :sortgrouprefs");
for (i = 0; i < list_length(node->exprs); i++)
appendStringInfo(str, " %u", node->sortgrouprefs[i]);
}
WRITE_FLOAT_FIELD(cost.startup, "%.2f");
WRITE_FLOAT_FIELD(cost.per_tuple, "%.2f");
WRITE_INT_FIELD(width);
}

static void
_outParamPathInfo(StringInfo str, const ParamPathInfo *node)
{
WRITE_NODE_TYPE("PARAMPATHINFO");
WRITE_BITMAPSET_FIELD(ppi_req_outer);
WRITE_FLOAT_FIELD(ppi_rows, "%.0f");
WRITE_NODE_FIELD(ppi_clauses);
}

static void
_outRestrictInfo(StringInfo str, const RestrictInfo *node)
{
WRITE_NODE_TYPE("RESTRICTINFO");
/* NB: this isn't a complete set of fields */
WRITE_NODE_FIELD(clause);
WRITE_BOOL_FIELD(is_pushed_down);
WRITE_BOOL_FIELD(outerjoin_delayed);
WRITE_BOOL_FIELD(can_join);
WRITE_BOOL_FIELD(pseudoconstant);
WRITE_BITMAPSET_FIELD(clause_relids);
WRITE_BITMAPSET_FIELD(required_relids);
WRITE_BITMAPSET_FIELD(outer_relids);
WRITE_BITMAPSET_FIELD(nullable_relids);
WRITE_BITMAPSET_FIELD(left_relids);
WRITE_BITMAPSET_FIELD(right_relids);
WRITE_NODE_FIELD(orclause);
/* don't write parent_ec, leads to infinite recursion in plan tree dump */
WRITE_FLOAT_FIELD(norm_selec, "%.4f");
WRITE_FLOAT_FIELD(outer_selec, "%.4f");
WRITE_NODE_FIELD(mergeopfamilies);
/* don't write left_ec, leads to infinite recursion in plan tree dump */
/* don't write right_ec, leads to infinite recursion in plan tree dump */
WRITE_NODE_FIELD(left_em);
WRITE_NODE_FIELD(right_em);
WRITE_BOOL_FIELD(outer_is_left);
WRITE_OID_FIELD(hashjoinoperator);
}

static void
_outPlaceHolderVar(StringInfo str, const PlaceHolderVar *node)
{
WRITE_NODE_TYPE("PLACEHOLDERVAR");
WRITE_NODE_FIELD(phexpr);
WRITE_BITMAPSET_FIELD(phrels);
WRITE_UINT_FIELD(phid);
WRITE_UINT_FIELD(phlevelsup);
}

static void
_outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node)
{
WRITE_NODE_TYPE("SPECIALJOININFO");
WRITE_BITMAPSET_FIELD(min_lefthand);
WRITE_BITMAPSET_FIELD(min_righthand);
WRITE_BITMAPSET_FIELD(syn_lefthand);
WRITE_BITMAPSET_FIELD(syn_righthand);
WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_BOOL_FIELD(lhs_strict);
WRITE_BOOL_FIELD(delay_upper_joins);
WRITE_BOOL_FIELD(semi_can_btree);
WRITE_BOOL_FIELD(semi_can_hash);
WRITE_NODE_FIELD(semi_operators);
WRITE_NODE_FIELD(semi_rhs_exprs);
}

static void
_outAppendRelInfo(StringInfo str, const AppendRelInfo *node)
{
WRITE_NODE_TYPE("APPENDRELINFO");
WRITE_UINT_FIELD(parent_relid);
WRITE_UINT_FIELD(child_relid);
WRITE_OID_FIELD(parent_reltype);
WRITE_OID_FIELD(child_reltype);
WRITE_NODE_FIELD(translated_vars);
WRITE_OID_FIELD(parent_reloid);
}

static void
_outPlaceHolderInfo(StringInfo str, const PlaceHolderInfo *node)
{
WRITE_NODE_TYPE("PLACEHOLDERINFO");
WRITE_UINT_FIELD(phid);
WRITE_NODE_FIELD(ph_var);
WRITE_BITMAPSET_FIELD(ph_eval_at);
WRITE_BITMAPSET_FIELD(ph_lateral);
WRITE_BITMAPSET_FIELD(ph_needed);
WRITE_INT_FIELD(ph_width);
}

static void
_outMinMaxAggInfo(StringInfo str, const MinMaxAggInfo *node)
{
WRITE_NODE_TYPE("MINMAXAGGINFO");
WRITE_OID_FIELD(aggfnoid);
WRITE_OID_FIELD(aggsortop);
WRITE_NODE_FIELD(target);
/* We intentionally omit subroot --- too large, not interesting enough */
WRITE_NODE_FIELD(path);
WRITE_FLOAT_FIELD(pathcost, "%.2f");
WRITE_NODE_FIELD(param);
}

static void
_outPlannerParamItem(StringInfo str, const PlannerParamItem *node)
{
WRITE_NODE_TYPE("PLANNERPARAMITEM");
WRITE_NODE_FIELD(item);
WRITE_INT_FIELD(paramId);
}

/*****************************************************************************
*
* Stuff from extensible.h
*
*****************************************************************************/

static void
_outExtensibleNode(StringInfo str, const ExtensibleNode *node)
{
const ExtensibleNodeMethods *methods;
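/*
 * Look up the registered output callback for this extensible node type;
 * missing_ok = false makes the lookup error out if the type is unknown.
 */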
methods = GetExtensibleNodeMethods(node->extnodename, false);
WRITE_NODE_TYPE("EXTENSIBLENODE");
WRITE_STRING_FIELD(extnodename);
/* serialize the private fields */
methods->nodeOut(str, node);
}

/*****************************************************************************
*
* Stuff from parsenodes.h.
*
*****************************************************************************/

/*
* print the basic stuff of all nodes that inherit from CreateStmt
*/
static void
_outCreateStmtInfo(StringInfo str, const CreateStmt *node)
{
WRITE_NODE_FIELD(relation);
WRITE_NODE_FIELD(tableElts);
WRITE_NODE_FIELD(inhRelations);
WRITE_NODE_FIELD(ofTypename);
WRITE_NODE_FIELD(constraints);
WRITE_NODE_FIELD(options);
WRITE_ENUM_FIELD(oncommit, OnCommitAction);
WRITE_STRING_FIELD(tablespacename);
WRITE_BOOL_FIELD(if_not_exists);
}

static void
_outCreateStmt(StringInfo str, const CreateStmt *node)
{
WRITE_NODE_TYPE("CREATESTMT");
_outCreateStmtInfo(str, (const CreateStmt *) node);
}

static void
_outCreateForeignTableStmt(StringInfo str, const CreateForeignTableStmt *node)
{
WRITE_NODE_TYPE("CREATEFOREIGNTABLESTMT");
_outCreateStmtInfo(str, (const CreateStmt *) node);
WRITE_STRING_FIELD(servername);
WRITE_NODE_FIELD(options);
}

static void
_outImportForeignSchemaStmt(StringInfo str, const ImportForeignSchemaStmt *node)
{
WRITE_NODE_TYPE("IMPORTFOREIGNSCHEMASTMT");
WRITE_STRING_FIELD(server_name);
WRITE_STRING_FIELD(remote_schema);
WRITE_STRING_FIELD(local_schema);
WRITE_ENUM_FIELD(list_type, ImportForeignSchemaType);
WRITE_NODE_FIELD(table_list);
WRITE_NODE_FIELD(options);
}

static void
_outIndexStmt(StringInfo str, const IndexStmt *node)
{
WRITE_NODE_TYPE("INDEXSTMT");
WRITE_STRING_FIELD(idxname);
WRITE_NODE_FIELD(relation);
WRITE_STRING_FIELD(accessMethod);
WRITE_STRING_FIELD(tableSpace);
WRITE_NODE_FIELD(indexParams);
WRITE_NODE_FIELD(options);
WRITE_NODE_FIELD(whereClause);
WRITE_NODE_FIELD(excludeOpNames);
WRITE_STRING_FIELD(idxcomment);
WRITE_OID_FIELD(indexOid);
WRITE_OID_FIELD(oldNode);
WRITE_BOOL_FIELD(unique);
WRITE_BOOL_FIELD(primary);
WRITE_BOOL_FIELD(isconstraint);
WRITE_BOOL_FIELD(deferrable);
WRITE_BOOL_FIELD(initdeferred);
WRITE_BOOL_FIELD(transformed);
WRITE_BOOL_FIELD(concurrent);
WRITE_BOOL_FIELD(if_not_exists);
}
static void
_outNotifyStmt(StringInfo str, const NotifyStmt *node)
{
WRITE_NODE_TYPE("NOTIFY");
WRITE_STRING_FIELD(conditionname);
WRITE_STRING_FIELD(payload);
}
static void
_outDeclareCursorStmt(StringInfo str, const DeclareCursorStmt *node)
{
WRITE_NODE_TYPE("DECLARECURSOR");
WRITE_STRING_FIELD(portalname);
WRITE_INT_FIELD(options);
WRITE_NODE_FIELD(query);
}
static void
_outSelectStmt(StringInfo str, const SelectStmt *node)
{
WRITE_NODE_TYPE("SELECT");
WRITE_NODE_FIELD(distinctClause);
WRITE_NODE_FIELD(intoClause);
WRITE_NODE_FIELD(targetList);
WRITE_NODE_FIELD(fromClause);
WRITE_NODE_FIELD(whereClause);
WRITE_NODE_FIELD(groupClause);
WRITE_NODE_FIELD(havingClause);
WRITE_NODE_FIELD(windowClause);
WRITE_NODE_FIELD(valuesLists);
WRITE_NODE_FIELD(sortClause);
WRITE_NODE_FIELD(limitOffset);
WRITE_NODE_FIELD(limitCount);
WRITE_NODE_FIELD(lockingClause);
WRITE_NODE_FIELD(withClause);
WRITE_ENUM_FIELD(op, SetOperation);
WRITE_BOOL_FIELD(all);
WRITE_NODE_FIELD(larg);
WRITE_NODE_FIELD(rarg);
}
static void
_outFuncCall(StringInfo str, const FuncCall *node)
{
WRITE_NODE_TYPE("FUNCCALL");
WRITE_NODE_FIELD(funcname);
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(agg_order);
WRITE_NODE_FIELD(agg_filter);
WRITE_BOOL_FIELD(agg_within_group);
WRITE_BOOL_FIELD(agg_star);
WRITE_BOOL_FIELD(agg_distinct);
WRITE_BOOL_FIELD(func_variadic);
WRITE_NODE_FIELD(over);
WRITE_LOCATION_FIELD(location);
}
static void
_outDefElem(StringInfo str, const DefElem *node)
{
WRITE_NODE_TYPE("DEFELEM");
WRITE_STRING_FIELD(defnamespace);
WRITE_STRING_FIELD(defname);
WRITE_NODE_FIELD(arg);
WRITE_ENUM_FIELD(defaction, DefElemAction);
WRITE_LOCATION_FIELD(location);
}
static void
_outTableLikeClause(StringInfo str, const TableLikeClause *node)
{
WRITE_NODE_TYPE("TABLELIKECLAUSE");
WRITE_NODE_FIELD(relation);
WRITE_UINT_FIELD(options);
}
static void
_outLockingClause(StringInfo str, const LockingClause *node)
{
WRITE_NODE_TYPE("LOCKINGCLAUSE");
WRITE_NODE_FIELD(lockedRels);
WRITE_ENUM_FIELD(strength, LockClauseStrength);
WRITE_ENUM_FIELD(waitPolicy, LockWaitPolicy);
}
static void
_outXmlSerialize(StringInfo str, const XmlSerialize *node)
{
WRITE_NODE_TYPE("XMLSERIALIZE");
WRITE_ENUM_FIELD(xmloption, XmlOptionType);
WRITE_NODE_FIELD(expr);
WRITE_NODE_FIELD(typeName);
WRITE_LOCATION_FIELD(location);
}
static void
_outColumnDef(StringInfo str, const ColumnDef *node)
{
WRITE_NODE_TYPE("COLUMNDEF");
WRITE_STRING_FIELD(colname);
WRITE_NODE_FIELD(typeName);
WRITE_INT_FIELD(inhcount);
WRITE_BOOL_FIELD(is_local);
WRITE_BOOL_FIELD(is_not_null);
WRITE_BOOL_FIELD(is_from_type);
WRITE_CHAR_FIELD(storage);
WRITE_NODE_FIELD(raw_default);
WRITE_NODE_FIELD(cooked_default);
WRITE_NODE_FIELD(collClause);
WRITE_OID_FIELD(collOid);
WRITE_NODE_FIELD(constraints);
WRITE_NODE_FIELD(fdwoptions);
WRITE_LOCATION_FIELD(location);
}
static void
_outTypeName(StringInfo str, const TypeName *node)
{
WRITE_NODE_TYPE("TYPENAME");
WRITE_NODE_FIELD(names);
WRITE_OID_FIELD(typeOid);
WRITE_BOOL_FIELD(setof);
WRITE_BOOL_FIELD(pct_type);
WRITE_NODE_FIELD(typmods);
WRITE_INT_FIELD(typemod);
WRITE_NODE_FIELD(arrayBounds);
WRITE_LOCATION_FIELD(location);
}
static void
_outTypeCast(StringInfo str, const TypeCast *node)
{
WRITE_NODE_TYPE("TYPECAST");
WRITE_NODE_FIELD(arg);
WRITE_NODE_FIELD(typeName);
WRITE_LOCATION_FIELD(location);
}
static void
_outCollateClause(StringInfo str, const CollateClause *node)
{
WRITE_NODE_TYPE("COLLATECLAUSE");
WRITE_NODE_FIELD(arg);
WRITE_NODE_FIELD(collname);
WRITE_LOCATION_FIELD(location);
}
static void
_outIndexElem(StringInfo str, const IndexElem *node)
{
WRITE_NODE_TYPE("INDEXELEM");
WRITE_STRING_FIELD(name);
WRITE_NODE_FIELD(expr);
WRITE_STRING_FIELD(indexcolname);
WRITE_NODE_FIELD(collation);
WRITE_NODE_FIELD(opclass);
WRITE_ENUM_FIELD(ordering, SortByDir);
WRITE_ENUM_FIELD(nulls_ordering, SortByNulls);
}
static void
_outQuery(StringInfo str, const Query *node)
{
WRITE_NODE_TYPE("QUERY");
WRITE_ENUM_FIELD(commandType, CmdType);
WRITE_ENUM_FIELD(querySource, QuerySource);
/* we intentionally do not print the queryId field */
WRITE_BOOL_FIELD(canSetTag);
/*
* Hack to work around missing outfuncs routines for a lot of the
* utility-statement node types. (The only one we actually *need* for
* rules support is NotifyStmt.) Someday we ought to support 'em all, but
* for the meantime do this to avoid getting lots of warnings when running
* with debug_print_parse on.
*/
if (node->utilityStmt)
{
switch (nodeTag(node->utilityStmt))
{
case T_CreateStmt:
case T_IndexStmt:
case T_NotifyStmt:
case T_DeclareCursorStmt:
WRITE_NODE_FIELD(utilityStmt);
break;
default:
appendStringInfoString(str, " :utilityStmt ?");
break;
}
}
else
appendStringInfoString(str, " :utilityStmt <>");
WRITE_INT_FIELD(resultRelation);
WRITE_BOOL_FIELD(hasAggs);
WRITE_BOOL_FIELD(hasWindowFuncs);
WRITE_BOOL_FIELD(hasTargetSRFs);
WRITE_BOOL_FIELD(hasSubLinks);
WRITE_BOOL_FIELD(hasDistinctOn);
WRITE_BOOL_FIELD(hasRecursive);
WRITE_BOOL_FIELD(hasModifyingCTE);
WRITE_BOOL_FIELD(hasForUpdate);
WRITE_BOOL_FIELD(hasRowSecurity);
WRITE_NODE_FIELD(cteList);
WRITE_NODE_FIELD(rtable);
WRITE_NODE_FIELD(jointree);
WRITE_NODE_FIELD(targetList);
WRITE_NODE_FIELD(onConflict);
WRITE_NODE_FIELD(returningList);
WRITE_NODE_FIELD(groupClause);
WRITE_NODE_FIELD(groupingSets);
WRITE_NODE_FIELD(havingQual);
WRITE_NODE_FIELD(windowClause);
WRITE_NODE_FIELD(distinctClause);
WRITE_NODE_FIELD(sortClause);
WRITE_NODE_FIELD(limitOffset);
WRITE_NODE_FIELD(limitCount);
WRITE_NODE_FIELD(rowMarks);
WRITE_NODE_FIELD(setOperations);
WRITE_NODE_FIELD(constraintDeps);
}
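/*
 * For orientation, an illustrative sketch not in the original file: with
 * debug_print_parse enabled, a parsed "SELECT 1" dumped through the routine
 * above begins roughly like
 *
 *	{QUERY :commandType 1 :querySource 0 :canSetTag true :utilityStmt <>
 *	 :resultRelation 0 :hasAggs false ...
 *
 * where the integers are the raw enum codes of CmdType (CMD_SELECT) and
 * QuerySource (QSRC_ORIGINAL), and "<>" marks the NULL utilityStmt.
 */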
static void
_outWithCheckOption(StringInfo str, const WithCheckOption *node)
{
WRITE_NODE_TYPE("WITHCHECKOPTION");
WRITE_ENUM_FIELD(kind, WCOKind);
WRITE_STRING_FIELD(relname);
WRITE_STRING_FIELD(polname);
WRITE_NODE_FIELD(qual);
WRITE_BOOL_FIELD(cascaded);
}
static void
_outSortGroupClause(StringInfo str, const SortGroupClause *node)
{
WRITE_NODE_TYPE("SORTGROUPCLAUSE");
WRITE_UINT_FIELD(tleSortGroupRef);
WRITE_OID_FIELD(eqop);
WRITE_OID_FIELD(sortop);
WRITE_BOOL_FIELD(nulls_first);
WRITE_BOOL_FIELD(hashable);
}
static void
_outGroupingSet(StringInfo str, const GroupingSet *node)
{
WRITE_NODE_TYPE("GROUPINGSET");
WRITE_ENUM_FIELD(kind, GroupingSetKind);
WRITE_NODE_FIELD(content);
WRITE_LOCATION_FIELD(location);
}
static void
_outWindowClause(StringInfo str, const WindowClause *node)
{
WRITE_NODE_TYPE("WINDOWCLAUSE");
WRITE_STRING_FIELD(name);
WRITE_STRING_FIELD(refname);
WRITE_NODE_FIELD(partitionClause);
WRITE_NODE_FIELD(orderClause);
WRITE_INT_FIELD(frameOptions);
WRITE_NODE_FIELD(startOffset);
WRITE_NODE_FIELD(endOffset);
WRITE_UINT_FIELD(winref);
WRITE_BOOL_FIELD(copiedOrder);
}
static void
_outRowMarkClause(StringInfo str, const RowMarkClause *node)
{
WRITE_NODE_TYPE("ROWMARKCLAUSE");
WRITE_UINT_FIELD(rti);
WRITE_ENUM_FIELD(strength, LockClauseStrength);
WRITE_ENUM_FIELD(waitPolicy, LockWaitPolicy);
WRITE_BOOL_FIELD(pushedDown);
}
static void
_outWithClause(StringInfo str, const WithClause *node)
{
WRITE_NODE_TYPE("WITHCLAUSE");
WRITE_NODE_FIELD(ctes);
WRITE_BOOL_FIELD(recursive);
WRITE_LOCATION_FIELD(location);
}
static void
_outCommonTableExpr(StringInfo str, const CommonTableExpr *node)
{
WRITE_NODE_TYPE("COMMONTABLEEXPR");
WRITE_STRING_FIELD(ctename);
WRITE_NODE_FIELD(aliascolnames);
WRITE_NODE_FIELD(ctequery);
WRITE_LOCATION_FIELD(location);
WRITE_BOOL_FIELD(cterecursive);
WRITE_INT_FIELD(cterefcount);
WRITE_NODE_FIELD(ctecolnames);
WRITE_NODE_FIELD(ctecoltypes);
WRITE_NODE_FIELD(ctecoltypmods);
WRITE_NODE_FIELD(ctecolcollations);
}
static void
_outSetOperationStmt(StringInfo str, const SetOperationStmt *node)
{
WRITE_NODE_TYPE("SETOPERATIONSTMT");
WRITE_ENUM_FIELD(op, SetOperation);
WRITE_BOOL_FIELD(all);
WRITE_NODE_FIELD(larg);
WRITE_NODE_FIELD(rarg);
WRITE_NODE_FIELD(colTypes);
WRITE_NODE_FIELD(colTypmods);
WRITE_NODE_FIELD(colCollations);
WRITE_NODE_FIELD(groupClauses);
}
static void
_outRangeTblEntry(StringInfo str, const RangeTblEntry *node)
{
WRITE_NODE_TYPE("RTE");
/* put alias + eref first to make dump more legible */
WRITE_NODE_FIELD(alias);
WRITE_NODE_FIELD(eref);
WRITE_ENUM_FIELD(rtekind, RTEKind);
switch (node->rtekind)
{
case RTE_RELATION:
WRITE_OID_FIELD(relid);
WRITE_CHAR_FIELD(relkind);
WRITE_NODE_FIELD(tablesample);
break;
case RTE_SUBQUERY:
WRITE_NODE_FIELD(subquery);
WRITE_BOOL_FIELD(security_barrier);
break;
case RTE_JOIN:
WRITE_ENUM_FIELD(jointype, JoinType);
WRITE_NODE_FIELD(joinaliasvars);
break;
case RTE_FUNCTION:
WRITE_NODE_FIELD(functions);
WRITE_BOOL_FIELD(funcordinality);
break;
case RTE_VALUES:
WRITE_NODE_FIELD(values_lists);
WRITE_NODE_FIELD(values_collations);
break;
case RTE_CTE:
WRITE_STRING_FIELD(ctename);
WRITE_UINT_FIELD(ctelevelsup);
WRITE_BOOL_FIELD(self_reference);
WRITE_NODE_FIELD(ctecoltypes);
WRITE_NODE_FIELD(ctecoltypmods);
WRITE_NODE_FIELD(ctecolcollations);
break;
default:
elog(ERROR, "unrecognized RTE kind: %d", (int) node->rtekind);
break;
}
WRITE_BOOL_FIELD(lateral);
WRITE_BOOL_FIELD(inh);
WRITE_BOOL_FIELD(inFromCl);
WRITE_UINT_FIELD(requiredPerms);
WRITE_OID_FIELD(checkAsUser);
WRITE_BITMAPSET_FIELD(selectedCols);
WRITE_BITMAPSET_FIELD(insertedCols);
WRITE_BITMAPSET_FIELD(updatedCols);
WRITE_NODE_FIELD(securityQuals);
}
static void
_outRangeTblFunction(StringInfo str, const RangeTblFunction *node)
{
WRITE_NODE_TYPE("RANGETBLFUNCTION");
WRITE_NODE_FIELD(funcexpr);
WRITE_INT_FIELD(funccolcount);
WRITE_NODE_FIELD(funccolnames);
WRITE_NODE_FIELD(funccoltypes);
WRITE_NODE_FIELD(funccoltypmods);
WRITE_NODE_FIELD(funccolcollations);
WRITE_BITMAPSET_FIELD(funcparams);
}
static void
_outTableSampleClause(StringInfo str, const TableSampleClause *node)
{
WRITE_NODE_TYPE("TABLESAMPLECLAUSE");
WRITE_OID_FIELD(tsmhandler);
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(repeatable);
}
static void
_outAExpr(StringInfo str, const A_Expr *node)
{
WRITE_NODE_TYPE("AEXPR");
switch (node->kind)
{
case AEXPR_OP:
appendStringInfoChar(str, ' ');
WRITE_NODE_FIELD(name);
break;
case AEXPR_OP_ANY:
appendStringInfoChar(str, ' ');
WRITE_NODE_FIELD(name);
appendStringInfoString(str, " ANY ");
break;
case AEXPR_OP_ALL:
appendStringInfoChar(str, ' ');
WRITE_NODE_FIELD(name);
appendStringInfoString(str, " ALL ");
break;
case AEXPR_DISTINCT:
appendStringInfoString(str, " DISTINCT ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_NOT_DISTINCT:
appendStringInfoString(str, " NOT_DISTINCT ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_NULLIF:
appendStringInfoString(str, " NULLIF ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_OF:
appendStringInfoString(str, " OF ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_IN:
appendStringInfoString(str, " IN ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_LIKE:
appendStringInfoString(str, " LIKE ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_ILIKE:
appendStringInfoString(str, " ILIKE ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_SIMILAR:
appendStringInfoString(str, " SIMILAR ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_BETWEEN:
appendStringInfoString(str, " BETWEEN ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_NOT_BETWEEN:
appendStringInfoString(str, " NOT_BETWEEN ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_BETWEEN_SYM:
appendStringInfoString(str, " BETWEEN_SYM ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_NOT_BETWEEN_SYM:
appendStringInfoString(str, " NOT_BETWEEN_SYM ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_PAREN:
appendStringInfoString(str, " PAREN");
break;
default:
appendStringInfoString(str, " ??");
break;
}
WRITE_NODE_FIELD(lexpr);
WRITE_NODE_FIELD(rexpr);
WRITE_LOCATION_FIELD(location);
}
static void
_outValue(StringInfo str, const Value *value)
{
switch (value->type)
{
case T_Integer:
appendStringInfo(str, "%ld", value->val.ival);
break;
case T_Float:
/*
* We assume the value is a valid numeric literal and so does not
* need quoting.
*/
appendStringInfoString(str, value->val.str);
break;
case T_String:
/*
* We use _outToken to provide escaping of the string's content,
* but we don't want it to do anything with an empty string.
*/
appendStringInfoChar(str, '"');
if (value->val.str[0] != '\0')
_outToken(str, value->val.str);
appendStringInfoChar(str, '"');
break;
case T_BitString:
/* internal representation already has leading 'b' */
appendStringInfoString(str, value->val.str);
break;
case T_Null:
/* this is seen only within A_Const, not in transformed trees */
appendStringInfoString(str, "NULL");
break;
default:
elog(ERROR, "unrecognized node type: %d", (int) value->type);
break;
}
}
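/*
 * For reference, an illustrative note not in the original file: the cases
 * above render, for example, as
 *
 *	Integer 42         ->  42
 *	Float "4.25"       ->  4.25
 *	String "foo"       ->  "foo"	(contents escaped via _outToken)
 *	String ""          ->  ""
 *	BitString "b1001"  ->  b1001
 *	Null               ->  NULL
 */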
static void
_outColumnRef(StringInfo str, const ColumnRef *node)
{
WRITE_NODE_TYPE("COLUMNREF");
WRITE_NODE_FIELD(fields);
WRITE_LOCATION_FIELD(location);
}
static void
_outParamRef(StringInfo str, const ParamRef *node)
{
WRITE_NODE_TYPE("PARAMREF");
WRITE_INT_FIELD(number);
WRITE_LOCATION_FIELD(location);
}
static void
_outAConst(StringInfo str, const A_Const *node)
{
WRITE_NODE_TYPE("A_CONST");
appendStringInfoString(str, " :val ");
_outValue(str, &(node->val));
WRITE_LOCATION_FIELD(location);
}
static void
_outA_Star(StringInfo str, const A_Star *node)
{
WRITE_NODE_TYPE("A_STAR");
}
static void
_outA_Indices(StringInfo str, const A_Indices *node)
{
WRITE_NODE_TYPE("A_INDICES");
WRITE_BOOL_FIELD(is_slice);
WRITE_NODE_FIELD(lidx);
WRITE_NODE_FIELD(uidx);
}
static void
_outA_Indirection(StringInfo str, const A_Indirection *node)
{
WRITE_NODE_TYPE("A_INDIRECTION");
WRITE_NODE_FIELD(arg);
WRITE_NODE_FIELD(indirection);
}
static void
_outA_ArrayExpr(StringInfo str, const A_ArrayExpr *node)
{
WRITE_NODE_TYPE("A_ARRAYEXPR");
WRITE_NODE_FIELD(elements);
WRITE_LOCATION_FIELD(location);
}
static void
_outResTarget(StringInfo str, const ResTarget *node)
{
WRITE_NODE_TYPE("RESTARGET");
WRITE_STRING_FIELD(name);
WRITE_NODE_FIELD(indirection);
WRITE_NODE_FIELD(val);
WRITE_LOCATION_FIELD(location);
}
static void
_outMultiAssignRef(StringInfo str, const MultiAssignRef *node)
{
WRITE_NODE_TYPE("MULTIASSIGNREF");
WRITE_NODE_FIELD(source);
WRITE_INT_FIELD(colno);
WRITE_INT_FIELD(ncolumns);
}
static void
_outSortBy(StringInfo str, const SortBy *node)
{
WRITE_NODE_TYPE("SORTBY");
WRITE_NODE_FIELD(node);
WRITE_ENUM_FIELD(sortby_dir, SortByDir);
WRITE_ENUM_FIELD(sortby_nulls, SortByNulls);
WRITE_NODE_FIELD(useOp);
WRITE_LOCATION_FIELD(location);
}
static void
_outWindowDef(StringInfo str, const WindowDef *node)
{
WRITE_NODE_TYPE("WINDOWDEF");
WRITE_STRING_FIELD(name);
WRITE_STRING_FIELD(refname);
WRITE_NODE_FIELD(partitionClause);
WRITE_NODE_FIELD(orderClause);
WRITE_INT_FIELD(frameOptions);
WRITE_NODE_FIELD(startOffset);
WRITE_NODE_FIELD(endOffset);
WRITE_LOCATION_FIELD(location);
}
static void
_outRangeSubselect(StringInfo str, const RangeSubselect *node)
{
WRITE_NODE_TYPE("RANGESUBSELECT");
WRITE_BOOL_FIELD(lateral);
WRITE_NODE_FIELD(subquery);
WRITE_NODE_FIELD(alias);
}
static void
_outRangeFunction(StringInfo str, const RangeFunction *node)
{
WRITE_NODE_TYPE("RANGEFUNCTION");
WRITE_BOOL_FIELD(lateral);
WRITE_BOOL_FIELD(ordinality);
WRITE_BOOL_FIELD(is_rowsfrom);
WRITE_NODE_FIELD(functions);
WRITE_NODE_FIELD(alias);
WRITE_NODE_FIELD(coldeflist);
}
static void
_outRangeTableSample(StringInfo str, const RangeTableSample *node)
{
WRITE_NODE_TYPE("RANGETABLESAMPLE");
WRITE_NODE_FIELD(relation);
WRITE_NODE_FIELD(method);
WRITE_NODE_FIELD(args);
WRITE_NODE_FIELD(repeatable);
WRITE_LOCATION_FIELD(location);
}
static void
_outConstraint(StringInfo str, const Constraint *node)
{
WRITE_NODE_TYPE("CONSTRAINT");
WRITE_STRING_FIELD(conname);
WRITE_BOOL_FIELD(deferrable);
WRITE_BOOL_FIELD(initdeferred);
WRITE_LOCATION_FIELD(location);
appendStringInfoString(str, " :contype ");
switch (node->contype)
{
case CONSTR_NULL:
appendStringInfoString(str, "NULL");
break;
case CONSTR_NOTNULL:
appendStringInfoString(str, "NOT_NULL");
break;
case CONSTR_DEFAULT:
appendStringInfoString(str, "DEFAULT");
WRITE_NODE_FIELD(raw_expr);
WRITE_STRING_FIELD(cooked_expr);
break;
case CONSTR_CHECK:
appendStringInfoString(str, "CHECK");
WRITE_BOOL_FIELD(is_no_inherit);
WRITE_NODE_FIELD(raw_expr);
WRITE_STRING_FIELD(cooked_expr);
break;
case CONSTR_PRIMARY:
appendStringInfoString(str, "PRIMARY_KEY");
WRITE_NODE_FIELD(keys);
WRITE_NODE_FIELD(options);
WRITE_STRING_FIELD(indexname);
WRITE_STRING_FIELD(indexspace);
/* access_method and where_clause not currently used */
break;
case CONSTR_UNIQUE:
appendStringInfoString(str, "UNIQUE");
WRITE_NODE_FIELD(keys);
WRITE_NODE_FIELD(options);
WRITE_STRING_FIELD(indexname);
WRITE_STRING_FIELD(indexspace);
/* access_method and where_clause not currently used */
break;
case CONSTR_EXCLUSION:
appendStringInfoString(str, "EXCLUSION");
WRITE_NODE_FIELD(exclusions);
WRITE_NODE_FIELD(options);
WRITE_STRING_FIELD(indexname);
WRITE_STRING_FIELD(indexspace);
WRITE_STRING_FIELD(access_method);
WRITE_NODE_FIELD(where_clause);
break;
case CONSTR_FOREIGN:
appendStringInfoString(str, "FOREIGN_KEY");
WRITE_NODE_FIELD(pktable);
WRITE_NODE_FIELD(fk_attrs);
WRITE_NODE_FIELD(pk_attrs);
WRITE_CHAR_FIELD(fk_matchtype);
WRITE_CHAR_FIELD(fk_upd_action);
WRITE_CHAR_FIELD(fk_del_action);
WRITE_NODE_FIELD(old_conpfeqop);
WRITE_OID_FIELD(old_pktable_oid);
WRITE_BOOL_FIELD(skip_validation);
WRITE_BOOL_FIELD(initially_valid);
break;
case CONSTR_ATTR_DEFERRABLE:
appendStringInfoString(str, "ATTR_DEFERRABLE");
break;
case CONSTR_ATTR_NOT_DEFERRABLE:
appendStringInfoString(str, "ATTR_NOT_DEFERRABLE");
break;
case CONSTR_ATTR_DEFERRED:
appendStringInfoString(str, "ATTR_DEFERRED");
break;
case CONSTR_ATTR_IMMEDIATE:
appendStringInfoString(str, "ATTR_IMMEDIATE");
break;
default:
appendStringInfo(str, "<unrecognized_constraint %d>",
(int) node->contype);
break;
}
}
static void
_outForeignKeyCacheInfo(StringInfo str, const ForeignKeyCacheInfo *node)
{
int i;
WRITE_NODE_TYPE("FOREIGNKEYCACHEINFO");
WRITE_OID_FIELD(conrelid);
WRITE_OID_FIELD(confrelid);
WRITE_INT_FIELD(nkeys);
appendStringInfoString(str, " :conkey");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %d", node->conkey[i]);
appendStringInfoString(str, " :confkey");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %d", node->confkey[i]);
appendStringInfoString(str, " :conpfeqop");
for (i = 0; i < node->nkeys; i++)
appendStringInfo(str, " %u", node->conpfeqop[i]);
}
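
/*
 * Note that, unlike most fields in this file, the three arrays above are
 * written out "by hand" as bare space-separated values rather than through
 * the WRITE_* macros.  For a hypothetical single-column foreign key this
 * part of the output would look roughly like
 * " :conkey 2 :confkey 1 :conpfeqop 96" (values illustrative only).
 */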
/*
* outNode -
* converts a Node into an ascii string and appends it to 'str'
*/
void
outNode(StringInfo str, const void *obj)
{
if (obj == NULL)
appendStringInfoString(str, "<>");
else if (IsA(obj, List) || IsA(obj, IntList) || IsA(obj, OidList))
_outList(str, obj);
else if (IsA(obj, Integer) ||
IsA(obj, Float) ||
IsA(obj, String) ||
IsA(obj, BitString))
{
/* nodeRead does not want to see { } around these! */
_outValue(str, obj);
}
else
{
appendStringInfoChar(str, '{');
switch (nodeTag(obj))
{
case T_PlannedStmt:
_outPlannedStmt(str, obj);
break;
case T_Plan:
_outPlan(str, obj);
break;
case T_Result:
_outResult(str, obj);
break;
case T_ModifyTable:
_outModifyTable(str, obj);
break;
case T_Append:
_outAppend(str, obj);
break;
case T_MergeAppend:
_outMergeAppend(str, obj);
break;
case T_RecursiveUnion:
_outRecursiveUnion(str, obj);
break;
case T_BitmapAnd:
_outBitmapAnd(str, obj);
break;
case T_BitmapOr:
_outBitmapOr(str, obj);
break;
case T_Gather:
_outGather(str, obj);
break;
case T_Scan:
_outScan(str, obj);
break;
case T_SeqScan:
_outSeqScan(str, obj);
break;
case T_SampleScan:
_outSampleScan(str, obj);
break;
case T_IndexScan:
_outIndexScan(str, obj);
break;
case T_IndexOnlyScan:
_outIndexOnlyScan(str, obj);
break;
case T_BitmapIndexScan:
_outBitmapIndexScan(str, obj);
break;
case T_BitmapHeapScan:
_outBitmapHeapScan(str, obj);
break;
case T_TidScan:
_outTidScan(str, obj);
break;
case T_SubqueryScan:
_outSubqueryScan(str, obj);
break;
case T_FunctionScan:
_outFunctionScan(str, obj);
break;
case T_ValuesScan:
_outValuesScan(str, obj);
break;
case T_CteScan:
_outCteScan(str, obj);
break;
case T_WorkTableScan:
_outWorkTableScan(str, obj);
break;
case T_ForeignScan:
_outForeignScan(str, obj);
break;
case T_CustomScan:
_outCustomScan(str, obj);
break;
case T_Join:
_outJoin(str, obj);
break;
case T_NestLoop:
_outNestLoop(str, obj);
break;
case T_MergeJoin:
_outMergeJoin(str, obj);
break;
case T_HashJoin:
_outHashJoin(str, obj);
break;
case T_Agg:
_outAgg(str, obj);
break;
case T_WindowAgg:
_outWindowAgg(str, obj);
break;
case T_Group:
_outGroup(str, obj);
break;
case T_Material:
_outMaterial(str, obj);
break;
case T_Sort:
_outSort(str, obj);
break;
case T_Unique:
_outUnique(str, obj);
break;
case T_Hash:
_outHash(str, obj);
break;
case T_SetOp:
_outSetOp(str, obj);
break;
case T_LockRows:
_outLockRows(str, obj);
break;
case T_Limit:
_outLimit(str, obj);
break;
case T_NestLoopParam:
_outNestLoopParam(str, obj);
break;
case T_PlanRowMark:
_outPlanRowMark(str, obj);
break;
case T_PlanInvalItem:
_outPlanInvalItem(str, obj);
break;
case T_Alias:
_outAlias(str, obj);
break;
case T_RangeVar:
_outRangeVar(str, obj);
break;
case T_IntoClause:
_outIntoClause(str, obj);
break;
case T_Var:
_outVar(str, obj);
break;
case T_Const:
_outConst(str, obj);
break;
case T_Param:
_outParam(str, obj);
break;
case T_Aggref:
_outAggref(str, obj);
break;
case T_GroupingFunc:
_outGroupingFunc(str, obj);
break;
case T_WindowFunc:
_outWindowFunc(str, obj);
break;
case T_ArrayRef:
_outArrayRef(str, obj);
break;
case T_FuncExpr:
_outFuncExpr(str, obj);
break;
case T_NamedArgExpr:
_outNamedArgExpr(str, obj);
break;
case T_OpExpr:
_outOpExpr(str, obj);
break;
case T_DistinctExpr:
_outDistinctExpr(str, obj);
break;
case T_NullIfExpr:
_outNullIfExpr(str, obj);
break;
case T_ScalarArrayOpExpr:
_outScalarArrayOpExpr(str, obj);
break;
case T_BoolExpr:
_outBoolExpr(str, obj);
break;
case T_SubLink:
_outSubLink(str, obj);
break;
case T_SubPlan:
_outSubPlan(str, obj);
break;
case T_AlternativeSubPlan:
_outAlternativeSubPlan(str, obj);
break;
case T_FieldSelect:
_outFieldSelect(str, obj);
break;
case T_FieldStore:
_outFieldStore(str, obj);
break;
case T_RelabelType:
_outRelabelType(str, obj);
break;
case T_CoerceViaIO:
_outCoerceViaIO(str, obj);
break;
case T_ArrayCoerceExpr:
_outArrayCoerceExpr(str, obj);
break;
case T_ConvertRowtypeExpr:
_outConvertRowtypeExpr(str, obj);
break;
case T_CollateExpr:
_outCollateExpr(str, obj);
break;
case T_CaseExpr:
_outCaseExpr(str, obj);
break;
case T_CaseWhen:
_outCaseWhen(str, obj);
break;
case T_CaseTestExpr:
_outCaseTestExpr(str, obj);
break;
case T_ArrayExpr:
_outArrayExpr(str, obj);
break;
case T_RowExpr:
_outRowExpr(str, obj);
break;
case T_RowCompareExpr:
_outRowCompareExpr(str, obj);
break;
case T_CoalesceExpr:
_outCoalesceExpr(str, obj);
break;
case T_MinMaxExpr:
_outMinMaxExpr(str, obj);
break;
case T_SQLValueFunction:
_outSQLValueFunction(str, obj);
break;
case T_XmlExpr:
_outXmlExpr(str, obj);
break;
case T_NullTest:
_outNullTest(str, obj);
break;
case T_BooleanTest:
_outBooleanTest(str, obj);
break;
case T_CoerceToDomain:
_outCoerceToDomain(str, obj);
break;
case T_CoerceToDomainValue:
_outCoerceToDomainValue(str, obj);
break;
case T_SetToDefault:
_outSetToDefault(str, obj);
break;
case T_CurrentOfExpr:
_outCurrentOfExpr(str, obj);
break;
case T_InferenceElem:
_outInferenceElem(str, obj);
break;
case T_TargetEntry:
_outTargetEntry(str, obj);
break;
case T_RangeTblRef:
_outRangeTblRef(str, obj);
break;
case T_JoinExpr:
_outJoinExpr(str, obj);
break;
case T_FromExpr:
_outFromExpr(str, obj);
break;
case T_OnConflictExpr:
_outOnConflictExpr(str, obj);
break;
case T_Path:
_outPath(str, obj);
break;
case T_IndexPath:
_outIndexPath(str, obj);
break;
case T_BitmapHeapPath:
_outBitmapHeapPath(str, obj);
break;
case T_BitmapAndPath:
_outBitmapAndPath(str, obj);
break;
case T_BitmapOrPath:
_outBitmapOrPath(str, obj);
break;
case T_TidPath:
_outTidPath(str, obj);
break;
case T_SubqueryScanPath:
_outSubqueryScanPath(str, obj);
break;
case T_ForeignPath:
_outForeignPath(str, obj);
break;
case T_CustomPath:
_outCustomPath(str, obj);
break;
case T_AppendPath:
_outAppendPath(str, obj);
break;
case T_MergeAppendPath:
_outMergeAppendPath(str, obj);
break;
case T_ResultPath:
_outResultPath(str, obj);
break;
case T_MaterialPath:
_outMaterialPath(str, obj);
break;
case T_UniquePath:
_outUniquePath(str, obj);
break;
case T_GatherPath:
_outGatherPath(str, obj);
break;
case T_ProjectionPath:
_outProjectionPath(str, obj);
break;
case T_SortPath:
_outSortPath(str, obj);
break;
case T_GroupPath:
_outGroupPath(str, obj);
break;
case T_UpperUniquePath:
_outUpperUniquePath(str, obj);
break;
case T_AggPath:
_outAggPath(str, obj);
break;
case T_GroupingSetsPath:
_outGroupingSetsPath(str, obj);
break;
case T_MinMaxAggPath:
_outMinMaxAggPath(str, obj);
break;
case T_WindowAggPath:
_outWindowAggPath(str, obj);
break;
case T_SetOpPath:
_outSetOpPath(str, obj);
break;
case T_RecursiveUnionPath:
_outRecursiveUnionPath(str, obj);
break;
case T_LockRowsPath:
_outLockRowsPath(str, obj);
break;
case T_ModifyTablePath:
_outModifyTablePath(str, obj);
break;
case T_LimitPath:
_outLimitPath(str, obj);
break;
case T_NestPath:
_outNestPath(str, obj);
break;
case T_MergePath:
_outMergePath(str, obj);
break;
case T_HashPath:
_outHashPath(str, obj);
break;
case T_PlannerGlobal:
_outPlannerGlobal(str, obj);
break;
case T_PlannerInfo:
_outPlannerInfo(str, obj);
break;
case T_RelOptInfo:
_outRelOptInfo(str, obj);
break;
case T_IndexOptInfo:
_outIndexOptInfo(str, obj);
break;
case T_ForeignKeyOptInfo:
_outForeignKeyOptInfo(str, obj);
break;
case T_EquivalenceClass:
_outEquivalenceClass(str, obj);
break;
case T_EquivalenceMember:
_outEquivalenceMember(str, obj);
break;
case T_PathKey:
_outPathKey(str, obj);
break;
case T_PathTarget:
_outPathTarget(str, obj);
break;
case T_ParamPathInfo:
_outParamPathInfo(str, obj);
break;
case T_RestrictInfo:
_outRestrictInfo(str, obj);
break;
case T_PlaceHolderVar:
_outPlaceHolderVar(str, obj);
break;
case T_SpecialJoinInfo:
_outSpecialJoinInfo(str, obj);
break;
case T_AppendRelInfo:
_outAppendRelInfo(str, obj);
break;
case T_PlaceHolderInfo:
_outPlaceHolderInfo(str, obj);
break;
case T_MinMaxAggInfo:
_outMinMaxAggInfo(str, obj);
break;
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;
case T_ExtensibleNode:
_outExtensibleNode(str, obj);
break;
case T_CreateStmt:
_outCreateStmt(str, obj);
break;
case T_CreateForeignTableStmt:
_outCreateForeignTableStmt(str, obj);
break;
case T_ImportForeignSchemaStmt:
_outImportForeignSchemaStmt(str, obj);
break;
case T_IndexStmt:
_outIndexStmt(str, obj);
break;
case T_NotifyStmt:
_outNotifyStmt(str, obj);
break;
case T_DeclareCursorStmt:
_outDeclareCursorStmt(str, obj);
break;
case T_SelectStmt:
_outSelectStmt(str, obj);
break;
case T_ColumnDef:
_outColumnDef(str, obj);
break;
case T_TypeName:
_outTypeName(str, obj);
break;
case T_TypeCast:
_outTypeCast(str, obj);
break;
case T_CollateClause:
_outCollateClause(str, obj);
break;
case T_IndexElem:
_outIndexElem(str, obj);
break;
case T_Query:
_outQuery(str, obj);
break;
case T_WithCheckOption:
_outWithCheckOption(str, obj);
break;
case T_SortGroupClause:
_outSortGroupClause(str, obj);
break;
case T_GroupingSet:
_outGroupingSet(str, obj);
break;
case T_WindowClause:
_outWindowClause(str, obj);
break;
case T_RowMarkClause:
_outRowMarkClause(str, obj);
break;
case T_WithClause:
_outWithClause(str, obj);
break;
case T_CommonTableExpr:
_outCommonTableExpr(str, obj);
break;
case T_SetOperationStmt:
_outSetOperationStmt(str, obj);
break;
case T_RangeTblEntry:
_outRangeTblEntry(str, obj);
break;
case T_RangeTblFunction:
_outRangeTblFunction(str, obj);
break;
case T_TableSampleClause:
_outTableSampleClause(str, obj);
break;
case T_A_Expr:
_outAExpr(str, obj);
break;
case T_ColumnRef:
_outColumnRef(str, obj);
break;
case T_ParamRef:
_outParamRef(str, obj);
break;
case T_A_Const:
_outAConst(str, obj);
break;
case T_A_Star:
_outA_Star(str, obj);
break;
case T_A_Indices:
_outA_Indices(str, obj);
break;
case T_A_Indirection:
_outA_Indirection(str, obj);
break;
case T_A_ArrayExpr:
_outA_ArrayExpr(str, obj);
break;
case T_ResTarget:
_outResTarget(str, obj);
break;
case T_MultiAssignRef:
_outMultiAssignRef(str, obj);
break;
case T_SortBy:
_outSortBy(str, obj);
break;
case T_WindowDef:
_outWindowDef(str, obj);
break;
case T_RangeSubselect:
_outRangeSubselect(str, obj);
break;
case T_RangeFunction:
_outRangeFunction(str, obj);
break;
case T_RangeTableSample:
_outRangeTableSample(str, obj);
break;
case T_Constraint:
_outConstraint(str, obj);
break;
case T_FuncCall:
_outFuncCall(str, obj);
break;
case T_DefElem:
_outDefElem(str, obj);
break;
case T_TableLikeClause:
_outTableLikeClause(str, obj);
break;
case T_LockingClause:
_outLockingClause(str, obj);
break;
case T_XmlSerialize:
_outXmlSerialize(str, obj);
break;
case T_ForeignKeyCacheInfo:
_outForeignKeyCacheInfo(str, obj);
break;
default:
/*
* This should be an ERROR, but it's too useful to be able to
* dump structures that outNode only understands part of.
*/
elog(WARNING, "could not dump unrecognized node type: %d",
(int) nodeTag(obj));
break;
}
appendStringInfoChar(str, '}');
}
}
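
/*
 * A small worked example (a sketch, not captured output): passing an
 * Integer Value node holding 42 to outNode() appends just "42", with no
 * surrounding braces, per the special case above, whereas a Var node is
 * emitted as something like "{VAR :varno 1 :varattno 2 ...}".
 */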
/*
* nodeToString -
* returns the ascii representation of the Node as a palloc'd string
*/
char *
nodeToString(const void *obj)
{
StringInfoData str;
/* see stringinfo.h for an explanation of this maneuver */
initStringInfo(&str);
outNode(&str, obj);
return str.data;
}
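
/*
 * Example usage (a debugging sketch; "parsetree" is a hypothetical variable,
 * not something defined in this file):
 *
 *	elog(LOG, "parse tree: %s", nodeToString(parsetree));
 *
 * The returned string is palloc'd in the current memory context, so it goes
 * away automatically when that context is reset or deleted.
 */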