Avoid recursion when processing simple lists of AND'ed or OR'ed clauses.

Since most of the system thinks AND and OR are N-argument expressions
anyway, let's have the grammar generate a representation of that form when
dealing with input like "x AND y AND z AND ...", rather than generating
a deeply-nested binary tree that just has to be flattened later by the
planner.  This avoids stack overflow in parse analysis when dealing with
queries having more than a few thousand such clauses; and in any case it
removes some rather unsightly inconsistencies, since some parts of parse
analysis were generating N-argument ANDs/ORs already.

It's still possible to get a stack overflow with weirdly parenthesized
input, such as "x AND (y AND (z AND ( ... )))", but such cases are not
mainstream usage.  The maximum depth of parenthesization is already
limited by Bison's stack in such cases, anyway, so that the limit is
probably fairly platform-independent.

Patch originally by Gurjeet Singh, heavily revised by me
This commit is contained in:
Tom Lane 2014-06-16 15:55:05 -04:00
parent ac608fe758
commit 2146f13408
12 changed files with 155 additions and 141 deletions

View File

@ -1716,9 +1716,6 @@ deparseRelabelType(RelabelType *node, deparse_expr_cxt *context)
/*
* Deparse a BoolExpr node.
*
* Note: by the time we get here, AND and OR expressions have been flattened
* into N-argument form, so we'd better be prepared to deal with that.
*/
static void
deparseBoolExpr(BoolExpr *node, deparse_expr_cxt *context)

View File

@ -3047,6 +3047,14 @@ raw_expression_tree_walker(Node *node,
/* operator name is deemed uninteresting */
}
break;
case T_BoolExpr:
{
BoolExpr *expr = (BoolExpr *) node;
if (walker(expr->args, context))
return true;
}
break;
case T_ColumnRef:
/* we assume the fields contain nothing interesting */
break;

View File

@ -2437,15 +2437,6 @@ _outAExpr(StringInfo str, const A_Expr *node)
appendStringInfoChar(str, ' ');
WRITE_NODE_FIELD(name);
break;
case AEXPR_AND:
appendStringInfoString(str, " AND");
break;
case AEXPR_OR:
appendStringInfoString(str, " OR");
break;
case AEXPR_NOT:
appendStringInfoString(str, " NOT");
break;
case AEXPR_OP_ANY:
appendStringInfoChar(str, ' ');
WRITE_NODE_FIELD(name);

View File

@ -133,9 +133,9 @@ static Node *find_jointree_node_for_rel(Node *jtnode, int relid);
* transformations if any are found.
*
* This routine has to run before preprocess_expression(), so the quals
* clauses are not yet reduced to implicit-AND format. That means we need
* to recursively search through explicit AND clauses, which are
* probably only binary ANDs. We stop as soon as we hit a non-AND item.
* clauses are not yet reduced to implicit-AND format, and are not guaranteed
* to be AND/OR-flat either. That means we need to recursively search through
* explicit AND clauses. We stop as soon as we hit a non-AND item.
*/
void
pull_up_sublinks(PlannerInfo *root)

View File

@ -4,13 +4,12 @@
* Routines for preprocessing qualification expressions
*
*
* The parser regards AND and OR as purely binary operators, so a qual like
* (A = 1) OR (A = 2) OR (A = 3) ...
* will produce a nested parsetree
* (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
* In reality, the optimizer and executor regard AND and OR as N-argument
* operators, so this tree can be flattened to
* (OR (A = 1) (A = 2) (A = 3) ...)
* While the parser will produce flattened (N-argument) AND/OR trees from
* simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
* directly underneath another AND, or OR underneath OR, if the input was
* oddly parenthesized. Also, rule expansion and subquery flattening could
* produce such parsetrees. The planner wants to flatten all such cases
* to ensure consistent optimization behavior.
*
* Formerly, this module was responsible for doing the initial flattening,
* but now we leave it to eval_const_expressions to do that since it has to

View File

@ -3447,12 +3447,15 @@ simplify_or_arguments(List *args,
List *unprocessed_args;
/*
* Since the parser considers OR to be a binary operator, long OR lists
* become deeply nested expressions. We must flatten these into long
* argument lists of a single OR operator. To avoid blowing out the stack
* with recursion of eval_const_expressions, we resort to some tenseness
* here: we keep a list of not-yet-processed inputs, and handle flattening
* of nested ORs by prepending to the to-do list instead of recursing.
* We want to ensure that any OR immediately beneath another OR gets
* flattened into a single OR-list, so as to simplify later reasoning.
*
* To avoid stack overflow from recursion of eval_const_expressions, we
* resort to some tenseness here: we keep a list of not-yet-processed
* inputs, and handle flattening of nested ORs by prepending to the to-do
* list instead of recursing. Now that the parser generates N-argument
* ORs from simple lists, this complexity is probably less necessary than
* it once was, but we might as well keep the logic.
*/
unprocessed_args = list_copy(args);
while (unprocessed_args)

View File

@ -151,6 +151,9 @@ static void insertSelectOptions(SelectStmt *stmt,
static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg);
static Node *doNegate(Node *n, int location);
static void doNegateFloat(Value *v);
static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location);
static Node *makeOrExpr(Node *lexpr, Node *rexpr, int location);
static Node *makeNotExpr(Node *expr, int location);
static Node *makeAArrayExpr(List *elements, int location);
static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args,
List *args, int location);
@ -10849,11 +10852,11 @@ a_expr: c_expr { $$ = $1; }
{ $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, NULL, @2); }
| a_expr AND a_expr
{ $$ = (Node *) makeA_Expr(AEXPR_AND, NIL, $1, $3, @2); }
{ $$ = makeAndExpr($1, $3, @2); }
| a_expr OR a_expr
{ $$ = (Node *) makeA_Expr(AEXPR_OR, NIL, $1, $3, @2); }
{ $$ = makeOrExpr($1, $3, @2); }
| NOT a_expr
{ $$ = (Node *) makeA_Expr(AEXPR_NOT, NIL, NULL, $2, @1); }
{ $$ = makeNotExpr($2, @1); }
| a_expr LIKE a_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "~~", $1, $3, @2); }
@ -11022,11 +11025,9 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr IS NOT DISTINCT FROM a_expr %prec IS
{
$$ = (Node *) makeA_Expr(AEXPR_NOT, NIL, NULL,
(Node *) makeSimpleA_Expr(AEXPR_DISTINCT,
"=", $1, $6, @2),
@2);
$$ = makeNotExpr((Node *) makeSimpleA_Expr(AEXPR_DISTINCT,
"=", $1, $6, @2),
@2);
}
| a_expr IS OF '(' type_list ')' %prec IS
{
@ -11044,43 +11045,43 @@ a_expr: c_expr { $$ = $1; }
*/
| a_expr BETWEEN opt_asymmetric b_expr AND b_expr %prec BETWEEN
{
$$ = (Node *) makeA_Expr(AEXPR_AND, NIL,
$$ = makeAndExpr(
(Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $4, @2),
(Node *) makeSimpleA_Expr(AEXPR_OP, "<=", $1, $6, @2),
@2);
@2);
}
| a_expr NOT BETWEEN opt_asymmetric b_expr AND b_expr %prec BETWEEN
{
$$ = (Node *) makeA_Expr(AEXPR_OR, NIL,
$$ = makeOrExpr(
(Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $5, @2),
(Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $7, @2),
@2);
@2);
}
| a_expr BETWEEN SYMMETRIC b_expr AND b_expr %prec BETWEEN
{
$$ = (Node *) makeA_Expr(AEXPR_OR, NIL,
(Node *) makeA_Expr(AEXPR_AND, NIL,
$$ = makeOrExpr(
makeAndExpr(
(Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $4, @2),
(Node *) makeSimpleA_Expr(AEXPR_OP, "<=", $1, $6, @2),
@2),
(Node *) makeA_Expr(AEXPR_AND, NIL,
@2),
makeAndExpr(
(Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $6, @2),
(Node *) makeSimpleA_Expr(AEXPR_OP, "<=", $1, $4, @2),
@2),
@2);
@2),
@2);
}
| a_expr NOT BETWEEN SYMMETRIC b_expr AND b_expr %prec BETWEEN
{
$$ = (Node *) makeA_Expr(AEXPR_AND, NIL,
(Node *) makeA_Expr(AEXPR_OR, NIL,
$$ = makeAndExpr(
makeOrExpr(
(Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $5, @2),
(Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $7, @2),
@2),
(Node *) makeA_Expr(AEXPR_OR, NIL,
@2),
makeOrExpr(
(Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $7, @2),
(Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $5, @2),
@2),
@2);
@2),
@2);
}
| a_expr IN_P in_expr
{
@ -11114,7 +11115,7 @@ a_expr: c_expr { $$ = $1; }
n->operName = list_make1(makeString("="));
n->location = @3;
/* Stick a NOT on top */
$$ = (Node *) makeA_Expr(AEXPR_NOT, NIL, NULL, (Node *) n, @2);
$$ = makeNotExpr((Node *) n, @2);
}
else
{
@ -11162,10 +11163,9 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr IS NOT DOCUMENT_P %prec IS
{
$$ = (Node *) makeA_Expr(AEXPR_NOT, NIL, NULL,
makeXmlExpr(IS_DOCUMENT, NULL, NIL,
list_make1($1), @2),
@2);
$$ = makeNotExpr(makeXmlExpr(IS_DOCUMENT, NULL, NIL,
list_make1($1), @2),
@2);
}
;
@ -11216,8 +11216,9 @@ b_expr: c_expr
}
| b_expr IS NOT DISTINCT FROM b_expr %prec IS
{
$$ = (Node *) makeA_Expr(AEXPR_NOT, NIL,
NULL, (Node *) makeSimpleA_Expr(AEXPR_DISTINCT, "=", $1, $6, @2), @2);
$$ = makeNotExpr((Node *) makeSimpleA_Expr(AEXPR_DISTINCT,
"=", $1, $6, @2),
@2);
}
| b_expr IS OF '(' type_list ')' %prec IS
{
@ -11234,10 +11235,9 @@ b_expr: c_expr
}
| b_expr IS NOT DOCUMENT_P %prec IS
{
$$ = (Node *) makeA_Expr(AEXPR_NOT, NIL, NULL,
makeXmlExpr(IS_DOCUMENT, NULL, NIL,
list_make1($1), @2),
@2);
$$ = makeNotExpr(makeXmlExpr(IS_DOCUMENT, NULL, NIL,
list_make1($1), @2),
@2);
}
;
@ -13692,6 +13692,46 @@ doNegateFloat(Value *v)
v->val.str = psprintf("-%s", oldval);
}
static Node *
makeAndExpr(Node *lexpr, Node *rexpr, int location)
{
/* Flatten "a AND b AND c ..." to a single BoolExpr on sight */
if (IsA(lexpr, BoolExpr))
{
BoolExpr *blexpr = (BoolExpr *) lexpr;
if (blexpr->boolop == AND_EXPR)
{
blexpr->args = lappend(blexpr->args, rexpr);
return (Node *) blexpr;
}
}
return (Node *) makeBoolExpr(AND_EXPR, list_make2(lexpr, rexpr), location);
}
static Node *
makeOrExpr(Node *lexpr, Node *rexpr, int location)
{
/* Flatten "a OR b OR c ..." to a single BoolExpr on sight */
if (IsA(lexpr, BoolExpr))
{
BoolExpr *blexpr = (BoolExpr *) lexpr;
if (blexpr->boolop == OR_EXPR)
{
blexpr->args = lappend(blexpr->args, rexpr);
return (Node *) blexpr;
}
}
return (Node *) makeBoolExpr(OR_EXPR, list_make2(lexpr, rexpr), location);
}
static Node *
makeNotExpr(Node *expr, int location)
{
return (Node *) makeBoolExpr(NOT_EXPR, list_make1(expr), location);
}
static Node *
makeAArrayExpr(List *elements, int location)
{

View File

@ -332,7 +332,8 @@ transformJoinUsingClause(ParseState *pstate,
RangeTblEntry *leftRTE, RangeTblEntry *rightRTE,
List *leftVars, List *rightVars)
{
Node *result = NULL;
Node *result;
List *andargs = NIL;
ListCell *lvars,
*rvars;
@ -358,18 +359,16 @@ transformJoinUsingClause(ParseState *pstate,
copyObject(lvar), copyObject(rvar),
-1);
/* And combine into an AND clause, if multiple join columns */
if (result == NULL)
result = (Node *) e;
else
{
A_Expr *a;
a = makeA_Expr(AEXPR_AND, NIL, result, (Node *) e, -1);
result = (Node *) a;
}
/* Prepare to combine into an AND clause, if multiple join columns */
andargs = lappend(andargs, e);
}
/* Only need an AND if there's more than one join column */
if (list_length(andargs) == 1)
result = (Node *) linitial(andargs);
else
result = (Node *) makeBoolExpr(AND_EXPR, andargs, -1);
/*
* Since the references are already Vars, and are certainly from the input
* relations, we don't have to go through the same pushups that

View File

@ -41,15 +41,13 @@ bool Transform_null_equals = false;
static Node *transformExprRecurse(ParseState *pstate, Node *expr);
static Node *transformParamRef(ParseState *pstate, ParamRef *pref);
static Node *transformAExprOp(ParseState *pstate, A_Expr *a);
static Node *transformAExprAnd(ParseState *pstate, A_Expr *a);
static Node *transformAExprOr(ParseState *pstate, A_Expr *a);
static Node *transformAExprNot(ParseState *pstate, A_Expr *a);
static Node *transformAExprOpAny(ParseState *pstate, A_Expr *a);
static Node *transformAExprOpAll(ParseState *pstate, A_Expr *a);
static Node *transformAExprDistinct(ParseState *pstate, A_Expr *a);
static Node *transformAExprNullIf(ParseState *pstate, A_Expr *a);
static Node *transformAExprOf(ParseState *pstate, A_Expr *a);
static Node *transformAExprIn(ParseState *pstate, A_Expr *a);
static Node *transformBoolExpr(ParseState *pstate, BoolExpr *a);
static Node *transformFuncCall(ParseState *pstate, FuncCall *fn);
static Node *transformCaseExpr(ParseState *pstate, CaseExpr *c);
static Node *transformSubLink(ParseState *pstate, SubLink *sublink);
@ -223,15 +221,6 @@ transformExprRecurse(ParseState *pstate, Node *expr)
case AEXPR_OP:
result = transformAExprOp(pstate, a);
break;
case AEXPR_AND:
result = transformAExprAnd(pstate, a);
break;
case AEXPR_OR:
result = transformAExprOr(pstate, a);
break;
case AEXPR_NOT:
result = transformAExprNot(pstate, a);
break;
case AEXPR_OP_ANY:
result = transformAExprOpAny(pstate, a);
break;
@ -258,6 +247,10 @@ transformExprRecurse(ParseState *pstate, Node *expr)
break;
}
case T_BoolExpr:
result = transformBoolExpr(pstate, (BoolExpr *) expr);
break;
case T_FuncCall:
result = transformFuncCall(pstate, (FuncCall *) expr);
break;
@ -337,7 +330,6 @@ transformExprRecurse(ParseState *pstate, Node *expr)
case T_DistinctExpr:
case T_NullIfExpr:
case T_ScalarArrayOpExpr:
case T_BoolExpr:
case T_FieldSelect:
case T_FieldStore:
case T_RelabelType:
@ -918,46 +910,6 @@ transformAExprOp(ParseState *pstate, A_Expr *a)
return result;
}
static Node *
transformAExprAnd(ParseState *pstate, A_Expr *a)
{
Node *lexpr = transformExprRecurse(pstate, a->lexpr);
Node *rexpr = transformExprRecurse(pstate, a->rexpr);
lexpr = coerce_to_boolean(pstate, lexpr, "AND");
rexpr = coerce_to_boolean(pstate, rexpr, "AND");
return (Node *) makeBoolExpr(AND_EXPR,
list_make2(lexpr, rexpr),
a->location);
}
static Node *
transformAExprOr(ParseState *pstate, A_Expr *a)
{
Node *lexpr = transformExprRecurse(pstate, a->lexpr);
Node *rexpr = transformExprRecurse(pstate, a->rexpr);
lexpr = coerce_to_boolean(pstate, lexpr, "OR");
rexpr = coerce_to_boolean(pstate, rexpr, "OR");
return (Node *) makeBoolExpr(OR_EXPR,
list_make2(lexpr, rexpr),
a->location);
}
static Node *
transformAExprNot(ParseState *pstate, A_Expr *a)
{
Node *rexpr = transformExprRecurse(pstate, a->rexpr);
rexpr = coerce_to_boolean(pstate, rexpr, "NOT");
return (Node *) makeBoolExpr(NOT_EXPR,
list_make1(rexpr),
a->location);
}
static Node *
transformAExprOpAny(ParseState *pstate, A_Expr *a)
{
@ -1237,6 +1189,42 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
return result;
}
static Node *
transformBoolExpr(ParseState *pstate, BoolExpr *a)
{
List *args = NIL;
const char *opname;
ListCell *lc;
switch (a->boolop)
{
case AND_EXPR:
opname = "AND";
break;
case OR_EXPR:
opname = "OR";
break;
case NOT_EXPR:
opname = "NOT";
break;
default:
elog(ERROR, "unrecognized boolop: %d", (int) a->boolop);
opname = NULL; /* keep compiler quiet */
break;
}
foreach(lc, a->args)
{
Node *arg = (Node *) lfirst(lc);
arg = transformExprRecurse(pstate, arg);
arg = coerce_to_boolean(pstate, arg, opname);
args = lappend(args, arg);
}
return (Node *) makeBoolExpr(a->boolop, args, a->location);
}
static Node *
transformFuncCall(ParseState *pstate, FuncCall *fn)
{
@ -2428,10 +2416,6 @@ make_row_comparison_op(ParseState *pstate, List *opname,
/*
* For = and <> cases, we just combine the pairwise operators with AND or
* OR respectively.
*
* Note: this is presently the only place where the parser generates
* BoolExpr with more than two arguments. Should be OK since the rest of
* the system thinks BoolExpr is N-argument anyway.
*/
if (rctype == ROWCOMPARE_EQ)
return (Node *) makeBoolExpr(AND_EXPR, opexprs, location);

View File

@ -225,9 +225,6 @@ typedef struct ParamRef
typedef enum A_Expr_Kind
{
AEXPR_OP, /* normal operator */
AEXPR_AND, /* booleans - name field is unused */
AEXPR_OR,
AEXPR_NOT,
AEXPR_OP_ANY, /* scalar op ANY (array) */
AEXPR_OP_ALL, /* scalar op ALL (array) */
AEXPR_DISTINCT, /* IS DISTINCT FROM - name must be "=" */

View File

@ -458,12 +458,8 @@ typedef struct ScalarArrayOpExpr
* BoolExpr - expression node for the basic Boolean operators AND, OR, NOT
*
* Notice the arguments are given as a List. For NOT, of course the list
* must always have exactly one element. For AND and OR, the executor can
* handle any number of arguments. The parser generally treats AND and OR
* as binary and so it typically only produces two-element lists, but the
* optimizer will flatten trees of AND and OR nodes to produce longer lists
* when possible. There are also a few special cases where more arguments
* can appear before optimization.
* must always have exactly one element. For AND and OR, there can be two
* or more arguments.
*/
typedef enum BoolExprType
{

View File

@ -2117,7 +2117,7 @@ shoe_ready| SELECT rsh.shoename,
int4smaller(rsh.sh_avail, rsl.sl_avail) AS total_avail
FROM shoe rsh,
shoelace rsl
WHERE (((rsl.sl_color = rsh.slcolor) AND (rsl.sl_len_cm >= rsh.slminlen_cm)) AND (rsl.sl_len_cm <= rsh.slmaxlen_cm));
WHERE ((rsl.sl_color = rsh.slcolor) AND (rsl.sl_len_cm >= rsh.slminlen_cm) AND (rsl.sl_len_cm <= rsh.slmaxlen_cm));
shoelace| SELECT s.sl_name,
s.sl_avail,
s.sl_color,