/*------------------------------------------------------------------------- * * prepqual.c * Routines for preprocessing qualification expressions * * * While the parser will produce flattened (N-argument) AND/OR trees from * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause * directly underneath another AND, or OR underneath OR, if the input was * oddly parenthesized. Also, rule expansion and subquery flattening could * produce such parsetrees. The planner wants to flatten all such cases * to ensure consistent optimization behavior. * * Formerly, this module was responsible for doing the initial flattening, * but now we leave it to eval_const_expressions to do that since it has to * make a complete pass over the expression tree anyway. Instead, we just * have to ensure that our manipulations preserve AND/OR flatness. * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR * tree after local transformations that might introduce nested AND/ORs. * * * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * src/backend/optimizer/prep/prepqual.c * *------------------------------------------------------------------------- */ #include "postgres.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" #include "optimizer/prep.h" #include "utils/lsyscache.h" static List *pull_ands(List *andlist); static List *pull_ors(List *orlist); static Expr *find_duplicate_ors(Expr *qual, bool is_check); static Expr *process_duplicate_ors(List *orlist); /* * negate_clause * Negate a Boolean expression. * * Input is a clause to be negated (e.g., the argument of a NOT clause). * Returns a new clause equivalent to the negation of the given clause. * * Although this can be invoked on its own, it's mainly intended as a helper * for eval_const_expressions(), and that context drives several design * decisions. In particular, if the input is already AND/OR flat, we must * preserve that property. We also don't bother to recurse in situations * where we can assume that lower-level executions of eval_const_expressions * would already have simplified sub-clauses of the input. * * The difference between this and a simple make_notclause() is that this * tries to get rid of the NOT node by logical simplification. It's clearly * always a win if the NOT node can be eliminated altogether. However, our * use of DeMorgan's laws could result in having more NOT nodes rather than * fewer. We do that unconditionally anyway, because in WHERE clauses it's * important to expose as much top-level AND/OR structure as possible. * Also, eliminating an intermediate NOT may allow us to flatten two levels * of AND or OR together that we couldn't have otherwise. Finally, one of * the motivations for doing this is to ensure that logically equivalent * expressions will be seen as physically equal(), so we should always apply * the same transformations. */ Node * negate_clause(Node *node) { if (node == NULL) /* should not happen */ elog(ERROR, "can't negate an empty subexpression"); switch (nodeTag(node)) { case T_Const: { Const *c = (Const *) node; /* NOT NULL is still NULL */ if (c->constisnull) return makeBoolConst(false, true); /* otherwise pretty easy */ return makeBoolConst(!DatumGetBool(c->constvalue), false); } break; case T_OpExpr: { /* * Negate operator if possible: (NOT (< A B)) => (>= A B) */ OpExpr *opexpr = (OpExpr *) node; Oid negator = get_negator(opexpr->opno); if (negator) { OpExpr *newopexpr = makeNode(OpExpr); newopexpr->opno = negator; newopexpr->opfuncid = InvalidOid; newopexpr->opresulttype = opexpr->opresulttype; newopexpr->opretset = opexpr->opretset; newopexpr->opcollid = opexpr->opcollid; newopexpr->inputcollid = opexpr->inputcollid; newopexpr->args = opexpr->args; newopexpr->location = opexpr->location; return (Node *) newopexpr; } } break; case T_ScalarArrayOpExpr: { /* * Negate a ScalarArrayOpExpr if its operator has a negator; * for example x = ANY (list) becomes x <> ALL (list) */ ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node; Oid negator = get_negator(saopexpr->opno); if (negator) { ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr); newopexpr->opno = negator; newopexpr->opfuncid = InvalidOid; newopexpr->hashfuncid = InvalidOid; newopexpr->useOr = !saopexpr->useOr; newopexpr->inputcollid = saopexpr->inputcollid; newopexpr->args = saopexpr->args; newopexpr->location = saopexpr->location; return (Node *) newopexpr; } } break; case T_BoolExpr: { BoolExpr *expr = (BoolExpr *) node; switch (expr->boolop) { /*-------------------- * Apply DeMorgan's Laws: * (NOT (AND A B)) => (OR (NOT A) (NOT B)) * (NOT (OR A B)) => (AND (NOT A) (NOT B)) * i.e., swap AND for OR and negate each subclause. * * If the input is already AND/OR flat and has no NOT * directly above AND or OR, this transformation preserves * those properties. For example, if no direct child of * the given AND clause is an AND or a NOT-above-OR, then * the recursive calls of negate_clause() can't return any * OR clauses. So we needn't call pull_ors() before * building a new OR clause. Similarly for the OR case. *-------------------- */ case AND_EXPR: { List *nargs = NIL; ListCell *lc; foreach(lc, expr->args) { nargs = lappend(nargs, negate_clause(lfirst(lc))); } return (Node *) make_orclause(nargs); } break; case OR_EXPR: { List *nargs = NIL; ListCell *lc; foreach(lc, expr->args) { nargs = lappend(nargs, negate_clause(lfirst(lc))); } return (Node *) make_andclause(nargs); } break; case NOT_EXPR: /* * NOT underneath NOT: they cancel. We assume the * input is already simplified, so no need to recurse. */ return (Node *) linitial(expr->args); default: elog(ERROR, "unrecognized boolop: %d", (int) expr->boolop); break; } } break; case T_NullTest: { NullTest *expr = (NullTest *) node; /* * In the rowtype case, the two flavors of NullTest are *not* * logical inverses, so we can't simplify. But it does work * for scalar datatypes. */ if (!expr->argisrow) { NullTest *newexpr = makeNode(NullTest); newexpr->arg = expr->arg; newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ? IS_NOT_NULL : IS_NULL); newexpr->argisrow = expr->argisrow; newexpr->location = expr->location; return (Node *) newexpr; } } break; case T_BooleanTest: { BooleanTest *expr = (BooleanTest *) node; BooleanTest *newexpr = makeNode(BooleanTest); newexpr->arg = expr->arg; switch (expr->booltesttype) { case IS_TRUE: newexpr->booltesttype = IS_NOT_TRUE; break; case IS_NOT_TRUE: newexpr->booltesttype = IS_TRUE; break; case IS_FALSE: newexpr->booltesttype = IS_NOT_FALSE; break; case IS_NOT_FALSE: newexpr->booltesttype = IS_FALSE; break; case IS_UNKNOWN: newexpr->booltesttype = IS_NOT_UNKNOWN; break; case IS_NOT_UNKNOWN: newexpr->booltesttype = IS_UNKNOWN; break; default: elog(ERROR, "unrecognized booltesttype: %d", (int) expr->booltesttype); break; } newexpr->location = expr->location; return (Node *) newexpr; } break; default: /* else fall through */ break; } /* * Otherwise we don't know how to simplify this, so just tack on an * explicit NOT node. */ return (Node *) make_notclause((Expr *) node); } /* * canonicalize_qual * Convert a qualification expression to the most useful form. * * This is primarily intended to be used on top-level WHERE (or JOIN/ON) * clauses. It can also be used on top-level CHECK constraints, for which * pass is_check = true. DO NOT call it on any expression that is not known * to be one or the other, as it might apply inappropriate simplifications. * * The name of this routine is a holdover from a time when it would try to * force the expression into canonical AND-of-ORs or OR-of-ANDs form. * Eventually, we recognized that that had more theoretical purity than * actual usefulness, and so now the transformation doesn't involve any * notion of reaching a canonical form. * * NOTE: we assume the input has already been through eval_const_expressions * and therefore possesses AND/OR flatness. Formerly this function included * its own flattening logic, but that requires a useless extra pass over the * tree. * * Returns the modified qualification. */ Expr * canonicalize_qual(Expr *qual, bool is_check) { Expr *newqual; /* Quick exit for empty qual */ if (qual == NULL) return NULL; /* This should not be invoked on quals in implicit-AND format */ Assert(!IsA(qual, List)); /* * Pull up redundant subclauses in OR-of-AND trees. We do this only * within the top-level AND/OR structure; there's no point in looking * deeper. Also remove any NULL constants in the top-level structure. */ newqual = find_duplicate_ors(qual, is_check); return newqual; } /* * pull_ands * Recursively flatten nested AND clauses into a single and-clause list. * * Input is the arglist of an AND clause. * Returns the rebuilt arglist (note original list structure is not touched). */ static List * pull_ands(List *andlist) { List *out_list = NIL; ListCell *arg; foreach(arg, andlist) { Node *subexpr = (Node *) lfirst(arg); if (is_andclause(subexpr)) out_list = list_concat(out_list, pull_ands(((BoolExpr *) subexpr)->args)); else out_list = lappend(out_list, subexpr); } return out_list; } /* * pull_ors * Recursively flatten nested OR clauses into a single or-clause list. * * Input is the arglist of an OR clause. * Returns the rebuilt arglist (note original list structure is not touched). */ static List * pull_ors(List *orlist) { List *out_list = NIL; ListCell *arg; foreach(arg, orlist) { Node *subexpr = (Node *) lfirst(arg); if (is_orclause(subexpr)) out_list = list_concat(out_list, pull_ors(((BoolExpr *) subexpr)->args)); else out_list = lappend(out_list, subexpr); } return out_list; } /*-------------------- * The following code attempts to apply the inverse OR distributive law: * ((A AND B) OR (A AND C)) => (A AND (B OR C)) * That is, locate OR clauses in which every subclause contains an * identical term, and pull out the duplicated terms. * * This may seem like a fairly useless activity, but it turns out to be * applicable to many machine-generated queries, and there are also queries * in some of the TPC benchmarks that need it. This was in fact almost the * sole useful side-effect of the old prepqual code that tried to force * the query into canonical AND-of-ORs form: the canonical equivalent of * ((A AND B) OR (A AND C)) * is * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C)) * which the code was able to simplify to * (A AND (A OR C) AND (B OR A) AND (B OR C)) * thus successfully extracting the common condition A --- but at the cost * of cluttering the qual with many redundant clauses. *-------------------- */ /* * find_duplicate_ors * Given a qualification tree with the NOTs pushed down, search for * OR clauses to which the inverse OR distributive law might apply. * Only the top-level AND/OR structure is searched. * * While at it, we remove any NULL constants within the top-level AND/OR * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x". * In general that would change the result, so eval_const_expressions can't * do it; but at top level of WHERE, we don't need to distinguish between * FALSE and NULL results, so it's valid to treat NULL::boolean the same * as FALSE and then simplify AND/OR accordingly. Conversely, in a top-level * CHECK constraint, we may treat a NULL the same as TRUE. * * Returns the modified qualification. AND/OR flatness is preserved. */ static Expr * find_duplicate_ors(Expr *qual, bool is_check) { if (is_orclause(qual)) { List *orlist = NIL; ListCell *temp; /* Recurse */ foreach(temp, ((BoolExpr *) qual)->args) { Expr *arg = (Expr *) lfirst(temp); arg = find_duplicate_ors(arg, is_check); /* Get rid of any constant inputs */ if (arg && IsA(arg, Const)) { Const *carg = (Const *) arg; if (is_check) { /* Within OR in CHECK, drop constant FALSE */ if (!carg->constisnull && !DatumGetBool(carg->constvalue)) continue; /* Constant TRUE or NULL, so OR reduces to TRUE */ return (Expr *) makeBoolConst(true, false); } else { /* Within OR in WHERE, drop constant FALSE or NULL */ if (carg->constisnull || !DatumGetBool(carg->constvalue)) continue; /* Constant TRUE, so OR reduces to TRUE */ return arg; } } orlist = lappend(orlist, arg); } /* Flatten any ORs pulled up to just below here */ orlist = pull_ors(orlist); /* Now we can look for duplicate ORs */ return process_duplicate_ors(orlist); } else if (is_andclause(qual)) { List *andlist = NIL; ListCell *temp; /* Recurse */ foreach(temp, ((BoolExpr *) qual)->args) { Expr *arg = (Expr *) lfirst(temp); arg = find_duplicate_ors(arg, is_check); /* Get rid of any constant inputs */ if (arg && IsA(arg, Const)) { Const *carg = (Const *) arg; if (is_check) { /* Within AND in CHECK, drop constant TRUE or NULL */ if (carg->constisnull || DatumGetBool(carg->constvalue)) continue; /* Constant FALSE, so AND reduces to FALSE */ return arg; } else { /* Within AND in WHERE, drop constant TRUE */ if (!carg->constisnull && DatumGetBool(carg->constvalue)) continue; /* Constant FALSE or NULL, so AND reduces to FALSE */ return (Expr *) makeBoolConst(false, false); } } andlist = lappend(andlist, arg); } /* Flatten any ANDs introduced just below here */ andlist = pull_ands(andlist); /* AND of no inputs reduces to TRUE */ if (andlist == NIL) return (Expr *) makeBoolConst(true, false); /* Single-expression AND just reduces to that expression */ if (list_length(andlist) == 1) return (Expr *) linitial(andlist); /* Else we still need an AND node */ return make_andclause(andlist); } else return qual; } /* * process_duplicate_ors * Given a list of exprs which are ORed together, try to apply * the inverse OR distributive law. * * Returns the resulting expression (could be an AND clause, an OR * clause, or maybe even a single subexpression). */ static Expr * process_duplicate_ors(List *orlist) { List *reference = NIL; int num_subclauses = 0; List *winners; List *neworlist; ListCell *temp; /* OR of no inputs reduces to FALSE */ if (orlist == NIL) return (Expr *) makeBoolConst(false, false); /* Single-expression OR just reduces to that expression */ if (list_length(orlist) == 1) return (Expr *) linitial(orlist); /* * Choose the shortest AND clause as the reference list --- obviously, any * subclause not in this clause isn't in all the clauses. If we find a * clause that's not an AND, we can treat it as a one-element AND clause, * which necessarily wins as shortest. */ foreach(temp, orlist) { Expr *clause = (Expr *) lfirst(temp); if (is_andclause(clause)) { List *subclauses = ((BoolExpr *) clause)->args; int nclauses = list_length(subclauses); if (reference == NIL || nclauses < num_subclauses) { reference = subclauses; num_subclauses = nclauses; } } else { reference = list_make1(clause); break; } } /* * Just in case, eliminate any duplicates in the reference list. */ reference = list_union(NIL, reference); /* * Check each element of the reference list to see if it's in all the OR * clauses. Build a new list of winning clauses. */ winners = NIL; foreach(temp, reference) { Expr *refclause = (Expr *) lfirst(temp); bool win = true; ListCell *temp2; foreach(temp2, orlist) { Expr *clause = (Expr *) lfirst(temp2); if (is_andclause(clause)) { if (!list_member(((BoolExpr *) clause)->args, refclause)) { win = false; break; } } else { if (!equal(refclause, clause)) { win = false; break; } } } if (win) winners = lappend(winners, refclause); } /* * If no winners, we can't transform the OR */ if (winners == NIL) return make_orclause(orlist); /* * Generate new OR list consisting of the remaining sub-clauses. * * If any clause degenerates to empty, then we have a situation like (A * AND B) OR (A), which can be reduced to just A --- that is, the * additional conditions in other arms of the OR are irrelevant. * * Note that because we use list_difference, any multiple occurrences of a * winning clause in an AND sub-clause will be removed automatically. */ neworlist = NIL; foreach(temp, orlist) { Expr *clause = (Expr *) lfirst(temp); if (is_andclause(clause)) { List *subclauses = ((BoolExpr *) clause)->args; subclauses = list_difference(subclauses, winners); if (subclauses != NIL) { if (list_length(subclauses) == 1) neworlist = lappend(neworlist, linitial(subclauses)); else neworlist = lappend(neworlist, make_andclause(subclauses)); } else { neworlist = NIL; /* degenerate case, see above */ break; } } else { if (!list_member(winners, clause)) neworlist = lappend(neworlist, clause); else { neworlist = NIL; /* degenerate case, see above */ break; } } } /* * Append reduced OR to the winners list, if it's not degenerate, handling * the special case of one element correctly (can that really happen?). * Also be careful to maintain AND/OR flatness in case we pulled up a * sub-sub-OR-clause. */ if (neworlist != NIL) { if (list_length(neworlist) == 1) winners = lappend(winners, linitial(neworlist)); else winners = lappend(winners, make_orclause(pull_ors(neworlist))); } /* * And return the constructed AND clause, again being wary of a single * element and AND/OR flatness. */ if (list_length(winners) == 1) return (Expr *) linitial(winners); else return make_andclause(pull_ands(winners)); }