/*------------------------------------------------------------------------- * * orclauses.c * Routines to extract restriction OR clauses from join OR clauses * * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * src/backend/optimizer/util/orclauses.c * *------------------------------------------------------------------------- */ #include "postgres.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/orclauses.h" #include "optimizer/restrictinfo.h" static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel); static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel); static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, Expr *orclause, RestrictInfo *join_or_rinfo); /* * extract_restriction_or_clauses * Examine join OR-of-AND clauses to see if any useful restriction OR * clauses can be extracted. If so, add them to the query. * * Although a join clause must reference multiple relations overall, * an OR of ANDs clause might contain sub-clauses that reference just one * relation and can be used to build a restriction clause for that rel. * For example consider * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); * We can transform this into * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) * AND (a.x = 42 OR a.x = 44) * AND (b.y = 43 OR b.z = 45); * which allows the latter clauses to be applied during the scans of a and b, * perhaps as index qualifications, and in any case reducing the number of * rows arriving at the join. In essence this is a partial transformation to * CNF (AND of ORs format). It is not complete, however, because we do not * unravel the original OR --- doing so would usually bloat the qualification * expression to little gain. * * The added quals are partially redundant with the original OR, and therefore * would cause the size of the joinrel to be underestimated when it is finally * formed. (This would be true of a full transformation to CNF as well; the * fault is not really in the transformation, but in clauselist_selectivity's * inability to recognize redundant conditions.) We can compensate for this * redundancy by changing the cached selectivity of the original OR clause, * canceling out the (valid) reduction in the estimated sizes of the base * relations so that the estimated joinrel size remains the same. This is * a MAJOR HACK: it depends on the fact that clause selectivities are cached * and on the fact that the same RestrictInfo node will appear in every * joininfo list that might be used when the joinrel is formed. * And it doesn't work in cases where the size estimation is nonlinear * (i.e., outer and IN joins). But it beats not doing anything. * * We examine each base relation to see if join clauses associated with it * contain extractable restriction conditions. If so, add those conditions * to the rel's baserestrictinfo and update the cached selectivities of the * join clauses. Note that the same join clause will be examined afresh * from the point of view of each baserel that participates in it, so its * cached selectivity may get updated multiple times. */ void extract_restriction_or_clauses(PlannerInfo *root) { Index rti; /* Examine each baserel for potential join OR clauses */ for (rti = 1; rti < root->simple_rel_array_size; rti++) { RelOptInfo *rel = root->simple_rel_array[rti]; ListCell *lc; /* there may be empty slots corresponding to non-baserel RTEs */ if (rel == NULL) continue; Assert(rel->relid == rti); /* sanity check on array */ /* ignore RTEs that are "other rels" */ if (rel->reloptkind != RELOPT_BASEREL) continue; /* * Find potentially interesting OR joinclauses. We can use any * joinclause that is considered safe to move to this rel by the * parameterized-path machinery, even though what we are going to do * with it is not exactly a parameterized path. */ foreach(lc, rel->joininfo) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); if (restriction_is_or_clause(rinfo) && join_clause_is_movable_to(rinfo, rel)) { /* Try to extract a qual for this rel only */ Expr *orclause = extract_or_clause(rinfo, rel); /* * If successful, decide whether we want to use the clause, * and insert it into the rel's restrictinfo list if so. */ if (orclause) consider_new_or_clause(root, rel, orclause, rinfo); } } } } /* * Is the given primitive (non-OR) RestrictInfo safe to move to the rel? */ static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel) { /* * We want clauses that mention the rel, and only the rel. So in * particular pseudoconstant clauses can be rejected quickly. Then check * the clause's Var membership. */ if (rinfo->pseudoconstant) return false; if (!bms_equal(rinfo->clause_relids, rel->relids)) return false; /* We don't want extra evaluations of any volatile functions */ if (contain_volatile_functions((Node *) rinfo->clause)) return false; return true; } /* * Try to extract a restriction clause mentioning only "rel" from the given * join OR-clause. * * We must be able to extract at least one qual for this rel from each of * the arms of the OR, else we can't use it. * * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL * if no OR clause could be extracted. */ static Expr * extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel) { List *clauselist = NIL; ListCell *lc; /* * Scan each arm of the input OR clause. Notice we descend into * or_rinfo->orclause, which has RestrictInfo nodes embedded below the * toplevel OR/AND structure. This is useful because we can use the info * in those nodes to make is_safe_restriction_clause_for()'s checks * cheaper. We'll strip those nodes from the returned tree, though, * meaning that fresh ones will be built if the clause is accepted as a * restriction clause. This might seem wasteful --- couldn't we re-use * the existing RestrictInfos? But that'd require assuming that * selectivity and other cached data is computed exactly the same way for * a restriction clause as for a join clause, which seems undesirable. */ Assert(is_orclause(or_rinfo->orclause)); foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args) { Node *orarg = (Node *) lfirst(lc); List *subclauses = NIL; Node *subclause; /* OR arguments should be ANDs or sub-RestrictInfos */ if (is_andclause(orarg)) { List *andargs = ((BoolExpr *) orarg)->args; ListCell *lc2; foreach(lc2, andargs) { RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); if (restriction_is_or_clause(rinfo)) { /* * Recurse to deal with nested OR. Note we *must* recurse * here, this isn't just overly-tense optimization: we * have to descend far enough to find and strip all * RestrictInfos in the expression. */ Expr *suborclause; suborclause = extract_or_clause(rinfo, rel); if (suborclause) subclauses = lappend(subclauses, suborclause); } else if (is_safe_restriction_clause_for(rinfo, rel)) subclauses = lappend(subclauses, rinfo->clause); } } else { RestrictInfo *rinfo = castNode(RestrictInfo, orarg); Assert(!restriction_is_or_clause(rinfo)); if (is_safe_restriction_clause_for(rinfo, rel)) subclauses = lappend(subclauses, rinfo->clause); } /* * If nothing could be extracted from this arm, we can't do anything * with this OR clause. */ if (subclauses == NIL) return NULL; /* * OK, add subclause(s) to the result OR. If we found more than one, * we need an AND node. But if we found only one, and it is itself an * OR node, add its subclauses to the result instead; this is needed * to preserve AND/OR flatness (ie, no OR directly underneath OR). */ subclause = (Node *) make_ands_explicit(subclauses); if (is_orclause(subclause)) clauselist = list_concat(clauselist, ((BoolExpr *) subclause)->args); else clauselist = lappend(clauselist, subclause); } /* * If we got a restriction clause from every arm, wrap them up in an OR * node. (In theory the OR node might be unnecessary, if there was only * one arm --- but then the input OR node was also redundant.) */ if (clauselist != NIL) return make_orclause(clauselist); return NULL; } /* * Consider whether a successfully-extracted restriction OR clause is * actually worth using. If so, add it to the planner's data structures, * and adjust the original join clause (join_or_rinfo) to compensate. */ static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, Expr *orclause, RestrictInfo *join_or_rinfo) { RestrictInfo *or_rinfo; Selectivity or_selec, orig_selec; /* * Build a RestrictInfo from the new OR clause. We can assume it's valid * as a base restriction clause. */ or_rinfo = make_restrictinfo(root, orclause, true, false, false, false, join_or_rinfo->security_level, NULL, NULL, NULL); /* * Estimate its selectivity. (We could have done this earlier, but doing * it on the RestrictInfo representation allows the result to get cached, * saving work later.) */ or_selec = clause_selectivity(root, (Node *) or_rinfo, 0, JOIN_INNER, NULL); /* * The clause is only worth adding to the query if it rejects a useful * fraction of the base relation's rows; otherwise, it's just going to * cause duplicate computation (since we will still have to check the * original OR clause when the join is formed). Somewhat arbitrarily, we * set the selectivity threshold at 0.9. */ if (or_selec > 0.9) return; /* forget it */ /* * OK, add it to the rel's restriction-clause list. */ rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo); rel->baserestrict_min_security = Min(rel->baserestrict_min_security, or_rinfo->security_level); /* * Adjust the original join OR clause's cached selectivity to compensate * for the selectivity of the added (but redundant) lower-level qual. This * should result in the join rel getting approximately the same rows * estimate as it would have gotten without all these shenanigans. * * XXX major hack alert: this depends on the assumption that the * selectivity will stay cached. * * XXX another major hack: we adjust only norm_selec, the cached * selectivity for JOIN_INNER semantics, even though the join clause * might've been an outer-join clause. This is partly because we can't * easily identify the relevant SpecialJoinInfo here, and partly because * the linearity assumption we're making would fail anyway. (If it is an * outer-join clause, "rel" must be on the nullable side, else we'd not * have gotten here. So the computation of the join size is going to be * quite nonlinear with respect to the size of "rel", so it's not clear * how we ought to adjust outer_selec even if we could compute its * original value correctly.) */ if (or_selec > 0) { SpecialJoinInfo sjinfo; /* * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare * approx_tuple_count() in costsize.c.) */ sjinfo.type = T_SpecialJoinInfo; sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids, rel->relids); sjinfo.min_righthand = rel->relids; sjinfo.syn_lefthand = sjinfo.min_lefthand; sjinfo.syn_righthand = sjinfo.min_righthand; sjinfo.jointype = JOIN_INNER; sjinfo.ojrelid = 0; sjinfo.commute_above_l = NULL; sjinfo.commute_above_r = NULL; sjinfo.commute_below_l = NULL; sjinfo.commute_below_r = NULL; /* we don't bother trying to make the remaining fields valid */ sjinfo.lhs_strict = false; sjinfo.semi_can_btree = false; sjinfo.semi_can_hash = false; sjinfo.semi_operators = NIL; sjinfo.semi_rhs_exprs = NIL; /* Compute inner-join size */ orig_selec = clause_selectivity(root, (Node *) join_or_rinfo, 0, JOIN_INNER, &sjinfo); /* And hack cached selectivity so join size remains the same */ join_or_rinfo->norm_selec = orig_selec / or_selec; /* ensure result stays in sane range */ if (join_or_rinfo->norm_selec > 1) join_or_rinfo->norm_selec = 1; /* as explained above, we don't touch outer_selec */ } }