/*------------------------------------------------------------------------- * * orindxpath.c * Routines to find index paths that match a set of OR clauses * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.89 2009/04/16 20:42:16 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "optimizer/cost.h" #include "optimizer/paths.h" #include "optimizer/restrictinfo.h" /*---------- * create_or_index_quals * Examine join OR-of-AND quals to see if any useful restriction OR * clauses can be extracted. If so, add them to the query. * * Although a join clause must reference other relations overall, * an OR of ANDs clause might contain sub-clauses that reference just this * relation and can be used to build a restriction clause. * For example consider * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); * We can transform this into * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) * AND (a.x = 42 OR a.x = 44) * AND (b.y = 43 OR b.z = 45); * which opens the potential to build OR indexscans on a and b. In essence * this is a partial transformation to CNF (AND of ORs format). It is not * complete, however, because we do not unravel the original OR --- doing so * would usually bloat the qualification expression to little gain. * * The added quals are partially redundant with the original OR, and therefore * will cause the size of the joinrel to be underestimated when it is finally * formed. (This would be true of a full transformation to CNF as well; the * fault is not really in the transformation, but in clauselist_selectivity's * inability to recognize redundant conditions.) To minimize the collateral * damage, we want to minimize the number of quals added. Therefore we do * not add every possible extracted restriction condition to the query. * Instead, we search for the single restriction condition that generates * the most useful (cheapest) OR indexscan, and add only that condition. * This is a pretty ad-hoc heuristic, but quite useful. * * We can then compensate for the redundancy of the added qual by poking * the recorded selectivity of the original OR clause, thereby ensuring * the added qual doesn't change the estimated size of the joinrel when * it is finally formed. This is a MAJOR HACK: it depends on the fact * that clause selectivities are cached and on the fact that the same * RestrictInfo node will appear in every joininfo list that might be used * when the joinrel is formed. And it probably isn't right in cases where * the size estimation is nonlinear (i.e., outer and IN joins). But it * beats not doing anything. * * NOTE: one might think this messiness could be worked around by generating * the indexscan path with a small path->rows value, and not touching the * rel's baserestrictinfo or rel->rows. However, that does not work. * The optimizer's fundamental design assumes that every general-purpose * Path for a given relation generates the same number of rows. Without * this assumption we'd not be able to optimize solely on the cost of Paths, * but would have to take number of output rows into account as well. * (Perhaps someday that'd be worth doing, but it's a pretty big change...) * * 'rel' is the relation entry for which quals are to be created * * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE. * If no quals available, returns FALSE and doesn't change rel. * * Note: check_partial_indexes() must have been run previously. *---------- */ bool create_or_index_quals(PlannerInfo *root, RelOptInfo *rel) { BitmapOrPath *bestpath = NULL; RestrictInfo *bestrinfo = NULL; List *newrinfos; RestrictInfo *or_rinfo; Selectivity or_selec, orig_selec; ListCell *i; /* * Find potentially interesting OR joinclauses. * * We must ignore clauses for which the target rel is in nullable_relids; * that means there's an outer join below the clause and so it can't be * enforced at the relation scan level. * * We must also ignore clauses that are marked !is_pushed_down (ie they * are themselves outer-join clauses). It would be safe to extract an * index condition from such a clause if we are within the nullable rather * than the non-nullable side of its join, but we haven't got enough * context here to tell which applies. OR clauses in outer-join quals * aren't exactly common, so we'll let that case go unoptimized for now. */ foreach(i, rel->joininfo) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(i); if (restriction_is_or_clause(rinfo) && rinfo->is_pushed_down && !bms_is_member(rel->relid, rinfo->nullable_relids)) { /* * Use the generate_bitmap_or_paths() machinery to estimate the * value of each OR clause. We can use regular restriction * clauses along with the OR clause contents to generate * indexquals. We pass outer_rel = NULL so that sub-clauses that * are actually joins will be ignored. */ List *orpaths; ListCell *k; orpaths = generate_bitmap_or_paths(root, rel, list_make1(rinfo), rel->baserestrictinfo, NULL); /* Locate the cheapest OR path */ foreach(k, orpaths) { BitmapOrPath *path = (BitmapOrPath *) lfirst(k); Assert(IsA(path, BitmapOrPath)); if (bestpath == NULL || path->path.total_cost < bestpath->path.total_cost) { bestpath = path; bestrinfo = rinfo; } } } } /* Fail if no suitable clauses found */ if (bestpath == NULL) return false; /* * Convert the path's indexclauses structure to a RestrictInfo tree. We * include any partial-index predicates so as to get a reasonable * representation of what the path is actually scanning. */ newrinfos = make_restrictinfo_from_bitmapqual((Path *) bestpath, true, true); /* It's possible we get back something other than a single OR clause */ if (list_length(newrinfos) != 1) return false; or_rinfo = (RestrictInfo *) linitial(newrinfos); Assert(IsA(or_rinfo, RestrictInfo)); if (!restriction_is_or_clause(or_rinfo)) return false; /* * OK, add it to the rel's restriction list. */ rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos); /* * Adjust the original OR clause's cached selectivity to compensate for * the selectivity of the added (but redundant) lower-level qual. This * should result in the join rel getting approximately the same rows * estimate as it would have gotten without all these shenanigans. (XXX * major hack alert ... this depends on the assumption that the * selectivity will stay cached ...) */ or_selec = clause_selectivity(root, (Node *) or_rinfo, 0, JOIN_INNER, NULL); if (or_selec > 0 && or_selec < 1) { orig_selec = clause_selectivity(root, (Node *) bestrinfo, 0, JOIN_INNER, NULL); bestrinfo->norm_selec = orig_selec / or_selec; /* clamp result to sane range */ if (bestrinfo->norm_selec > 1) bestrinfo->norm_selec = 1; /* It isn't an outer join clause, so no need to adjust outer_selec */ } /* Tell caller to recompute partial index status and rowcount estimate */ return true; }