Fix ANALYZE's ancient deficiency of not trying to collect stats for expression

indexes when the index column type (the opclass opckeytype) is different from
the expression's datatype.  When coded, this limitation wasn't worth worrying
about because we had no intelligence to speak of in stats collection for the
datatypes used by such opclasses.  However, now that there's non-toy
estimation capability for tsvector queries, it amounts to a bug that ANALYZE
fails to do this.

The fix changes struct VacAttrStats, and therefore constitutes an API break
for custom typanalyze functions.  Therefore we can't back-patch it into
released branches, but it was agreed that 9.0 isn't yet frozen hard enough
to make such a change unacceptable.  Ergo, back-patch to 9.0 but no further.
The API break had better be mentioned in 9.0 release notes.
This commit is contained in:
Tom Lane 2010-08-01 22:38:11 +00:00
parent 97532f7c29
commit 67becf8d41
2 changed files with 63 additions and 44 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.152 2010/02/26 02:00:37 momjian Exp $
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.153 2010/08/01 22:38:11 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -92,7 +92,8 @@ static void compute_index_stats(Relation onerel, double totalrows,
AnlIndexData *indexdata, int nindexes,
HeapTuple *rows, int numrows,
MemoryContext col_context);
static VacAttrStats *examine_attribute(Relation onerel, int attnum);
static VacAttrStats *examine_attribute(Relation onerel, int attnum,
Node *index_expr);
static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
int targrows, double *totalrows, double *totaldeadrows);
static double random_fract(void);
@ -339,7 +340,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column \"%s\" of relation \"%s\" does not exist",
col, RelationGetRelationName(onerel))));
vacattrstats[tcnt] = examine_attribute(onerel, i);
vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
if (vacattrstats[tcnt] != NULL)
tcnt++;
}
@ -353,7 +354,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
tcnt = 0;
for (i = 1; i <= attr_cnt; i++)
{
vacattrstats[tcnt] = examine_attribute(onerel, i);
vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
if (vacattrstats[tcnt] != NULL)
tcnt++;
}
@ -407,21 +408,8 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
elog(ERROR, "too few entries in indexprs list");
indexkey = (Node *) lfirst(indexpr_item);
indexpr_item = lnext(indexpr_item);
/*
* Can't analyze if the opclass uses a storage type
* different from the expression result type. We'd get
* confused because the type shown in pg_attribute for
* the index column doesn't match what we are getting
* from the expression. Perhaps this can be fixed
* someday, but for now, punt.
*/
if (exprType(indexkey) !=
Irel[ind]->rd_att->attrs[i]->atttypid)
continue;
thisdata->vacattrstats[tcnt] =
examine_attribute(Irel[ind], i + 1);
examine_attribute(Irel[ind], i + 1, indexkey);
if (thisdata->vacattrstats[tcnt] != NULL)
{
tcnt++;
@ -802,9 +790,12 @@ compute_index_stats(Relation onerel, double totalrows,
*
* Determine whether the column is analyzable; if so, create and initialize
* a VacAttrStats struct for it. If not, return NULL.
*
* If index_expr isn't NULL, then we're trying to analyze an expression index,
* and index_expr is the expression tree representing the column's data.
*/
static VacAttrStats *
examine_attribute(Relation onerel, int attnum)
examine_attribute(Relation onerel, int attnum, Node *index_expr)
{
Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
HeapTuple typtuple;
@ -827,9 +818,30 @@ examine_attribute(Relation onerel, int attnum)
stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);
typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(attr->atttypid));
/*
* When analyzing an expression index, believe the expression tree's type
* not the column datatype --- the latter might be the opckeytype storage
* type of the opclass, which is not interesting for our purposes. (Note:
* if we did anything with non-expression index columns, we'd need to
* figure out where to get the correct type info from, but for now that's
* not a problem.) It's not clear whether anyone will care about the
* typmod, but we store that too just in case.
*/
if (index_expr)
{
stats->attrtypid = exprType(index_expr);
stats->attrtypmod = exprTypmod(index_expr);
}
else
{
stats->attrtypid = attr->atttypid;
stats->attrtypmod = attr->atttypmod;
}
typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(stats->attrtypid));
if (!HeapTupleIsValid(typtuple))
elog(ERROR, "cache lookup failed for type %u", attr->atttypid);
elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
ReleaseSysCache(typtuple);
@ -838,12 +850,12 @@ examine_attribute(Relation onerel, int attnum)
/*
* The fields describing the stats->stavalues[n] element types default to
* the type of the field being analyzed, but the type-specific typanalyze
* the type of the data being analyzed, but the type-specific typanalyze
* function can change them if it wants to store something else.
*/
for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
{
stats->statypid[i] = stats->attr->atttypid;
stats->statypid[i] = stats->attrtypid;
stats->statyplen[i] = stats->attrtype->typlen;
stats->statypbyval[i] = stats->attrtype->typbyval;
stats->statypalign[i] = stats->attrtype->typalign;
@ -1780,7 +1792,7 @@ std_typanalyze(VacAttrStats *stats)
attr->attstattarget = default_statistics_target;
/* Look for default "<" and "=" operators for column's type */
get_sort_group_operators(attr->atttypid,
get_sort_group_operators(stats->attrtypid,
false, false, false,
&ltopr, &eqopr, NULL);
@ -1860,10 +1872,10 @@ compute_minimal_stats(VacAttrStatsP stats,
int nonnull_cnt = 0;
int toowide_cnt = 0;
double total_width = 0;
bool is_varlena = (!stats->attr->attbyval &&
stats->attr->attlen == -1);
bool is_varwidth = (!stats->attr->attbyval &&
stats->attr->attlen < 0);
bool is_varlena = (!stats->attrtype->typbyval &&
stats->attrtype->typlen == -1);
bool is_varwidth = (!stats->attrtype->typbyval &&
stats->attrtype->typlen < 0);
FmgrInfo f_cmpeq;
typedef struct
{
@ -2126,8 +2138,8 @@ compute_minimal_stats(VacAttrStatsP stats,
for (i = 0; i < num_mcv; i++)
{
mcv_values[i] = datumCopy(track[i].value,
stats->attr->attbyval,
stats->attr->attlen);
stats->attrtype->typbyval,
stats->attrtype->typlen);
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
}
MemoryContextSwitchTo(old_context);
@ -2184,10 +2196,10 @@ compute_scalar_stats(VacAttrStatsP stats,
int nonnull_cnt = 0;
int toowide_cnt = 0;
double total_width = 0;
bool is_varlena = (!stats->attr->attbyval &&
stats->attr->attlen == -1);
bool is_varwidth = (!stats->attr->attbyval &&
stats->attr->attlen < 0);
bool is_varlena = (!stats->attrtype->typbyval &&
stats->attrtype->typlen == -1);
bool is_varwidth = (!stats->attrtype->typbyval &&
stats->attrtype->typlen < 0);
double corr_xysum;
Oid cmpFn;
int cmpFlags;
@ -2476,8 +2488,8 @@ compute_scalar_stats(VacAttrStatsP stats,
for (i = 0; i < num_mcv; i++)
{
mcv_values[i] = datumCopy(values[track[i].first].value,
stats->attr->attbyval,
stats->attr->attlen);
stats->attrtype->typbyval,
stats->attrtype->typlen);
mcv_freqs[i] = (double) track[i].count / (double) samplerows;
}
MemoryContextSwitchTo(old_context);
@ -2583,8 +2595,8 @@ compute_scalar_stats(VacAttrStatsP stats,
for (i = 0; i < num_hist; i++)
{
hist_values[i] = datumCopy(values[pos].value,
stats->attr->attbyval,
stats->attr->attlen);
stats->attrtype->typbyval,
stats->attrtype->typlen);
pos += delta;
posfrac += deltafrac;
if (posfrac >= (num_hist - 1))

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.89 2010/02/09 21:43:30 tgl Exp $
* $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.90 2010/08/01 22:38:11 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -62,9 +62,17 @@ typedef struct VacAttrStats
/*
* These fields are set up by the main ANALYZE code before invoking the
* type-specific typanalyze function.
*
* Note: do not assume that the data being analyzed has the same datatype
* shown in attr, ie do not trust attr->atttypid, attlen, etc. This is
* because some index opclasses store a different type than the underlying
* column/expression. Instead use attrtypid, attrtypmod, and attrtype for
* information about the datatype being fed to the typanalyze function.
*/
Form_pg_attribute attr; /* copy of pg_attribute row for column */
Form_pg_type attrtype; /* copy of pg_type row for column */
Oid attrtypid; /* type of data being analyzed */
int32 attrtypmod; /* typmod of data being analyzed */
Form_pg_type attrtype; /* copy of pg_type row for attrtypid */
MemoryContext anl_context; /* where to save long-lived data */
/*
@ -95,10 +103,9 @@ typedef struct VacAttrStats
/*
* These fields describe the stavalues[n] element types. They will be
* initialized to be the same as the column's that's underlying the slot,
* but a custom typanalyze function might want to store an array of
* something other than the analyzed column's elements. It should then
* overwrite these fields.
* initialized to match attrtypid, but a custom typanalyze function might
* want to store an array of something other than the analyzed column's
* elements. It should then overwrite these fields.
*/
Oid statypid[STATISTIC_NUM_SLOTS];
int2 statyplen[STATISTIC_NUM_SLOTS];