diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 47581ab705..6285a57888 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.135 2009/03/31 22:12:46 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.136 2009/05/05 18:02:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2260,6 +2260,10 @@ compute_scalar_stats(VacAttrStatsP stats, MemoryContext old_context; Datum *hist_values; int nvals; + int pos, + posfrac, + delta, + deltafrac; /* Sort the MCV items into position order to speed next loop */ qsort((void *) track, num_mcv, @@ -2313,15 +2317,35 @@ compute_scalar_stats(VacAttrStatsP stats, /* Must copy the target values into anl_context */ old_context = MemoryContextSwitchTo(stats->anl_context); hist_values = (Datum *) palloc(num_hist * sizeof(Datum)); + + /* + * The object of this loop is to copy the first and last values[] + * entries along with evenly-spaced values in between. So the + * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)]. But + * computing that subscript directly risks integer overflow when + * the stats target is more than a couple thousand. Instead we + * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking + * the integral and fractional parts of the sum separately. + */ + delta = (nvals - 1) / (num_hist - 1); + deltafrac = (nvals - 1) % (num_hist - 1); + pos = posfrac = 0; + for (i = 0; i < num_hist; i++) { - int pos; - - pos = (i * (nvals - 1)) / (num_hist - 1); hist_values[i] = datumCopy(values[pos].value, stats->attr->attbyval, stats->attr->attlen); + pos += delta; + posfrac += deltafrac; + if (posfrac >= (num_hist - 1)) + { + /* fractional part exceeds 1, carry to integer part */ + pos++; + posfrac -= (num_hist - 1); + } } + MemoryContextSwitchTo(old_context); stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;