diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index bc1a2b9918..a1dee895b3 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -8,19 +8,18 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.9 2000/11/16 22:30:19 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.10 2000/12/02 19:38:34 tgl Exp $ * - *------------------------------------------------------------------------- */ +#include "postgres.h" + #include #include #include #include #include -#include "postgres.h" - #include "access/heapam.h" #include "catalog/catname.h" #include "catalog/indexing.h" @@ -159,7 +158,8 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL) stats = &vacattrstats[i]; stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE); - memmove(stats->attr, attr[((attnums) ? attnums[i] : i)], ATTRIBUTE_TUPLE_SIZE); + memcpy(stats->attr, attr[((attnums) ? attnums[i] : i)], + ATTRIBUTE_TUPLE_SIZE); stats->best = stats->guess1 = stats->guess2 = 0; stats->max = stats->min = 0; stats->best_len = stats->guess1_len = stats->guess2_len = 0; @@ -220,6 +220,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL) /* delete existing pg_statistic rows for relation */ del_stats(relid, ((attnums) ? attr_cnt : 0), attnums); + /* scan relation to gather statistics */ scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL); while (HeapTupleIsValid(tuple = heap_getnext(scan, 0))) @@ -237,7 +238,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL) } /* - * attr_stats() -- compute column statistics used by the optimzer + * attr_stats() -- compute column statistics used by the planner * * We compute the column min, max, null and non-null counts. * Plus we attempt to find the count of the value that occurs most @@ -266,6 +267,7 @@ attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple for (i = 0; i < attr_cnt; i++) { VacAttrStats *stats = &vacattrstats[i]; + Datum origvalue; Datum value; bool isnull; bool value_hit; @@ -278,16 +280,25 @@ attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple continue; #endif /* _DROP_COLUMN_HACK__ */ - value = heap_getattr(tuple, - stats->attr->attnum, tupDesc, &isnull); + origvalue = heap_getattr(tuple, stats->attr->attnum, + tupDesc, &isnull); if (isnull) { stats->null_cnt++; continue; } - stats->nonnull_cnt++; + + /* + * If the value is toasted, detoast it to avoid repeated detoastings + * and resultant memory leakage inside the comparison routines. + */ + if (!stats->attr->attbyval && stats->attr->attlen == -1) + value = PointerGetDatum(PG_DETOAST_DATUM(origvalue)); + else + value = origvalue; + if (! stats->initialized) { bucketcpy(stats->attr, value, &stats->best, &stats->best_len); @@ -365,22 +376,26 @@ attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple stats->guess1_hits = 1; stats->guess2_hits = 1; } + + /* Clean up detoasted copy, if any */ + if (value != origvalue) + pfree(DatumGetPointer(value)); } } /* * bucketcpy() -- copy a new value into one of the statistics buckets - * */ static void bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len) { - if (attr->attbyval && attr->attlen != -1) + if (attr->attbyval) *bucket = value; else { int len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value)); + /* Avoid unnecessary palloc() traffic... */ if (len > *bucket_len) { if (*bucket_len != 0) @@ -396,8 +411,27 @@ bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len) /* * update_attstats() -- update attribute statistics for one relation * - * Updates of pg_attribute statistics are handled by over-write, - * for reasons described above. pg_statistic rows are added normally. + * Statistics are stored in several places: the pg_class row for the + * relation has stats about the whole relation, the pg_attribute rows + * for each attribute store "dispersion", and there is a pg_statistic + * row for each (non-system) attribute. (Dispersion probably ought to + * be moved to pg_statistic, but it's not worth doing unless there's + * another reason to have to change pg_attribute.) The pg_class values + * are updated by VACUUM, not here. + * + * We violate no-overwrite semantics here by storing new values for + * the dispersion column directly into the pg_attribute tuple that's + * already on the page. The reason for this is that if we updated + * these tuples in the usual way, vacuuming pg_attribute itself + * wouldn't work very well --- by the time we got done with a vacuum + * cycle, most of the tuples in pg_attribute would've been obsoleted. + * Updating pg_attribute's own statistics would be especially tricky. + * Of course, this only works for fixed-size never-null columns, but + * dispersion is. + * + * pg_statistic rows are just added normally. This means that + * pg_statistic will probably contain some deleted rows at the + * completion of a vacuum cycle, unless it happens to get vacuumed last. * * To keep things simple, we punt for pg_statistic, and don't try * to compute or store rows for pg_statistic itself in pg_statistic. diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 5f2e193d05..d42417b050 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -8,21 +8,28 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.174 2000/11/30 08:46:22 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.175 2000/12/02 19:38:34 tgl Exp $ * - *------------------------------------------------------------------------- */ +#include "postgres.h" + #include #include #include #include #include -#include "postgres.h" +#ifndef HAVE_GETRUSAGE +#include "rusagestub.h" +#else +#include +#include +#endif #include "access/genam.h" #include "access/heapam.h" +#include "access/xlog.h" #include "catalog/catalog.h" #include "catalog/catname.h" #include "catalog/index.h" @@ -40,14 +47,6 @@ #include "utils/syscache.h" #include "utils/temprel.h" -#ifndef HAVE_GETRUSAGE -#include "rusagestub.h" -#else -#include -#include -#endif - -#include "access/xlog.h" extern XLogRecPtr log_heap_move(Relation reln, ItemPointerData from, HeapTuple newtup); @@ -62,7 +61,7 @@ static void vacuum_init(void); static void vacuum_shutdown(void); static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2); static VRelList getrels(NameData *VacRelP); -static void vacuum_rel(Oid relid, bool analyze, bool is_toastrel); +static void vacuum_rel(Oid relid, bool is_toastrel); static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages); static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel); static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist); @@ -240,7 +239,7 @@ vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2) /* vacuum each heap relation */ for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next) { - vacuum_rel(cur->vrl_relid, analyze, false); + vacuum_rel(cur->vrl_relid, false); /* analyze separately so locking is minimized */ if (analyze) analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL); @@ -352,7 +351,7 @@ getrels(NameData *VacRelP) * us to lock the entire database during one pass of the vacuum cleaner. */ static void -vacuum_rel(Oid relid, bool analyze, bool is_toastrel) +vacuum_rel(Oid relid, bool is_toastrel) { Relation onerel; VacPageListData vacuum_pages; /* List of pages to vacuum and/or clean @@ -532,7 +531,7 @@ vacuum_rel(Oid relid, bool analyze, bool is_toastrel) * totally unimportant for toast relations */ if (toast_relid != InvalidOid) - vacuum_rel(toast_relid, false, true); + vacuum_rel(toast_relid, true); /* next command frees attribute stats */ if (!is_toastrel) @@ -2187,14 +2186,9 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist) /* * update_relstats() -- update statistics for one relation * - * Statistics are stored in several places: the pg_class row for the - * relation has stats about the whole relation, the pg_attribute rows - * for each attribute store "dispersion", and there is a pg_statistic - * row for each (non-system) attribute. (Dispersion probably ought to - * be moved to pg_statistic, but it's not worth doing unless there's - * another reason to have to change pg_attribute.) Dispersion and - * pg_statistic values are only updated by VACUUM ANALYZE, but we - * always update the stats in pg_class. + * Update the whole-relation statistics that are kept in its pg_class + * row. There are additional stats that will be updated if we are + * doing VACUUM ANALYZE, but we always update these stats. * * This routine works for both index and heap relation entries in * pg_class. We violate no-overwrite semantics here by storing new