1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* vacuum.h
|
2001-05-07 02:43:27 +02:00
|
|
|
* header file for postgres vacuum cleaner and statistics analyzer
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*
|
2011-01-01 19:18:15 +01:00
|
|
|
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/include/commands/vacuum.h
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
1997-09-07 07:04:48 +02:00
|
|
|
#ifndef VACUUM_H
|
|
|
|
#define VACUUM_H
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2004-02-13 00:41:04 +01:00
|
|
|
#include "access/htup.h"
|
|
|
|
#include "catalog/pg_statistic.h"
|
|
|
|
#include "catalog/pg_type.h"
|
2001-05-07 02:43:27 +02:00
|
|
|
#include "nodes/parsenodes.h"
|
2007-05-30 22:12:03 +02:00
|
|
|
#include "storage/buf.h"
|
2006-07-13 18:49:20 +02:00
|
|
|
#include "storage/lock.h"
|
2008-06-19 02:46:06 +02:00
|
|
|
#include "utils/relcache.h"
|
1997-02-07 17:24:12 +01:00
|
|
|
|
2007-05-30 22:12:03 +02:00
|
|
|
|
2004-02-13 00:41:04 +01:00
|
|
|
/*----------
|
|
|
|
* ANALYZE builds one of these structs for each attribute (column) that is
|
2004-08-29 07:07:03 +02:00
|
|
|
* to be analyzed. The struct and subsidiary data are in anl_context,
|
2004-02-13 00:41:04 +01:00
|
|
|
* so they live until the end of the ANALYZE operation.
|
|
|
|
*
|
|
|
|
* The type-specific typanalyze function is passed a pointer to this struct
|
|
|
|
* and must return TRUE to continue analysis, FALSE to skip analysis of this
|
2004-08-29 07:07:03 +02:00
|
|
|
* column. In the TRUE case it must set the compute_stats and minrows fields,
|
2004-02-13 00:41:04 +01:00
|
|
|
* and can optionally set extra_data to pass additional info to compute_stats.
|
2004-02-13 07:39:49 +01:00
|
|
|
* minrows is its request for the minimum number of sample rows to be gathered
|
|
|
|
* (but note this request might not be honored, e.g. if there are fewer rows
|
|
|
|
* than that in the table).
|
2004-02-13 00:41:04 +01:00
|
|
|
*
|
|
|
|
* The compute_stats routine will be called after sample rows have been
|
|
|
|
* gathered. Aside from this struct, it is passed:
|
2004-02-13 07:39:49 +01:00
|
|
|
* fetchfunc: a function for accessing the column values from the
|
|
|
|
* sample rows
|
|
|
|
* samplerows: the number of sample tuples
|
2004-02-13 00:41:04 +01:00
|
|
|
* totalrows: estimated total number of rows in relation
|
2004-02-13 07:39:49 +01:00
|
|
|
* The fetchfunc may be called with rownum running from 0 to samplerows-1.
|
|
|
|
* It returns a Datum and an isNull flag.
|
2004-02-13 00:41:04 +01:00
|
|
|
*
|
|
|
|
* compute_stats should set stats_valid TRUE if it is able to compute
|
|
|
|
* any useful statistics. If it does, the remainder of the struct holds
|
|
|
|
* the information to be stored in a pg_statistic row for the column. Be
|
|
|
|
* careful to allocate any pointed-to data in anl_context, which will NOT
|
|
|
|
* be CurrentMemoryContext when compute_stats is called.
|
2011-03-12 22:30:36 +01:00
|
|
|
*
|
|
|
|
* Note: for the moment, all comparisons done for statistical purposes
|
|
|
|
* should use the database's default collation (DEFAULT_COLLATION_OID).
|
|
|
|
* This might change in some future release.
|
2004-02-13 00:41:04 +01:00
|
|
|
*----------
|
|
|
|
*/
|
2004-02-13 07:39:49 +01:00
|
|
|
/* Pointer alias for struct VacAttrStats, declared before the struct itself so the function-pointer types that follow can refer to it. */
typedef struct VacAttrStats *VacAttrStatsP;
|
|
|
|
|
|
|
|
/*
 * Signature of the fetch function passed to compute_stats: given the stats
 * struct and a sample row number (0 .. samplerows-1), it returns the column
 * value as a Datum and reports NULL-ness through *isNull.
 */
typedef Datum (*AnalyzeAttrFetchFunc) (VacAttrStatsP stats, int rownum,
|
2004-08-29 07:07:03 +02:00
|
|
|
bool *isNull);
|
2004-02-13 07:39:49 +01:00
|
|
|
|
2004-02-13 00:41:04 +01:00
|
|
|
/*
 * Per-column ANALYZE working state.  The fields are grouped by who is
 * responsible for filling them in: the main ANALYZE code, the typanalyze
 * routine, the compute_stats routine, and finally ANALYZE-private fields.
 */
typedef struct VacAttrStats
|
|
|
|
{
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* These fields are set up by the main ANALYZE code before invoking the
|
|
|
|
* type-specific typanalyze function.
|
2010-08-02 00:38:11 +02:00
|
|
|
*
|
|
|
|
* Note: do not assume that the data being analyzed has the same datatype
|
2011-04-10 17:42:00 +02:00
|
|
|
* shown in attr, i.e. do not trust attr->atttypid, attlen, etc. This is
|
2010-08-02 00:38:11 +02:00
|
|
|
* because some index opclasses store a different type than the underlying
|
2011-03-12 22:30:36 +01:00
|
|
|
* column/expression. Instead use attrtypid, attrtypmod, and attrtype for
|
2010-08-02 00:38:11 +02:00
|
|
|
* information about the datatype being fed to the typanalyze function.
|
2004-02-13 00:41:04 +01:00
|
|
|
*/
|
|
|
|
Form_pg_attribute attr; /* copy of pg_attribute row for column */
|
2010-08-02 00:38:11 +02:00
|
|
|
Oid attrtypid; /* type of data being analyzed */
|
|
|
|
int32 attrtypmod; /* typmod of data being analyzed */
|
|
|
|
Form_pg_type attrtype; /* copy of pg_type row for attrtypid */
|
2004-02-13 00:41:04 +01:00
|
|
|
MemoryContext anl_context; /* where to save long-lived data */
|
|
|
|
|
|
|
|
/*
|
2004-08-29 07:07:03 +02:00
|
|
|
* These fields must be filled in by the typanalyze routine, unless it
|
|
|
|
* returns FALSE.
|
2004-02-13 00:41:04 +01:00
|
|
|
*/
|
2004-08-29 07:07:03 +02:00
|
|
|
void (*compute_stats) (VacAttrStatsP stats,
|
2005-10-15 04:49:52 +02:00
|
|
|
AnalyzeAttrFetchFunc fetchfunc,
|
2004-08-29 07:07:03 +02:00
|
|
|
int samplerows,
|
|
|
|
double totalrows);
|
2004-02-13 00:41:04 +01:00
|
|
|
int minrows; /* Minimum # of rows wanted for stats */
|
|
|
|
void *extra_data; /* for extra type-specific data */
|
|
|
|
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* These fields are to be filled in by the compute_stats routine. (They
|
|
|
|
* are initialized to zero when the struct is created.)
|
2004-02-13 00:41:04 +01:00
|
|
|
*/
|
|
|
|
bool stats_valid; /* set TRUE by compute_stats if it computed any useful statistics */
|
|
|
|
float4 stanullfrac; /* fraction of entries that are NULL */
|
|
|
|
int4 stawidth; /* average width of column values */
|
|
|
|
float4 stadistinct; /* # distinct values */
|
|
|
|
int2 stakind[STATISTIC_NUM_SLOTS]; /* this and the arrays below have one entry per statistics slot */
|
|
|
|
Oid staop[STATISTIC_NUM_SLOTS];
|
|
|
|
int numnumbers[STATISTIC_NUM_SLOTS]; /* NOTE(review): presumably the length of stanumbers[n] -- confirm */
|
|
|
|
float4 *stanumbers[STATISTIC_NUM_SLOTS];
|
|
|
|
int numvalues[STATISTIC_NUM_SLOTS]; /* NOTE(review): presumably the length of stavalues[n] -- confirm */
|
|
|
|
Datum *stavalues[STATISTIC_NUM_SLOTS];
|
|
|
|
|
2008-07-01 12:33:09 +02:00
|
|
|
/*
|
2009-06-11 16:49:15 +02:00
|
|
|
* These fields describe the stavalues[n] element types. They will be
|
2010-08-02 00:38:11 +02:00
|
|
|
* initialized to match attrtypid, but a custom typanalyze function might
|
|
|
|
* want to store an array of something other than the analyzed column's
|
|
|
|
* elements. It should then overwrite these fields.
|
2008-07-01 12:33:09 +02:00
|
|
|
*/
|
|
|
|
Oid statypid[STATISTIC_NUM_SLOTS];
|
|
|
|
int2 statyplen[STATISTIC_NUM_SLOTS];
|
|
|
|
bool statypbyval[STATISTIC_NUM_SLOTS];
|
|
|
|
char statypalign[STATISTIC_NUM_SLOTS];
|
|
|
|
|
2004-02-13 00:41:04 +01:00
|
|
|
/*
|
2004-08-29 07:07:03 +02:00
|
|
|
* These fields are private to the main ANALYZE code and should not be
|
|
|
|
* looked at by type-specific functions.
|
2004-02-13 00:41:04 +01:00
|
|
|
*/
|
|
|
|
int tupattnum; /* attribute number within tuples */
|
2004-02-15 22:01:39 +01:00
|
|
|
HeapTuple *rows; /* access info for std fetch function */
|
2004-02-13 07:39:49 +01:00
|
|
|
TupleDesc tupDesc; /* NOTE(review): presumably the descriptor for rows[] -- confirm */
|
2004-02-15 22:01:39 +01:00
|
|
|
Datum *exprvals; /* access info for index fetch function */
|
|
|
|
bool *exprnulls; /* NOTE(review): presumably null flags parallel to exprvals -- confirm */
|
|
|
|
int rowstride; /* NOTE(review): presumably the step between rows in exprvals/exprnulls -- confirm */
|
2004-02-13 00:41:04 +01:00
|
|
|
} VacAttrStats;
|
|
|
|
|
|
|
|
|
Fix recently-understood problems with handling of XID freezing, particularly
in PITR scenarios. We now WAL-log the replacement of old XIDs with
FrozenTransactionId, so that such replacement is guaranteed to propagate to
PITR slave databases. Also, rather than relying on hint-bit updates to be
preserved, pg_clog is not truncated until all instances of an XID are known to
have been replaced by FrozenTransactionId. Add new GUC variables and
pg_autovacuum columns to allow management of the freezing policy, so that
users can trade off the size of pg_clog against the amount of freezing work
done. Revise the already-existing code that forces autovacuum of tables
approaching the wraparound point to make it more bulletproof; also, revise the
autovacuum logic so that anti-wraparound vacuuming is done per-table rather
than per-database. initdb forced because of changes in pg_class, pg_database,
and pg_autovacuum catalogs. Heikki Linnakangas, Simon Riggs, and Tom Lane.
2006-11-05 23:42:10 +01:00
|
|
|
/* GUC parameters */
|
2007-11-15 22:14:46 +01:00
|
|
|
/* GUC variable. NOTE(review): presumably the default per-column statistics target used by ANALYZE -- confirm against the definition. */
extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for
|
|
|
|
* PostGIS */
|
Fix recently-understood problems with handling of XID freezing, particularly
in PITR scenarios. We now WAL-log the replacement of old XIDs with
FrozenTransactionId, so that such replacement is guaranteed to propagate to
PITR slave databases. Also, rather than relying on hint-bit updates to be
preserved, pg_clog is not truncated until all instances of an XID are known to
have been replaced by FrozenTransactionId. Add new GUC variables and
pg_autovacuum columns to allow management of the freezing policy, so that
users can trade off the size of pg_clog against the amount of freezing work
done. Revise the already-existing code that forces autovacuum of tables
approaching the wraparound point to make it more bulletproof; also, revise the
autovacuum logic so that anti-wraparound vacuuming is done per-table rather
than per-database. initdb forced because of changes in pg_class, pg_database,
and pg_autovacuum catalogs. Heikki Linnakangas, Simon Riggs, and Tom Lane.
2006-11-05 23:42:10 +01:00
|
|
|
/* GUC variable for managing the XID freezing policy (lets users trade pg_clog size against the amount of freezing work done). */
extern int vacuum_freeze_min_age;
|
2009-01-16 14:27:24 +01:00
|
|
|
/* GUC variable. NOTE(review): presumably the table-age companion to vacuum_freeze_min_age -- confirm against the definition. */
extern int vacuum_freeze_table_age;
|
2002-07-31 19:19:54 +02:00
|
|
|
|
2001-07-12 06:11:13 +02:00
|
|
|
|
2001-05-07 02:43:27 +02:00
|
|
|
/* in commands/vacuum.c */
|
2008-08-13 02:07:50 +02:00
|
|
|
/*
 * NOTE(review): presumably the top-level driver for a VACUUM statement.
 * The meaning of relid, do_toast, for_wraparound and isTopLevel is not
 * established in this header -- consult the definition in commands/vacuum.c.
 */
extern void vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
|
2008-03-14 18:25:59 +01:00
|
|
|
BufferAccessStrategy bstrategy, bool for_wraparound, bool isTopLevel);
|
2004-10-01 01:21:26 +02:00
|
|
|
/*
 * NOTE(review): presumably opens the indexes of 'relation' under 'lockmode',
 * returning their count in *nindexes and an array of them in *Irel --
 * confirm against the definition in commands/vacuum.c.
 */
extern void vac_open_indexes(Relation relation, LOCKMODE lockmode,
|
2005-10-15 04:49:52 +02:00
|
|
|
int *nindexes, Relation **Irel);
|
2004-10-01 01:21:26 +02:00
|
|
|
/* NOTE(review): presumably the counterpart of vac_open_indexes, closing the nindexes relations in Irel -- confirm against the definition. */
extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode);
|
Fix VACUUM so that it always updates pg_class.reltuples/relpages.
When we added the ability for vacuum to skip heap pages by consulting the
visibility map, we made it just not update the reltuples/relpages
statistics if it skipped any pages. But this could leave us with extremely
out-of-date stats for a table that contains any unchanging areas,
especially for TOAST tables which never get processed by ANALYZE. In
particular this could result in autovacuum making poor decisions about when
to process the table, as in recent report from Florian Helmberger. And in
general it's a bad idea to not update the stats at all. Instead, use the
previous values of reltuples/relpages as an estimate of the tuple density
in unvisited pages. This approach results in a "moving average" estimate
of reltuples, which should converge to the correct value over multiple
VACUUM and ANALYZE cycles even when individual measurements aren't very
good.
This new method for updating reltuples is used by both VACUUM and ANALYZE,
with the result that we no longer need the grotty interconnections that
caused ANALYZE to not update the stats depending on what had happened
in the parent VACUUM command.
Also, fix the logic for skipping all-visible pages during VACUUM so that it
looks ahead rather than behind to decide what to do, as per a suggestion
from Greg Stark. This eliminates useless scanning of all-visible pages at
the start of the relation or just after a not-all-visible page. In
particular, the first few pages of the relation will not be invariably
included in the scanned pages, which seems to help in not overweighting
them in the reltuples estimate.
Back-patch to 8.4, where the visibility map was introduced.
2011-05-30 23:05:26 +02:00
|
|
|
/*
 * Estimate a relation's total tuple count (reltuples) when only part of it
 * was scanned.  The previous reltuples/relpages values are used as the
 * tuple-density estimate for the unvisited pages, which yields a "moving
 * average" that converges over repeated VACUUM and ANALYZE cycles.  Used by
 * both VACUUM and ANALYZE.
 */
extern double vac_estimate_reltuples(Relation relation, bool is_analyze,
|
|
|
|
BlockNumber total_pages,
|
|
|
|
BlockNumber scanned_pages,
|
|
|
|
double scanned_tuples);
|
2008-11-10 01:49:37 +01:00
|
|
|
/*
 * NOTE(review): presumably stores the supplied page/tuple counts, hasindex
 * flag and frozenxid into the relation's pg_class entry -- confirm against
 * the definition in commands/vacuum.c.
 */
extern void vac_update_relstats(Relation relation,
|
2001-10-25 07:50:21 +02:00
|
|
|
BlockNumber num_pages,
|
|
|
|
double num_tuples,
|
2011-10-14 23:23:01 +02:00
|
|
|
BlockNumber num_all_visible_pages,
|
2006-07-10 18:20:52 +02:00
|
|
|
bool hasindex,
|
Fix recently-understood problems with handling of XID freezing, particularly
in PITR scenarios. We now WAL-log the replacement of old XIDs with
FrozenTransactionId, so that such replacement is guaranteed to propagate to
PITR slave databases. Also, rather than relying on hint-bit updates to be
preserved, pg_clog is not truncated until all instances of an XID are known to
have been replaced by FrozenTransactionId. Add new GUC variables and
pg_autovacuum columns to allow management of the freezing policy, so that
users can trade off the size of pg_clog against the amount of freezing work
done. Revise the already-existing code that forces autovacuum of tables
approaching the wraparound point to make it more bulletproof; also, revise the
autovacuum logic so that anti-wraparound vacuuming is done per-table rather
than per-database. initdb forced because of changes in pg_class, pg_database,
and pg_autovacuum catalogs. Heikki Linnakangas, Simon Riggs, and Tom Lane.
2006-11-05 23:42:10 +01:00
|
|
|
TransactionId frozenxid);
|
2009-01-16 14:27:24 +01:00
|
|
|
/*
 * NOTE(review): presumably computes the XID cutoffs for a vacuum run from the
 * two age settings and returns them through oldestXmin, freezeLimit and
 * freezeTableLimit -- confirm against the definition in commands/vacuum.c.
 */
extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age,
|
|
|
|
bool sharedRel,
|
2001-10-25 07:50:21 +02:00
|
|
|
TransactionId *oldestXmin,
|
2009-01-16 14:27:24 +01:00
|
|
|
TransactionId *freezeLimit,
|
|
|
|
TransactionId *freezeTableLimit);
|
Fix recently-understood problems with handling of XID freezing, particularly
in PITR scenarios. We now WAL-log the replacement of old XIDs with
FrozenTransactionId, so that such replacement is guaranteed to propagate to
PITR slave databases. Also, rather than relying on hint-bit updates to be
preserved, pg_clog is not truncated until all instances of an XID are known to
have been replaced by FrozenTransactionId. Add new GUC variables and
pg_autovacuum columns to allow management of the freezing policy, so that
users can trade off the size of pg_clog against the amount of freezing work
done. Revise the already-existing code that forces autovacuum of tables
approaching the wraparound point to make it more bulletproof; also, revise the
autovacuum logic so that anti-wraparound vacuuming is done per-table rather
than per-database. initdb forced because of changes in pg_class, pg_database,
and pg_autovacuum catalogs. Heikki Linnakangas, Simon Riggs, and Tom Lane.
2006-11-05 23:42:10 +01:00
|
|
|
/* NOTE(review): presumably refreshes the current database's recorded frozen-XID horizon -- confirm against the definition in commands/vacuum.c. */
extern void vac_update_datfrozenxid(void);
|
2004-02-10 04:42:45 +01:00
|
|
|
/* NOTE(review): presumably a cost-based delay checkpoint called periodically during vacuum work -- confirm against the definition. */
extern void vacuum_delay_point(void);
|
2001-07-12 06:11:13 +02:00
|
|
|
|
2001-07-14 00:55:59 +02:00
|
|
|
/* in commands/vacuumlazy.c */
|
2010-02-09 22:43:30 +01:00
|
|
|
/*
 * NOTE(review): presumably performs the "lazy" (non-FULL) vacuum of a single
 * relation using the given buffer access strategy -- confirm against the
 * definition in commands/vacuumlazy.c.
 */
extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
|
Fix VACUUM so that it always updates pg_class.reltuples/relpages.
When we added the ability for vacuum to skip heap pages by consulting the
visibility map, we made it just not update the reltuples/relpages
statistics if it skipped any pages. But this could leave us with extremely
out-of-date stats for a table that contains any unchanging areas,
especially for TOAST tables which never get processed by ANALYZE. In
particular this could result in autovacuum making poor decisions about when
to process the table, as in recent report from Florian Helmberger. And in
general it's a bad idea to not update the stats at all. Instead, use the
previous values of reltuples/relpages as an estimate of the tuple density
in unvisited pages. This approach results in a "moving average" estimate
of reltuples, which should converge to the correct value over multiple
VACUUM and ANALYZE cycles even when individual measurements aren't very
good.
This new method for updating reltuples is used by both VACUUM and ANALYZE,
with the result that we no longer need the grotty interconnections that
caused ANALYZE to not update the stats depending on what had happened
in the parent VACUUM command.
Also, fix the logic for skipping all-visible pages during VACUUM so that it
looks ahead rather than behind to decide what to do, as per a suggestion
from Greg Stark. This eliminates useless scanning of all-visible pages at
the start of the relation or just after a not-all-visible page. In
particular, the first few pages of the relation will not be invariably
included in the scanned pages, which seems to help in not overweighting
them in the reltuples estimate.
Back-patch to 8.4, where the visibility map was introduced.
2011-05-30 23:05:26 +02:00
|
|
|
BufferAccessStrategy bstrategy);
|
2001-07-14 00:55:59 +02:00
|
|
|
|
2001-05-07 02:43:27 +02:00
|
|
|
/* in commands/analyze.c */
|
2007-05-30 22:12:03 +02:00
|
|
|
/*
 * NOTE(review): presumably runs ANALYZE on the single relation identified by
 * relid using the given buffer access strategy -- confirm against the
 * definition in commands/analyze.c.
 */
extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
|
Fix VACUUM so that it always updates pg_class.reltuples/relpages.
When we added the ability for vacuum to skip heap pages by consulting the
visibility map, we made it just not update the reltuples/relpages
statistics if it skipped any pages. But this could leave us with extremely
out-of-date stats for a table that contains any unchanging areas,
especially for TOAST tables which never get processed by ANALYZE. In
particular this could result in autovacuum making poor decisions about when
to process the table, as in recent report from Florian Helmberger. And in
general it's a bad idea to not update the stats at all. Instead, use the
previous values of reltuples/relpages as an estimate of the tuple density
in unvisited pages. This approach results in a "moving average" estimate
of reltuples, which should converge to the correct value over multiple
VACUUM and ANALYZE cycles even when individual measurements aren't very
good.
This new method for updating reltuples is used by both VACUUM and ANALYZE,
with the result that we no longer need the grotty interconnections that
caused ANALYZE to not update the stats depending on what had happened
in the parent VACUUM command.
Also, fix the logic for skipping all-visible pages during VACUUM so that it
looks ahead rather than behind to decide what to do, as per a suggestion
from Greg Stark. This eliminates useless scanning of all-visible pages at
the start of the relation or just after a not-all-visible page. In
particular, the first few pages of the relation will not be invariably
included in the scanned pages, which seems to help in not overweighting
them in the reltuples estimate.
Back-patch to 8.4, where the visibility map was introduced.
2011-05-30 23:05:26 +02:00
|
|
|
BufferAccessStrategy bstrategy);
|
2001-10-28 07:26:15 +01:00
|
|
|
|
2001-11-05 18:46:40 +01:00
|
|
|
#endif /* VACUUM_H */
|