/*-------------------------------------------------------------------------
 *
 * vacuum.h
 *	  header file for postgres vacuum cleaner and statistics analyzer
 *
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/commands/vacuum.h
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#ifndef VACUUM_H
|
|
|
|
#define VACUUM_H
|
|
|
|
|
2004-02-13 00:41:04 +01:00
|
|
|
#include "access/htup.h"
|
2021-12-22 03:25:14 +01:00
|
|
|
#include "access/genam.h"
|
2021-12-23 07:12:52 +01:00
|
|
|
#include "access/parallel.h"
|
Improve VACUUM and ANALYZE by avoiding early lock queue
A caller of VACUUM can perform early lookup obtention which can cause
other sessions to block on the request done, causing potentially DOS
attacks as even a non-privileged user can attempt a vacuum fill of a
critical catalog table to block even all incoming connection attempts.
Contrary to TRUNCATE, a client could attempt a system-wide VACUUM after
building the list of relations to VACUUM, which can cause vacuum_rel()
or analyze_rel() to try to lock the relation but the operation would
just block. When the client specifies a list of relations and the
relation needs to be skipped, ownership checks are done when building
the list of relations to work on, preventing a later lock attempt.
vacuum_rel() already had the sanity checks needed, except that those
were applied too late. This commit refactors the code so as relation
skips are checked beforehand, making it safer to avoid too early locks,
for both manual VACUUM with and without a list of relations specified.
An isolation test is added emulating the fact that early locks do not
happen anymore, issuing a WARNING message earlier if the user calling
VACUUM is not a relation owner.
When a partitioned table is listed in a manual VACUUM or ANALYZE
command, its full list of partitions is fetched, all partitions get
added to the list to work on, and then each one of them is processed one
by one, with ownership checks happening at the later phase of
vacuum_rel() or analyze_rel(). Trying to do early ownership checks for
each partition is proving to be tedious as this would result in deadlock
risks with lock upgrades, and skipping all partitions if the listed
partitioned table is not owned would result in a behavior change
compared to how Postgres 10 has implemented vacuum for partitioned
tables. The original problem reported related to early lock queue for
critical relations is fixed anyway, so priority is given to avoiding a
backward-incompatible behavior.
Reported-by: Lloyd Albin, Jeremy Schneider
Author: Michael Paquier
Reviewed by: Nathan Bossart, Kyotaro Horiguchi
Discussion: https://postgr.es/m/152512087100.19803.12733865831237526317@wrigleys.postgresql.org
Discussion: https://postgr.es/m/20180812222142.GA6097@paquier.xyz
2018-08-27 02:11:12 +02:00
|
|
|
#include "catalog/pg_class.h"
|
2004-02-13 00:41:04 +01:00
|
|
|
#include "catalog/pg_statistic.h"
|
|
|
|
#include "catalog/pg_type.h"
|
2020-03-10 10:22:52 +01:00
|
|
|
#include "parser/parse_node.h"
|
2007-05-30 22:12:03 +02:00
|
|
|
#include "storage/buf.h"
|
2006-07-13 18:49:20 +02:00
|
|
|
#include "storage/lock.h"
|
2008-06-19 02:46:06 +02:00
|
|
|
#include "utils/relcache.h"
|
1997-02-07 17:24:12 +01:00
|
|
|
|
2020-01-15 02:54:14 +01:00
|
|
|
/*
 * Flags for amparallelvacuumoptions to control the participation of bulkdelete
 * and vacuumcleanup in parallel vacuum.
 */

/*
 * Both bulkdelete and vacuumcleanup are disabled by default.  This will be
 * used by IndexAM's that don't want to or cannot participate in parallel
 * vacuum.  For example, if an index AM doesn't have a way to communicate the
 * index statistics allocated by the first ambulkdelete call to the subsequent
 * ones until amvacuumcleanup, the index AM cannot participate in parallel
 * vacuum.
 */
#define VACUUM_OPTION_NO_PARALLEL			0

/*
 * bulkdelete can be performed in parallel.  This option can be used by
 * index AMs that need to scan indexes to delete tuples.
 */
#define VACUUM_OPTION_PARALLEL_BULKDEL		(1 << 0)

/*
 * vacuumcleanup can be performed in parallel if bulkdelete is not performed
 * yet.  This will be used by IndexAM's that can scan the index if the
 * bulkdelete is not performed.
 */
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP (1 << 1)

/*
 * vacuumcleanup can be performed in parallel even if bulkdelete has already
 * processed the index.  This will be used by IndexAM's that scan the index
 * during the cleanup phase of index irrespective of whether the index is
 * already scanned or not during bulkdelete phase.
 */
#define VACUUM_OPTION_PARALLEL_CLEANUP		(1 << 2)

/* value for checking vacuum flags; all valid option bits are below this */
#define VACUUM_OPTION_MAX_VALID_VALUE		((1 << 3) - 1)
|
2007-05-30 22:12:03 +02:00
|
|
|
|
2021-12-23 07:12:52 +01:00
|
|
|
/* Abstract type for parallel vacuum state (definition is private to vacuum code) */
typedef struct ParallelVacuumState ParallelVacuumState;
|
|
|
|
|
2004-02-13 00:41:04 +01:00
|
|
|
/*----------
 * ANALYZE builds one of these structs for each attribute (column) that is
 * to be analyzed.  The struct and subsidiary data are in anl_context,
 * so they live until the end of the ANALYZE operation.
 *
 * The type-specific typanalyze function is passed a pointer to this struct
 * and must return true to continue analysis, false to skip analysis of this
 * column.  In the true case it must set the compute_stats and minrows fields,
 * and can optionally set extra_data to pass additional info to compute_stats.
 * minrows is its request for the minimum number of sample rows to be gathered
 * (but note this request might not be honored, eg if there are fewer rows
 * than that in the table).
 *
 * The compute_stats routine will be called after sample rows have been
 * gathered.  Aside from this struct, it is passed:
 *		fetchfunc: a function for accessing the column values from the
 *				   sample rows
 *		samplerows: the number of sample tuples
 *		totalrows: estimated total number of rows in relation
 * The fetchfunc may be called with rownum running from 0 to samplerows-1.
 * It returns a Datum and an isNull flag.
 *
 * compute_stats should set stats_valid true if it is able to compute
 * any useful statistics.  If it does, the remainder of the struct holds
 * the information to be stored in a pg_statistic row for the column.  Be
 * careful to allocate any pointed-to data in anl_context, which will NOT
 * be CurrentMemoryContext when compute_stats is called.
 *
 * Note: all comparisons done for statistical purposes should use the
 * underlying column's collation (attcollation), except in situations
 * where a noncollatable container type contains a collatable type;
 * in that case use the type's default collation.  Be sure to record
 * the appropriate collation in stacoll.
 *----------
 */
typedef struct VacAttrStats *VacAttrStatsP;

/* Fetches one sampled value of the column; sets *isNull for NULLs. */
typedef Datum (*AnalyzeAttrFetchFunc) (VacAttrStatsP stats, int rownum,
									   bool *isNull);

/* Computes statistics for one column from the gathered sample rows. */
typedef void (*AnalyzeAttrComputeStatsFunc) (VacAttrStatsP stats,
											 AnalyzeAttrFetchFunc fetchfunc,
											 int samplerows,
											 double totalrows);
|
|
|
|
|
2004-02-13 00:41:04 +01:00
|
|
|
typedef struct VacAttrStats
{
	/*
	 * These fields are set up by the main ANALYZE code before invoking the
	 * type-specific typanalyze function.
	 *
	 * Note: do not assume that the data being analyzed has the same datatype
	 * shown in attr, ie do not trust attr->atttypid, attlen, etc.  This is
	 * because some index opclasses store a different type than the underlying
	 * column/expression.  Instead use attrtypid, attrtypmod, and attrtype for
	 * information about the datatype being fed to the typanalyze function.
	 * Likewise, use attrcollid not attr->attcollation.
	 */
	Form_pg_attribute attr;		/* copy of pg_attribute row for column */
	Oid			attrtypid;		/* type of data being analyzed */
	int32		attrtypmod;		/* typmod of data being analyzed */
	Form_pg_type attrtype;		/* copy of pg_type row for attrtypid */
	Oid			attrcollid;		/* collation of data being analyzed */
	MemoryContext anl_context;	/* where to save long-lived data */

	/*
	 * These fields must be filled in by the typanalyze routine, unless it
	 * returns false.
	 */
	AnalyzeAttrComputeStatsFunc compute_stats;	/* function pointer */
	int			minrows;		/* Minimum # of rows wanted for stats */
	void	   *extra_data;		/* for extra type-specific data */

	/*
	 * These fields are to be filled in by the compute_stats routine.  (They
	 * are initialized to zero when the struct is created.)
	 */
	bool		stats_valid;
	float4		stanullfrac;	/* fraction of entries that are NULL */
	int32		stawidth;		/* average width of column values */
	float4		stadistinct;	/* # distinct values */
	int16		stakind[STATISTIC_NUM_SLOTS];
	Oid			staop[STATISTIC_NUM_SLOTS];
	Oid			stacoll[STATISTIC_NUM_SLOTS];
	int			numnumbers[STATISTIC_NUM_SLOTS];
	float4	   *stanumbers[STATISTIC_NUM_SLOTS];
	int			numvalues[STATISTIC_NUM_SLOTS];
	Datum	   *stavalues[STATISTIC_NUM_SLOTS];

	/*
	 * These fields describe the stavalues[n] element types.  They will be
	 * initialized to match attrtypid, but a custom typanalyze function might
	 * want to store an array of something other than the analyzed column's
	 * elements.  It should then overwrite these fields.
	 */
	Oid			statypid[STATISTIC_NUM_SLOTS];
	int16		statyplen[STATISTIC_NUM_SLOTS];
	bool		statypbyval[STATISTIC_NUM_SLOTS];
	char		statypalign[STATISTIC_NUM_SLOTS];

	/*
	 * These fields are private to the main ANALYZE code and should not be
	 * looked at by type-specific functions.
	 */
	int			tupattnum;		/* attribute number within tuples */
	HeapTuple  *rows;			/* access info for std fetch function */
	TupleDesc	tupDesc;		/* presumably describes the rows array — confirm in analyze.c */
	Datum	   *exprvals;		/* access info for index fetch function */
	bool	   *exprnulls;		/* null flags paired with exprvals */
	int			rowstride;		/* spacing of entries in exprvals/exprnulls */
} VacAttrStats;
|
|
|
|
|
2021-01-18 06:03:10 +01:00
|
|
|
/* flag bits for VacuumParams->options */
#define VACOPT_VACUUM 0x01		/* do VACUUM */
#define VACOPT_ANALYZE 0x02		/* do ANALYZE */
#define VACOPT_VERBOSE 0x04		/* print progress info */
#define VACOPT_FREEZE 0x08		/* FREEZE option */
#define VACOPT_FULL 0x10		/* FULL (non-concurrent) vacuum */
#define VACOPT_SKIP_LOCKED 0x20 /* skip if cannot get lock */
#define VACOPT_PROCESS_TOAST 0x40	/* process the TOAST table, if any */
#define VACOPT_DISABLE_PAGE_SKIPPING 0x80	/* don't skip any pages */
|
2019-03-18 20:14:52 +01:00
|
|
|
|
Allow VACUUM to be run with index cleanup disabled.
This commit adds a new reloption, vacuum_index_cleanup, which
controls whether index cleanup is performed for a particular
relation by default. It also adds a new option to the VACUUM
command, INDEX_CLEANUP, which can be used to override the
reloption. If neither the reloption nor the VACUUM option is
used, the default is true, as before.
Masahiko Sawada, reviewed and tested by Nathan Bossart, Alvaro
Herrera, Kyotaro Horiguchi, Darafei Praliaskouski, and me.
The wording of the documentation is mostly due to me.
Discussion: http://postgr.es/m/CAD21AoAt5R3DNUZSjOoXDUY=naYPUOuffVsRzuTYMz29yLzQCA@mail.gmail.com
2019-04-04 20:58:53 +02:00
|
|
|
/*
 * Values used by index_cleanup and truncate params.
 *
 * VACOPTVALUE_UNSPECIFIED is used as an initial placeholder when VACUUM
 * command has no explicit value.  When that happens the final usable value
 * comes from the corresponding reloption (though the reloption default is
 * usually used).
 */
typedef enum VacOptValue
{
	VACOPTVALUE_UNSPECIFIED = 0,
	VACOPTVALUE_AUTO,
	VACOPTVALUE_DISABLED,
	VACOPTVALUE_ENABLED,
} VacOptValue;
|
Allow VACUUM to be run with index cleanup disabled.
This commit adds a new reloption, vacuum_index_cleanup, which
controls whether index cleanup is performed for a particular
relation by default. It also adds a new option to the VACUUM
command, INDEX_CLEANUP, which can be used to override the
reloption. If neither the reloption nor the VACUUM option is
used, the default is true, as before.
Masahiko Sawada, reviewed and tested by Nathan Bossart, Alvaro
Herrera, Kyotaro Horiguchi, Darafei Praliaskouski, and me.
The wording of the documentation is mostly due to me.
Discussion: http://postgr.es/m/CAD21AoAt5R3DNUZSjOoXDUY=naYPUOuffVsRzuTYMz29yLzQCA@mail.gmail.com
2019-04-04 20:58:53 +02:00
|
|
|
|
2015-03-18 15:52:33 +01:00
|
|
|
/*
 * Parameters customizing behavior of VACUUM and ANALYZE.
 *
 * Note that at least one of VACOPT_VACUUM and VACOPT_ANALYZE must be set
 * in options.
 */
typedef struct VacuumParams
{
	bits32		options;		/* bitmask of VACOPT_* */
	int			freeze_min_age; /* min freeze age, -1 to use default */
	int			freeze_table_age;	/* age at which to scan whole table */
	int			multixact_freeze_min_age;	/* min multixact freeze age, -1 to
											 * use default */
	int			multixact_freeze_table_age; /* multixact age at which to scan
											 * whole table */
	bool		is_wraparound;	/* force a for-wraparound vacuum */
	int			log_min_duration;	/* minimum execution threshold in ms at
									 * which verbose logs are activated, -1
									 * to use default */
	VacOptValue index_cleanup;	/* Do index vacuum and cleanup */
	VacOptValue truncate;		/* Truncate empty pages at the end */

	/*
	 * The number of parallel vacuum workers.  0 by default which means choose
	 * based on the number of indexes.  -1 indicates parallel vacuum is
	 * disabled.
	 */
	int			nworkers;
} VacuumParams;
|
2004-02-13 00:41:04 +01:00
|
|
|
|
2021-12-22 03:25:14 +01:00
|
|
|
/*
 * VacDeadItems stores TIDs whose index tuples are deleted by index vacuuming.
 */
typedef struct VacDeadItems
{
	int			max_items;		/* # slots allocated in array */
	int			num_items;		/* current # of entries */

	/* Sorted array of TIDs to delete from indexes */
	ItemPointerData items[FLEXIBLE_ARRAY_MEMBER];
} VacDeadItems;

/* max # of dead items that fit in avail_mem bytes of a VacDeadItems */
#define MAXDEADITEMS(avail_mem) \
	(((avail_mem) - offsetof(VacDeadItems, items)) / sizeof(ItemPointerData))
|
|
|
|
|
Fix recently-understood problems with handling of XID freezing, particularly
in PITR scenarios. We now WAL-log the replacement of old XIDs with
FrozenTransactionId, so that such replacement is guaranteed to propagate to
PITR slave databases. Also, rather than relying on hint-bit updates to be
preserved, pg_clog is not truncated until all instances of an XID are known to
have been replaced by FrozenTransactionId. Add new GUC variables and
pg_autovacuum columns to allow management of the freezing policy, so that
users can trade off the size of pg_clog against the amount of freezing work
done. Revise the already-existing code that forces autovacuum of tables
approaching the wraparound point to make it more bulletproof; also, revise the
autovacuum logic so that anti-wraparound vacuuming is done per-table rather
than per-database. initdb forced because of changes in pg_class, pg_database,
and pg_autovacuum catalogs. Heikki Linnakangas, Simon Riggs, and Tom Lane.
2006-11-05 23:42:10 +01:00
|
|
|
/* GUC parameters */
extern PGDLLIMPORT int default_statistics_target;	/* PGDLLIMPORT for PostGIS */
extern int	vacuum_freeze_min_age;
extern int	vacuum_freeze_table_age;
extern int	vacuum_multixact_freeze_min_age;
extern int	vacuum_multixact_freeze_table_age;
extern int	vacuum_failsafe_age;
extern int	vacuum_multixact_failsafe_age;

/* Variables for cost-based parallel vacuum */
extern pg_atomic_uint32 *VacuumSharedCostBalance;
extern pg_atomic_uint32 *VacuumActiveNWorkers;
extern int	VacuumCostBalanceLocal;
|
|
|
|
|
2001-07-12 06:11:13 +02:00
|
|
|
|
2001-05-07 02:43:27 +02:00
|
|
|
/* in commands/vacuum.c */
|
2019-03-18 20:14:52 +01:00
|
|
|
extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel);
|
2019-03-18 18:57:33 +01:00
|
|
|
extern void vacuum(List *relations, VacuumParams *params,
|
2015-03-18 15:52:33 +01:00
|
|
|
BufferAccessStrategy bstrategy, bool isTopLevel);
|
2004-10-01 01:21:26 +02:00
|
|
|
extern void vac_open_indexes(Relation relation, LOCKMODE lockmode,
|
|
|
|
int *nindexes, Relation **Irel);
|
|
|
|
extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode);
|
When updating reltuples after ANALYZE, just extrapolate from our sample.
The existing logic for updating pg_class.reltuples trusted the sampling
results only for the pages ANALYZE actually visited, preferring to
believe the previous tuple density estimate for all the unvisited pages.
While there's some rationale for doing that for VACUUM (first that
VACUUM is likely to visit a very nonrandom subset of pages, and second
that we know for sure that the unvisited pages did not change), there's
no such rationale for ANALYZE: by assumption, it's looked at an unbiased
random sample of the table's pages. Furthermore, in a very large table
ANALYZE will have examined only a tiny fraction of the table's pages,
meaning it cannot slew the overall density estimate very far at all.
In a table that is physically growing, this causes reltuples to increase
nearly proportionally to the change in relpages, regardless of what is
actually happening in the table. This has been observed to cause reltuples
to become so much larger than reality that it effectively shuts off
autovacuum, whose threshold for doing anything is a fraction of reltuples.
(Getting to the point where that would happen seems to require some
additional, not well understood, conditions. But it's undeniable that if
reltuples is seriously off in a large table, ANALYZE alone will not fix it
in any reasonable number of iterations, especially not if the table is
continuing to grow.)
Hence, restrict the use of vac_estimate_reltuples() to VACUUM alone,
and in ANALYZE, just extrapolate from the sample pages on the assumption
that they provide an accurate model of the whole table. If, by very bad
luck, they don't, at least another ANALYZE will fix it; in the old logic
a single bad estimate could cause problems indefinitely.
In HEAD, let's remove vac_estimate_reltuples' is_analyze argument
altogether; it was never used for anything and now it's totally pointless.
But keep it in the back branches, in case any third-party code is calling
this function.
Per bug #15005. Back-patch to all supported branches.
David Gould, reviewed by Alexander Kuzmenkov, cosmetic changes by me
Discussion: https://postgr.es/m/20180117164916.3fdcf2e9@engels
2018-03-13 18:24:27 +01:00
|
|
|
extern double vac_estimate_reltuples(Relation relation,
|
Fix VACUUM so that it always updates pg_class.reltuples/relpages.
When we added the ability for vacuum to skip heap pages by consulting the
visibility map, we made it just not update the reltuples/relpages
statistics if it skipped any pages. But this could leave us with extremely
out-of-date stats for a table that contains any unchanging areas,
especially for TOAST tables which never get processed by ANALYZE. In
particular this could result in autovacuum making poor decisions about when
to process the table, as in recent report from Florian Helmberger. And in
general it's a bad idea to not update the stats at all. Instead, use the
previous values of reltuples/relpages as an estimate of the tuple density
in unvisited pages. This approach results in a "moving average" estimate
of reltuples, which should converge to the correct value over multiple
VACUUM and ANALYZE cycles even when individual measurements aren't very
good.
This new method for updating reltuples is used by both VACUUM and ANALYZE,
with the result that we no longer need the grotty interconnections that
caused ANALYZE to not update the stats depending on what had happened
in the parent VACUUM command.
Also, fix the logic for skipping all-visible pages during VACUUM so that it
looks ahead rather than behind to decide what to do, as per a suggestion
from Greg Stark. This eliminates useless scanning of all-visible pages at
the start of the relation or just after a not-all-visible page. In
particular, the first few pages of the relation will not be invariably
included in the scanned pages, which seems to help in not overweighting
them in the reltuples estimate.
Back-patch to 8.4, where the visibility map was introduced.
2011-05-30 23:05:26 +02:00
|
|
|
BlockNumber total_pages,
|
|
|
|
BlockNumber scanned_pages,
|
|
|
|
double scanned_tuples);
|
2008-11-10 01:49:37 +01:00
|
|
|
extern void vac_update_relstats(Relation relation,
|
2001-06-28 01:31:40 +02:00
|
|
|
BlockNumber num_pages,
|
|
|
|
double num_tuples,
|
2011-10-14 23:23:01 +02:00
|
|
|
BlockNumber num_all_visible_pages,
|
2006-07-10 18:20:52 +02:00
|
|
|
bool hasindex,
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
TransactionId frozenxid,
|
2014-10-30 18:03:22 +01:00
|
|
|
MultiXactId minmulti,
|
|
|
|
bool in_outer_xact);
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the affected tables.  The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Geoghegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
extern void vacuum_set_xid_limits(Relation rel,
|
|
|
|
int freeze_min_age, int freeze_table_age,
|
Separate multixact freezing parameters from xid's
Previously we were piggybacking on transaction ID parameters to freeze
multixacts; but since there isn't necessarily any relationship between
rates of Xid and multixact consumption, this turns out not to be a good
idea.
Therefore, we now have multixact-specific freezing parameters:
vacuum_multixact_freeze_min_age: when to remove multis as we come across
them in vacuum (default to 5 million, i.e. early in comparison to Xid's
default of 50 million)
vacuum_multixact_freeze_table_age: when to force whole-table scans
instead of scanning only the pages marked as not all visible in
visibility map (default to 150 million, same as for Xids). Whichever of
both which reaches the 150 million mark earlier will cause a whole-table
scan.
autovacuum_multixact_freeze_max_age: when to force emergency,
uninterruptible whole-table scans (default to 400 million, double as
that for Xids). This means there shouldn't be more frequent emergency
vacuuming than previously, unless multixacts are being used very
rapidly.
Backpatch to 9.3 where multixacts were made to persist enough to require
freezing. To avoid an ABI break in 9.3, VacuumStmt has a couple of
fields in an unnatural place, and StdRdOptions is split in two so that
the newly added fields can go at the end.
Patch by me, reviewed by Robert Haas, with additional input from Andres
Freund and Tom Lane.
2014-02-13 23:30:30 +01:00
|
|
|
int multixact_freeze_min_age,
|
|
|
|
int multixact_freeze_table_age,
|
2001-08-26 18:56:03 +02:00
|
|
|
TransactionId *oldestXmin,
|
2009-01-16 14:27:24 +01:00
|
|
|
TransactionId *freezeLimit,
|
2013-11-28 20:52:54 +01:00
|
|
|
TransactionId *xidFullScanLimit,
|
|
|
|
MultiXactId *multiXactCutoff,
|
|
|
|
MultiXactId *mxactFullScanLimit);
|
Add wraparound failsafe to VACUUM.
Add a failsafe mechanism that is triggered by VACUUM when it notices
that the table's relfrozenxid and/or relminmxid are dangerously far in
the past. VACUUM checks the age of the table dynamically, at regular
intervals.
When the failsafe triggers, VACUUM takes extraordinary measures to
finish as quickly as possible so that relfrozenxid and/or relminmxid can
be advanced. VACUUM will stop applying any cost-based delay that may be
in effect. VACUUM will also bypass any further index vacuuming and heap
vacuuming -- it only completes whatever remaining pruning and freezing
is required. Bypassing index/heap vacuuming is enabled by commit
8523492d, which made it possible to dynamically trigger the mechanism
already used within VACUUM when it is run with INDEX_CLEANUP off.
It is expected that the failsafe will almost always trigger within an
autovacuum to prevent wraparound, long after the autovacuum began.
However, the failsafe mechanism can trigger in any VACUUM operation.
Even in a non-aggressive VACUUM, where we're likely to not advance
relfrozenxid, it still seems like a good idea to finish off remaining
pruning and freezing. An aggressive/anti-wraparound VACUUM will be
launched immediately afterwards. Note that the anti-wraparound VACUUM
that follows will itself trigger the failsafe, usually before it even
begins its first (and only) pass over the heap.
The failsafe is controlled by two new GUCs: vacuum_failsafe_age, and
vacuum_multixact_failsafe_age. There are no equivalent reloptions,
since that isn't expected to be useful. The GUCs have rather high
defaults (both default to 1.6 billion), and are expected to generally
only be used to make the failsafe trigger sooner/more frequently.
Author: Masahiko Sawada <sawada.mshk@gmail.com>
Author: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAD21AoD0SkE11fMw4jD4RENAwBMcw1wasVnwpJVw3tVqPOQgAw@mail.gmail.com
Discussion: https://postgr.es/m/CAH2-WzmgH3ySGYeC-m-eOBsa2=sDwa292-CFghV4rESYo39FsQ@mail.gmail.com
2021-04-07 21:37:45 +02:00
|
|
|
extern bool vacuum_xid_failsafe_check(TransactionId relfrozenxid,
|
|
|
|
MultiXactId relminmxid);
|
Fix recently-understood problems with handling of XID freezing, particularly
in PITR scenarios. We now WAL-log the replacement of old XIDs with
FrozenTransactionId, so that such replacement is guaranteed to propagate to
PITR slave databases. Also, rather than relying on hint-bit updates to be
preserved, pg_clog is not truncated until all instances of an XID are known to
have been replaced by FrozenTransactionId. Add new GUC variables and
pg_autovacuum columns to allow management of the freezing policy, so that
users can trade off the size of pg_clog against the amount of freezing work
done. Revise the already-existing code that forces autovacuum of tables
approaching the wraparound point to make it more bulletproof; also, revise the
autovacuum logic so that anti-wraparound vacuuming is done per-table rather
than per-database. initdb forced because of changes in pg_class, pg_database,
and pg_autovacuum catalogs. Heikki Linnakangas, Simon Riggs, and Tom Lane.
2006-11-05 23:42:10 +01:00
|
|
|
/*
 * Recompute and advance the database-wide frozen-XID horizon
 * (pg_database.datfrozenxid) after vacuuming.  NOTE(review): exact semantics
 * live in commands/vacuum.c -- confirm there.
 */
extern void vac_update_datfrozenxid(void);
|
2004-02-10 04:42:45 +01:00
|
|
|
/*
 * Periodic check point for cost-based vacuum delay; long-running vacuum code
 * calls this at convenient intervals.  NOTE(review): sleeping behavior
 * inferred from the cost-balance variables declared above -- confirm in
 * commands/vacuum.c.
 */
extern void vacuum_delay_point(void);
|
Improve VACUUM and ANALYZE by avoiding early lock queue
A caller of VACUUM can perform early lock obtention which can cause
other sessions to block on the request done, causing potentially DOS
attacks as even a non-privileged user can attempt a vacuum full of a
critical catalog table to block even all incoming connection attempts.
Contrary to TRUNCATE, a client could attempt a system-wide VACUUM after
building the list of relations to VACUUM, which can cause vacuum_rel()
or analyze_rel() to try to lock the relation but the operation would
just block. When the client specifies a list of relations and the
relation needs to be skipped, ownership checks are done when building
the list of relations to work on, preventing a later lock attempt.
vacuum_rel() already had the sanity checks needed, except that those
were applied too late.  This commit refactors the code so that relation
skips are checked beforehand, making it safer to avoid too early locks,
for both manual VACUUM with and without a list of relations specified.
An isolation test is added emulating the fact that early locks do not
happen anymore, issuing a WARNING message earlier if the user calling
VACUUM is not a relation owner.
When a partitioned table is listed in a manual VACUUM or ANALYZE
command, its full list of partitions is fetched, all partitions get
added to the list to work on, and then each one of them is processed one
by one, with ownership checks happening at the later phase of
vacuum_rel() or analyze_rel(). Trying to do early ownership checks for
each partition is proving to be tedious as this would result in deadlock
risks with lock upgrades, and skipping all partitions if the listed
partitioned table is not owned would result in a behavior change
compared to how Postgres 10 has implemented vacuum for partitioned
tables. The original problem reported related to early lock queue for
critical relations is fixed anyway, so priority is given to avoiding a
backward-incompatible behavior.
Reported-by: Lloyd Albin, Jeremy Schneider
Author: Michael Paquier
Reviewed by: Nathan Bossart, Kyotaro Horiguchi
Discussion: https://postgr.es/m/152512087100.19803.12733865831237526317@wrigleys.postgresql.org
Discussion: https://postgr.es/m/20180812222142.GA6097@paquier.xyz
2018-08-27 02:11:12 +02:00
|
|
|
extern bool vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple,
|
2021-01-18 06:03:10 +01:00
|
|
|
bits32 options);
|
2018-10-02 01:53:38 +02:00
|
|
|
extern Relation vacuum_open_relation(Oid relid, RangeVar *relation,
|
2021-01-18 06:03:10 +01:00
|
|
|
bits32 options, bool verbose,
|
|
|
|
LOCKMODE lmode);
|
2021-12-22 03:25:14 +01:00
|
|
|
extern IndexBulkDeleteResult *vac_bulkdel_one_index(IndexVacuumInfo *ivinfo,
|
|
|
|
IndexBulkDeleteResult *istat,
|
|
|
|
VacDeadItems *dead_items);
|
|
|
|
extern IndexBulkDeleteResult *vac_cleanup_one_index(IndexVacuumInfo *ivinfo,
|
|
|
|
IndexBulkDeleteResult *istat);
|
|
|
|
/*
 * Returns the allocation size (in bytes) needed for a VacDeadItems struct
 * capable of holding max_items entries.
 */
extern Size vac_max_items_to_alloc_size(int max_items);
|
2001-07-12 06:11:13 +02:00
|
|
|
|
2021-12-23 07:12:52 +01:00
|
|
|
/* in commands/vacuumparallel.c */
|
|
|
|
extern ParallelVacuumState *parallel_vacuum_init(Relation rel, Relation *indrels,
|
|
|
|
int nindexes, int nrequested_workers,
|
|
|
|
int max_items, int elevel,
|
|
|
|
BufferAccessStrategy bstrategy);
|
|
|
|
extern void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats);
|
|
|
|
extern VacDeadItems *parallel_vacuum_get_dead_items(ParallelVacuumState *pvs);
|
|
|
|
extern void parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs,
|
|
|
|
long num_table_tuples,
|
|
|
|
int num_index_scans);
|
|
|
|
extern void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs,
|
|
|
|
long num_table_tuples,
|
|
|
|
int num_index_scans,
|
|
|
|
bool estimated_count);
|
|
|
|
/*
 * Per-worker entry point for parallel vacuum, invoked with the worker's
 * dynamic shared memory segment and its table of contents (see
 * access/parallel.h included above).
 */
extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
|
|
|
|
|
2001-05-07 02:43:27 +02:00
|
|
|
/* in commands/analyze.c */
|
2019-03-18 18:57:33 +01:00
|
|
|
extern void analyze_rel(Oid relid, RangeVar *relation,
|
2015-04-03 16:55:50 +02:00
|
|
|
VacuumParams *params, List *va_cols, bool in_outer_xact,
|
2015-03-18 15:52:33 +01:00
|
|
|
BufferAccessStrategy bstrategy);
|
2012-03-04 02:20:19 +01:00
|
|
|
extern bool std_typanalyze(VacAttrStats *stats);
|
2001-10-28 07:26:15 +01:00
|
|
|
|
2015-05-19 00:34:37 +02:00
|
|
|
/* in utils/misc/sampling.c --- duplicate of declarations in utils/sampling.h */
|
|
|
|
extern double anl_random_fract(void);
|
|
|
|
extern double anl_init_selection_state(int n);
|
|
|
|
extern double anl_get_next_S(double t, int n, double *stateptr);
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
#endif /* VACUUM_H */
|