mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-10-02 19:16:54 +02:00
8f3c06c5d5
Author: Daniel Gustafsson Discussion: https://postgr.es/m/7838B8EE-CFD6-48D7-AE2D-520D69FD84BD@yesql.se
322 lines
13 KiB
Plaintext
322 lines
13 KiB
Plaintext
<!--
|
|
doc/src/sgml/ref/analyze.sgml
|
|
PostgreSQL documentation
|
|
-->
|
|
|
|
<refentry id="sql-analyze">
|
|
<indexterm zone="sql-analyze">
|
|
<primary>ANALYZE</primary>
|
|
</indexterm>
|
|
|
|
<refmeta>
|
|
<refentrytitle>ANALYZE</refentrytitle>
|
|
<manvolnum>7</manvolnum>
|
|
<refmiscinfo>SQL - Language Statements</refmiscinfo>
|
|
</refmeta>
|
|
|
|
<refnamediv>
|
|
<refname>ANALYZE</refname>
|
|
<refpurpose>collect statistics about a database</refpurpose>
|
|
</refnamediv>
|
|
|
|
<refsynopsisdiv>
|
|
<synopsis>
|
|
ANALYZE [ ( <replaceable class="parameter">option</replaceable> [, ...] ) ] [ <replaceable class="parameter">table_and_columns</replaceable> [, ...] ]
|
|
ANALYZE [ VERBOSE ] [ <replaceable class="parameter">table_and_columns</replaceable> [, ...] ]
|
|
|
|
<phrase>where <replaceable class="parameter">option</replaceable> can be one of:</phrase>
|
|
|
|
VERBOSE [ <replaceable class="parameter">boolean</replaceable> ]
|
|
SKIP_LOCKED [ <replaceable class="parameter">boolean</replaceable> ]
|
|
|
|
<phrase>and <replaceable class="parameter">table_and_columns</replaceable> is:</phrase>
|
|
|
|
<replaceable class="parameter">table_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ]
|
|
</synopsis>
|
|
</refsynopsisdiv>
|
|
|
|
<refsect1>
|
|
<title>Description</title>
|
|
|
|
<para>
|
|
<command>ANALYZE</command> collects statistics about the contents
|
|
of tables in the database, and stores the results in the <link
|
|
linkend="catalog-pg-statistic"><structname>pg_statistic</structname></link>
|
|
system catalog. Subsequently, the query planner uses these
|
|
statistics to help determine the most efficient execution plans for
|
|
queries.
|
|
</para>
|
|
|
|
<para>
|
|
Without a <replaceable class="parameter">table_and_columns</replaceable>
|
|
list, <command>ANALYZE</command> processes every table and materialized view
|
|
in the current database that the current user has permission to analyze.
|
|
With a list, <command>ANALYZE</command> processes only those table(s).
|
|
It is further possible to give a list of column names for a table,
|
|
in which case only the statistics for those columns are collected.
|
|
</para>
|
|
|
|
<para>
|
|
When the option list is surrounded by parentheses, the options can be
|
|
written in any order. The parenthesized syntax was added in
|
|
<productname>PostgreSQL</productname> 11; the unparenthesized syntax
|
|
is deprecated.
|
|
</para>
|
|
</refsect1>
|
|
|
|
<refsect1>
|
|
<title>Parameters</title>
|
|
|
|
<variablelist>
|
|
<varlistentry>
|
|
<term><literal>VERBOSE</literal></term>
|
|
<listitem>
|
|
<para>
|
|
Enables display of progress messages.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>SKIP_LOCKED</literal></term>
|
|
<listitem>
|
|
<para>
|
|
Specifies that <command>ANALYZE</command> should not wait for any
|
|
conflicting locks to be released when beginning work on a relation:
|
|
if a relation cannot be locked immediately without waiting, the relation
|
|
is skipped. Note that even with this option, <command>ANALYZE</command>
|
|
may still block when opening the relation's indexes or when acquiring
|
|
sample rows from partitions, table inheritance children, and some
|
|
types of foreign tables. Also, while <command>ANALYZE</command>
|
|
ordinarily processes all partitions of specified partitioned tables,
|
|
this option will cause <command>ANALYZE</command> to skip all
|
|
partitions if there is a conflicting lock on the partitioned table.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable class="parameter">boolean</replaceable></term>
|
|
<listitem>
|
|
<para>
|
|
Specifies whether the selected option should be turned on or off.
|
|
You can write <literal>TRUE</literal>, <literal>ON</literal>, or
|
|
<literal>1</literal> to enable the option, and <literal>FALSE</literal>,
|
|
<literal>OFF</literal>, or <literal>0</literal> to disable it. The
|
|
<replaceable class="parameter">boolean</replaceable> value can also
|
|
be omitted, in which case <literal>TRUE</literal> is assumed.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable class="parameter">table_name</replaceable></term>
|
|
<listitem>
|
|
<para>
|
|
The name (possibly schema-qualified) of a specific table to
|
|
analyze. If omitted, all regular tables, partitioned tables, and
|
|
materialized views in the current database are analyzed (but not
|
|
foreign tables). If the specified table is a partitioned table, both the
|
|
inheritance statistics of the partitioned table as a whole and
|
|
statistics of the individual partitions are updated.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable class="parameter">column_name</replaceable></term>
|
|
<listitem>
|
|
<para>
|
|
The name of a specific column to analyze. Defaults to all columns.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
</variablelist>
|
|
</refsect1>
|
|
|
|
<refsect1>
|
|
<title>Outputs</title>
|
|
|
|
<para>
|
|
When <literal>VERBOSE</literal> is specified, <command>ANALYZE</command> emits
|
|
progress messages to indicate which table is currently being
|
|
processed. Various statistics about the tables are printed as well.
|
|
</para>
|
|
</refsect1>
|
|
|
|
<refsect1>
|
|
<title>Notes</title>
|
|
|
|
<para>
|
|
To analyze a table, one must ordinarily be the table's owner or a
|
|
superuser. However, database owners are allowed to
|
|
analyze all tables in their databases, except shared catalogs.
|
|
(The restriction for shared catalogs means that a true database-wide
|
|
<command>ANALYZE</command> can only be performed by a superuser.)
|
|
<command>ANALYZE</command> will skip over any tables that the calling user
|
|
does not have permission to analyze.
|
|
</para>
|
|
|
|
<para>
|
|
Foreign tables are analyzed only when explicitly selected. Not all
|
|
foreign data wrappers support <command>ANALYZE</command>. If the table's
|
|
wrapper does not support <command>ANALYZE</command>, the command prints a
|
|
warning and does nothing.
|
|
</para>
|
|
|
|
<para>
|
|
In the default <productname>PostgreSQL</productname> configuration,
|
|
the autovacuum daemon (see <xref linkend="autovacuum"/>)
|
|
takes care of automatic analyzing of tables when they are first loaded
|
|
with data, and as they change throughout regular operation.
|
|
When autovacuum is disabled,
|
|
it is a good idea to run <command>ANALYZE</command> periodically, or
|
|
just after making major changes in the contents of a table. Accurate
|
|
statistics will help the planner to choose the most appropriate query
|
|
plan, and thereby improve the speed of query processing. A common
|
|
strategy for read-mostly databases is to run <link linkend="sql-vacuum"><command>VACUUM</command></link>
|
|
and <command>ANALYZE</command> once a day during a low-usage time of day.
|
|
(This will not be sufficient if there is heavy update activity.)
|
|
</para>
|
|
|
|
<para>
|
|
<command>ANALYZE</command>
|
|
requires only a read lock on the target table, so it can run in
|
|
parallel with other activity on the table.
|
|
</para>
|
|
|
|
<para>
|
|
The statistics collected by <command>ANALYZE</command> usually
|
|
include a list of some of the most common values in each column and
|
|
a histogram showing the approximate data distribution in each
|
|
column. One or both of these can be omitted if
|
|
<command>ANALYZE</command> deems them uninteresting (for example,
|
|
in a unique-key column, there are no common values) or if the
|
|
column data type does not support the appropriate operators. There
|
|
is more information about the statistics in <xref
|
|
linkend="maintenance"/>.
|
|
</para>
|
|
|
|
<para>
|
|
For large tables, <command>ANALYZE</command> takes a random sample
|
|
of the table contents, rather than examining every row. This
|
|
allows even very large tables to be analyzed in a small amount of
|
|
time. Note, however, that the statistics are only approximate, and
|
|
will change slightly each time <command>ANALYZE</command> is run,
|
|
even if the actual table contents did not change. This might result
|
|
in small changes in the planner's estimated costs shown by
|
|
<link linkend="sql-explain"><command>EXPLAIN</command></link>.
|
|
In rare situations, this non-determinism will cause the planner's
|
|
choices of query plans to change after <command>ANALYZE</command> is run.
|
|
To avoid this, raise the amount of statistics collected by
|
|
<command>ANALYZE</command>, as described below.
|
|
</para>
|
|
|
|
<para>
|
|
The extent of analysis can be controlled by adjusting the
|
|
<xref linkend="guc-default-statistics-target"/> configuration variable, or
|
|
on a column-by-column basis by setting the per-column statistics
|
|
target with <link linkend="sql-altertable"><command>ALTER TABLE ... ALTER COLUMN ... SET
|
|
STATISTICS</command></link>.
|
|
The target value sets the
|
|
maximum number of entries in the most-common-value list and the
|
|
maximum number of bins in the histogram. The default target value
|
|
is 100, but this can be adjusted up or down to trade off accuracy of
|
|
planner estimates against the time taken for
|
|
<command>ANALYZE</command> and the amount of space occupied in
|
|
<literal>pg_statistic</literal>. In particular, setting the
|
|
statistics target to zero disables collection of statistics for
|
|
that column. It might be useful to do that for columns that are
|
|
never used as part of the <literal>WHERE</literal>, <literal>GROUP BY</literal>,
|
|
or <literal>ORDER BY</literal> clauses of queries, since the planner will
|
|
have no use for statistics on such columns.
|
|
</para>
|
|
|
|
<para>
|
|
The largest statistics target among the columns being analyzed determines
|
|
the number of table rows sampled to prepare the statistics. Increasing
|
|
the target causes a proportional increase in the time and space needed
|
|
to do <command>ANALYZE</command>.
|
|
</para>
|
|
|
|
<para>
|
|
One of the values estimated by <command>ANALYZE</command> is the number of
|
|
distinct values that appear in each column. Because only a subset of the
|
|
rows are examined, this estimate can sometimes be quite inaccurate, even
|
|
with the largest possible statistics target. If this inaccuracy leads to
|
|
bad query plans, a more accurate value can be determined manually and then
|
|
installed with
|
|
<link linkend="sql-altertable"><command>ALTER TABLE ... ALTER COLUMN ... SET (n_distinct = ...)</command></link>.
|
|
</para>
|
|
|
|
<para>
|
|
If the table being analyzed is partitioned, <command>ANALYZE</command>
|
|
will gather statistics by sampling blocks randomly from its partitions;
|
|
in addition, it will recurse into each partition and update its statistics.
|
|
(However, in multi-level partitioning scenarios, each leaf partition
|
|
will only be analyzed once.)
|
|
By contrast, if the table being analyzed has inheritance children,
|
|
<command>ANALYZE</command> will gather statistics for it twice:
|
|
once on the rows of the parent table only, and a second time on the
|
|
rows of the parent table with all of its children. This second set of
|
|
statistics is needed when planning queries that traverse the entire
|
|
inheritance tree. The child tables themselves are not individually
|
|
analyzed in this case.
|
|
</para>
|
|
|
|
<para>
|
|
The autovacuum daemon counts inserts, updates and deletes in the
|
|
partitions to determine if auto-analyze is needed. However, adding
|
|
or removing partitions does not affect autovacuum daemon decisions,
|
|
so triggering a manual <command>ANALYZE</command> is recommended
|
|
when this occurs.
|
|
</para>
|
|
|
|
<para>
|
|
Tuples changed in inheritance children do not count towards analyze
|
|
on the parent table. If the parent table is empty or rarely modified,
|
|
it may never be processed by autovacuum. It's necessary to
|
|
periodically run a manual <command>ANALYZE</command> to keep the
|
|
statistics of the table hierarchy up to date.
|
|
</para>
|
|
|
|
<para>
|
|
If any of the child tables or partitions are foreign tables whose foreign data wrappers
|
|
do not support <command>ANALYZE</command>, those child tables are ignored while
|
|
gathering inheritance statistics.
|
|
</para>
|
|
|
|
<para>
|
|
If the table being analyzed is completely empty, <command>ANALYZE</command>
|
|
will not record new statistics for that table. Any existing statistics
|
|
will be retained.
|
|
</para>
|
|
|
|
<para>
|
|
Each backend running <command>ANALYZE</command> will report its progress
|
|
in the <structname>pg_stat_progress_analyze</structname> view. See
|
|
<xref linkend="analyze-progress-reporting"/> for details.
|
|
</para>
|
|
</refsect1>
|
|
|
|
<refsect1>
|
|
<title>Compatibility</title>
|
|
|
|
<para>
|
|
There is no <command>ANALYZE</command> statement in the SQL standard.
|
|
</para>
|
|
</refsect1>
|
|
|
|
<refsect1>
|
|
<title>See Also</title>
|
|
|
|
<simplelist type="inline">
|
|
<member><xref linkend="sql-vacuum"/></member>
|
|
<member><xref linkend="app-vacuumdb"/></member>
|
|
<member><xref linkend="runtime-config-resource-vacuum-cost"/></member>
|
|
<member><xref linkend="autovacuum"/></member>
|
|
<member><xref linkend="analyze-progress-reporting"/></member>
|
|
</simplelist>
|
|
</refsect1>
|
|
</refentry>
|