1999-07-22 17:09:15 +02:00
|
|
|
<!--
|
2002-11-18 18:12:07 +01:00
|
|
|
$Header: /cvsroot/pgsql/doc/src/sgml/ref/cluster.sgml,v 1.22 2002/11/18 17:12:06 momjian Exp $
|
2001-12-08 04:24:40 +01:00
|
|
|
PostgreSQL documentation
|
1999-07-22 17:09:15 +02:00
|
|
|
-->
|
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
<refentry id="SQL-CLUSTER">
|
|
|
|
<refmeta>
|
2002-04-21 21:02:39 +02:00
|
|
|
<refentrytitle id="sql-cluster-title">CLUSTER</refentrytitle>
|
1999-07-06 19:16:42 +02:00
|
|
|
<refmiscinfo>SQL - Language Statements</refmiscinfo>
|
|
|
|
</refmeta>
|
|
|
|
<refnamediv>
|
|
|
|
<refname>
|
|
|
|
CLUSTER
|
|
|
|
</refname>
|
|
|
|
<refpurpose>
|
2001-09-03 14:57:50 +02:00
|
|
|
cluster a table according to an index
|
1999-07-06 19:16:42 +02:00
|
|
|
</refpurpose>
|
1998-12-29 03:24:47 +01:00
|
|
|
</refnamediv>
|
1999-07-06 19:16:42 +02:00
|
|
|
<refsynopsisdiv>
|
|
|
|
<refsynopsisdivinfo>
|
1999-07-22 17:09:15 +02:00
|
|
|
<date>1999-07-20</date>
|
1999-07-06 19:16:42 +02:00
|
|
|
</refsynopsisdivinfo>
|
|
|
|
<synopsis>
|
2001-01-14 00:58:55 +01:00
|
|
|
CLUSTER <replaceable class="PARAMETER">indexname</replaceable> ON <replaceable class="PARAMETER">tablename</replaceable>
|
2002-11-15 04:09:39 +01:00
|
|
|
CLUSTER <replaceable class="PARAMETER">tablename</replaceable>
|
2002-11-18 18:12:07 +01:00
|
|
|
CLUSTER
|
1999-07-06 19:16:42 +02:00
|
|
|
</synopsis>
|
1998-09-16 16:43:12 +02:00
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
<refsect2 id="R2-SQL-CLUSTER-1">
|
|
|
|
<refsect2info>
|
|
|
|
<date>1998-09-08</date>
|
|
|
|
</refsect2info>
|
|
|
|
<title>
|
|
|
|
Inputs
|
|
|
|
</title>
|
|
|
|
<para>
|
|
|
|
</para>
|
|
|
|
<variablelist>
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="PARAMETER">indexname</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of an index.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="PARAMETER">tablename</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
2002-04-23 04:07:16 +02:00
|
|
|
The name (possibly schema-qualified) of a table.
|
1999-07-06 19:16:42 +02:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
</variablelist>
|
|
|
|
</refsect2>
|
|
|
|
|
|
|
|
<refsect2 id="R2-SQL-CLUSTER-2">
|
|
|
|
<refsect2info>
|
|
|
|
<date>1998-09-08</date>
|
|
|
|
</refsect2info>
|
|
|
|
<title>
|
|
|
|
Outputs
|
|
|
|
</title>
|
|
|
|
<para>
|
|
|
|
|
|
|
|
<variablelist>
|
|
|
|
<varlistentry>
|
|
|
|
<term><computeroutput>
|
|
|
|
CLUSTER
|
|
|
|
</computeroutput></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The clustering was done successfully.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
</variablelist>
|
|
|
|
</para>
|
|
|
|
</refsect2>
|
|
|
|
</refsynopsisdiv>
|
|
|
|
|
|
|
|
<refsect1 id="R1-SQL-CLUSTER-1">
|
|
|
|
<refsect1info>
|
|
|
|
<date>1998-09-08</date>
|
|
|
|
</refsect1info>
|
|
|
|
<title>
|
|
|
|
Description
|
|
|
|
</title>
|
|
|
|
<para>
|
2001-12-08 04:24:40 +01:00
|
|
|
<command>CLUSTER</command> instructs <productname>PostgreSQL</productname>
|
2001-01-14 00:58:55 +01:00
|
|
|
to cluster the table specified
|
2002-08-11 04:43:57 +02:00
|
|
|
by <replaceable class="parameter">tablename</replaceable>
|
1999-07-06 19:16:42 +02:00
|
|
|
based on the index specified by
|
|
|
|
<replaceable class="parameter">indexname</replaceable>. The index must
|
|
|
|
already have been defined on
|
2001-01-14 00:58:55 +01:00
|
|
|
<replaceable class="parameter">tablename</replaceable>.
|
1999-07-06 19:16:42 +02:00
|
|
|
</para>
|
1999-07-22 17:09:15 +02:00
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
<para>
|
2001-01-14 00:58:55 +01:00
|
|
|
When a table is clustered, it is physically reordered
|
2002-08-11 04:43:57 +02:00
|
|
|
based on the index information. Clustering is a one-time operation:
|
|
|
|
when the table is subsequently updated, the changes are
|
|
|
|
not clustered. That is, no attempt is made to store new or
|
|
|
|
updated tuples according to their index order. If one wishes, one can
|
|
|
|
periodically re-cluster by issuing the command again.
|
1999-07-06 19:16:42 +02:00
|
|
|
</para>
|
|
|
|
|
2002-11-15 04:09:39 +01:00
|
|
|
<para>
|
|
|
|
When a table is clustered, <productname>PostgreSQL</productname>
|
|
|
|
remembers on which index it was clustered. In calls to
|
|
|
|
<command>CLUSTER <replaceable class="parameter">tablename</replaceable></command>,
|
|
|
|
the table is clustered on the same index that it was clustered on before.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
2002-11-18 18:12:07 +01:00
|
|
|
A simple <command>CLUSTER</command> clusters all the tables in the database
|
|
|
|
that the calling user owns, using the saved cluster information. This
|
2002-11-15 04:09:39 +01:00
|
|
|
form of <command>CLUSTER</command> cannot be called from inside a
|
|
|
|
transaction or function.
|
|
|
|
</para>
|
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
<refsect2 id="R2-SQL-CLUSTER-3">
|
|
|
|
<refsect2info>
|
|
|
|
<date>1998-09-08</date>
|
|
|
|
</refsect2info>
|
|
|
|
<title>
|
|
|
|
Notes
|
|
|
|
</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
In cases where you are accessing single rows randomly
|
|
|
|
within a table, the actual order of the data in the heap
|
|
|
|
table is unimportant. However, if you tend to access some
|
|
|
|
data more than others, and there is an index that groups
|
|
|
|
them together, you will benefit from using <command>CLUSTER</command>.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
1999-07-22 17:09:15 +02:00
|
|
|
Another place where <command>CLUSTER</command> is helpful is in
|
|
|
|
cases where you use an
|
1999-07-06 19:16:42 +02:00
|
|
|
index to pull out several rows from a table. If you are
|
|
|
|
requesting a range of indexed values from a table, or a
|
|
|
|
single indexed value that has multiple rows that match,
|
|
|
|
<command>CLUSTER</command> will help because once the index identifies the
|
|
|
|
heap page for the first row that matches, all other rows
|
|
|
|
that match are probably already on the same heap page,
|
|
|
|
saving disk accesses and speeding up the query.
|
|
|
|
</para>
|
1998-10-30 20:34:40 +01:00
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
<para>
|
2002-08-11 04:43:57 +02:00
|
|
|
During the cluster operation, a temporary copy of the table is created
|
|
|
|
that contains the table data in the index order. Temporary copies of
|
|
|
|
each index on the table are created as well. Therefore, you need free
|
|
|
|
space on disk at least equal to the sum of the table size and the index
|
|
|
|
sizes.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
2002-11-15 04:09:39 +01:00
|
|
|
<command>CLUSTER</command> preserves GRANT, inheritance, index, foreign
|
|
|
|
key, and other ancillary information about the table.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Because <command>CLUSTER</command> remembers the clustering information,
|
|
|
|
one can cluster the tables one wants clustered manually the first time, and
|
|
|
|
set up a timed event similar to <command>VACUUM</command> so that the tables
|
|
|
|
are periodically and automatically clustered.
|
2002-08-11 04:43:57 +02:00
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
Because the optimizer records statistics about the ordering of tables, it
|
|
|
|
is advisable to run <command>ANALYZE</command> on the newly clustered
|
|
|
|
table. Otherwise, the optimizer may make poor choices of query plans.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
There is another way to cluster data. The
|
|
|
|
<command>CLUSTER</command> command reorders the original table using
|
1999-07-06 19:16:42 +02:00
|
|
|
the ordering of the index you specify. This can be slow
|
|
|
|
on large tables because the rows are fetched from the heap
|
|
|
|
in index order, and if the heap table is unordered, the
|
|
|
|
entries are on random pages, so there is one disk page
|
2002-08-11 04:43:57 +02:00
|
|
|
retrieved for every row moved. (<productname>PostgreSQL</productname> has a cache,
|
|
|
|
but the majority of a big table will not fit in the cache.)
|
|
|
|
The other way to cluster a table is to use
|
1999-07-22 17:09:15 +02:00
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
<programlisting>
|
1999-07-22 17:09:15 +02:00
|
|
|
SELECT <replaceable class="parameter">columnlist</replaceable> INTO TABLE <replaceable class="parameter">newtable</replaceable>
|
|
|
|
FROM <replaceable class="parameter">table</replaceable> ORDER BY <replaceable class="parameter">columnlist</replaceable>
|
1999-07-06 19:16:42 +02:00
|
|
|
</programlisting>
|
1999-07-22 17:09:15 +02:00
|
|
|
|
2001-12-08 04:24:40 +01:00
|
|
|
which uses the <productname>PostgreSQL</productname> sorting code in
|
2002-08-11 04:43:57 +02:00
|
|
|
the ORDER BY clause to create the desired order; this is usually much
|
2002-09-21 20:32:54 +02:00
|
|
|
faster than an index scan for
|
1999-07-06 19:16:42 +02:00
|
|
|
unordered data. You then drop the old table, use
|
2001-10-31 05:49:44 +01:00
|
|
|
<command>ALTER TABLE...RENAME</command>
|
2001-12-30 05:36:52 +01:00
|
|
|
to rename <replaceable class="parameter">newtable</replaceable> to the old name, and
|
2002-08-11 04:43:57 +02:00
|
|
|
recreate the table's indexes. However, this approach does not preserve
|
|
|
|
OIDs, constraints, foreign key relationships, granted privileges, and
|
|
|
|
other ancillary properties of the table --- all such items must be
|
|
|
|
manually recreated.
|
2002-08-10 22:43:46 +02:00
|
|
|
</para>
|
|
|
|
|
1998-12-29 03:24:47 +01:00
|
|
|
</refsect2>
|
|
|
|
</refsect1>
|
1999-07-06 19:16:42 +02:00
|
|
|
|
|
|
|
<refsect1 id="R1-SQL-CLUSTER-2">
|
|
|
|
<title>
|
1998-12-29 03:24:47 +01:00
|
|
|
Usage
|
1999-07-06 19:16:42 +02:00
|
|
|
</title>
|
|
|
|
<para>
|
2002-08-11 04:43:57 +02:00
|
|
|
Cluster the employees relation on the basis of its ID attribute:
|
1999-07-06 19:16:42 +02:00
|
|
|
</para>
|
|
|
|
<programlisting>
|
|
|
|
CLUSTER emp_ind ON emp;
|
|
|
|
</programlisting>
|
2002-11-15 04:09:39 +01:00
|
|
|
<para>
|
|
|
|
Cluster the employees relation using the same index that was used before:
|
|
|
|
</para>
|
|
|
|
<programlisting>
|
|
|
|
CLUSTER emp;
|
|
|
|
</programlisting>
|
|
|
|
<para>
|
|
|
|
Cluster all the tables in the database that have previously been clustered:
|
|
|
|
</para>
|
|
|
|
<programlisting>
|
2002-11-18 18:12:07 +01:00
|
|
|
CLUSTER;
|
2002-11-15 04:09:39 +01:00
|
|
|
</programlisting>
|
1999-07-06 19:16:42 +02:00
|
|
|
</refsect1>
|
|
|
|
|
|
|
|
<refsect1 id="R1-SQL-CLUSTER-3">
|
|
|
|
<title>
|
1998-12-29 03:24:47 +01:00
|
|
|
Compatibility
|
1999-07-06 19:16:42 +02:00
|
|
|
</title>
|
|
|
|
|
|
|
|
<refsect2 id="R2-SQL-CLUSTER-4">
|
|
|
|
<refsect2info>
|
|
|
|
<date>1998-09-08</date>
|
|
|
|
</refsect2info>
|
|
|
|
<title>
|
1998-12-29 03:24:47 +01:00
|
|
|
SQL92
|
1999-07-06 19:16:42 +02:00
|
|
|
</title>
|
|
|
|
<para>
|
1998-12-29 03:24:47 +01:00
|
|
|
There is no <command>CLUSTER</command> statement in SQL92.
|
1999-07-06 19:16:42 +02:00
|
|
|
</para>
|
1998-12-29 03:24:47 +01:00
|
|
|
</refsect2>
|
1998-05-13 07:34:00 +02:00
|
|
|
</refsect1>
|
1999-07-06 19:16:42 +02:00
|
|
|
</refentry>
|
1998-05-13 07:34:00 +02:00
|
|
|
|
|
|
|
<!-- Keep this comment at the end of the file
|
|
|
|
Local variables:
|
|
|
|
mode: sgml
|
1999-07-06 19:16:42 +02:00
|
|
|
sgml-omittag:nil
|
1998-05-13 07:34:00 +02:00
|
|
|
sgml-shorttag:t
|
|
|
|
sgml-minimize-attributes:nil
|
|
|
|
sgml-always-quote-attributes:t
|
|
|
|
sgml-indent-step:1
|
|
|
|
sgml-indent-data:t
|
|
|
|
sgml-parent-document:nil
|
|
|
|
sgml-default-dtd-file:"../reference.ced"
|
|
|
|
sgml-exposed-tags:nil
|
|
|
|
sgml-local-catalogs:"/usr/lib/sgml/catalog"
|
|
|
|
sgml-local-ecat-files:nil
|
|
|
|
End:
|
|
|
|
-->
|
2002-08-10 23:03:33 +02:00
|
|
|
|