1998-09-07 17:58:31 +02:00
|
|
|
<REFENTRY ID="SQL-CLUSTER">
|
1998-05-13 07:34:00 +02:00
|
|
|
<REFMETA>
|
|
|
|
<REFENTRYTITLE>
|
|
|
|
CLUSTER
|
|
|
|
</REFENTRYTITLE>
|
|
|
|
<REFMISCINFO>SQL - Language Statements</REFMISCINFO>
|
|
|
|
</REFMETA>
|
|
|
|
<REFNAMEDIV>
|
|
|
|
<REFNAME>
|
|
|
|
CLUSTER
|
|
|
|
</REFNAME>
|
|
|
|
<REFPURPOSE>
|
1998-09-07 17:58:31 +02:00
|
|
|
Gives storage clustering advice to the backend
|
1998-05-13 07:34:00 +02:00
|
|
|
</REFPURPOSE>
|
|
|
|
|
|
|
|
<REFSYNOPSISDIV>
|
|
|
|
<REFSYNOPSISDIVINFO>
|
1998-09-16 16:43:12 +02:00
|
|
|
<DATE>1998-09-08</DATE>
|
1998-05-13 07:34:00 +02:00
|
|
|
</REFSYNOPSISDIVINFO>
|
|
|
|
<SYNOPSIS>
|
1998-09-16 16:43:12 +02:00
|
|
|
CLUSTER <REPLACEABLE CLASS="PARAMETER">indexname</REPLACEABLE> ON <REPLACEABLE CLASS="PARAMETER">table</REPLACEABLE>
|
1998-05-13 07:34:00 +02:00
|
|
|
</SYNOPSIS>
|
|
|
|
|
|
|
|
<REFSECT2 ID="R2-SQL-CLUSTER-1">
|
|
|
|
<REFSECT2INFO>
|
1998-09-16 16:43:12 +02:00
|
|
|
<DATE>1998-09-08</DATE>
|
1998-05-13 07:34:00 +02:00
|
|
|
</REFSECT2INFO>
|
|
|
|
<TITLE>
|
|
|
|
Inputs
|
|
|
|
</TITLE>
|
|
|
|
<PARA>
|
|
|
|
</PARA>
|
|
|
|
<VARIABLELIST>
|
|
|
|
<VARLISTENTRY>
|
|
|
|
<TERM>
|
|
|
|
<REPLACEABLE CLASS="PARAMETER">indexname</REPLACEABLE>
|
|
|
|
</TERM>
|
|
|
|
<LISTITEM>
|
|
|
|
<PARA>
|
|
|
|
The name of an index.
|
|
|
|
</PARA>
|
|
|
|
</LISTITEM>
|
|
|
|
</VARLISTENTRY>
|
|
|
|
<VARLISTENTRY>
|
|
|
|
<TERM>
|
|
|
|
<REPLACEABLE CLASS="PARAMETER">table</REPLACEABLE>
|
|
|
|
</TERM>
|
|
|
|
<LISTITEM>
|
|
|
|
<PARA>
|
|
|
|
The name of a table.
|
|
|
|
</PARA>
|
|
|
|
</LISTITEM>
|
|
|
|
</VARLISTENTRY>
|
|
|
|
</VARIABLELIST>
|
|
|
|
</REFSECT2>
|
|
|
|
|
|
|
|
<REFSECT2 ID="R2-SQL-CLUSTER-2">
|
|
|
|
<REFSECT2INFO>
|
1998-09-16 16:43:12 +02:00
|
|
|
<DATE>1998-09-08</DATE>
|
1998-05-13 07:34:00 +02:00
|
|
|
</REFSECT2INFO>
|
|
|
|
<TITLE>
|
|
|
|
Outputs
|
|
|
|
</TITLE>
|
|
|
|
<PARA>
|
|
|
|
</PARA>
|
|
|
|
<VARIABLELIST>
|
|
|
|
<VARLISTENTRY>
|
|
|
|
<TERM>
|
1998-09-16 16:43:12 +02:00
|
|
|
<replaceable>status</replaceable>
|
1998-05-13 07:34:00 +02:00
|
|
|
</TERM>
|
|
|
|
<LISTITEM>
|
|
|
|
<PARA>
|
|
|
|
<VARIABLELIST>
|
|
|
|
<VARLISTENTRY>
|
|
|
|
<TERM>
|
1998-09-16 16:43:12 +02:00
|
|
|
<returnvalue>CLUSTER</returnvalue>
|
1998-05-13 07:34:00 +02:00
|
|
|
</TERM>
|
|
|
|
<LISTITEM>
|
|
|
|
<PARA>
|
|
|
|
The clustering was done successfully.
|
|
|
|
</PARA>
|
|
|
|
</LISTITEM>
|
|
|
|
</VARLISTENTRY>
|
|
|
|
<VARLISTENTRY>
|
|
|
|
<TERM>
|
1998-09-16 16:43:12 +02:00
|
|
|
<returnvalue>ERROR: relation &lt;<REPLACEABLE CLASS="PARAMETER">tablerelation_number</REPLACEABLE>&gt; inherits "invoice"</returnvalue>
|
1998-05-13 07:34:00 +02:00
|
|
|
</TERM>
|
|
|
|
<LISTITEM>
|
|
|
|
<PARA>
|
1998-09-16 16:43:12 +02:00
|
|
|
|
1998-05-13 07:34:00 +02:00
|
|
|
<comment>
|
|
|
|
This is not documented anywhere. It does not seem to be possible to
|
|
|
|
cluster a table that is inherited.
|
|
|
|
</comment>
|
|
|
|
</PARA>
|
|
|
|
</LISTITEM>
|
|
|
|
</VARLISTENTRY>
|
|
|
|
<VARLISTENTRY>
|
|
|
|
<TERM>
|
1998-09-16 16:43:12 +02:00
|
|
|
<returnvalue>ERROR: Relation x does not exist!</returnvalue>
|
1998-05-13 07:34:00 +02:00
|
|
|
</TERM>
|
|
|
|
<LISTITEM>
|
|
|
|
<PARA>
|
1998-09-16 16:43:12 +02:00
|
|
|
|
1998-05-13 07:34:00 +02:00
|
|
|
<comment>
|
|
|
|
The relation complained of was not shown in the error message,
|
|
|
|
which contained a random string instead of the relation name.
|
|
|
|
</comment>
|
|
|
|
</PARA>
|
|
|
|
</LISTITEM>
|
|
|
|
</VARLISTENTRY>
|
|
|
|
</variablelist>
|
|
|
|
</LISTITEM>
|
|
|
|
</VARLISTENTRY>
|
|
|
|
</VARIABLELIST>
|
|
|
|
|
|
|
|
</REFSECT2>
|
|
|
|
</REFSYNOPSISDIV>
|
|
|
|
|
|
|
|
<REFSECT1 ID="R1-SQL-CLUSTER-1">
|
|
|
|
<REFSECT1INFO>
|
1998-09-16 16:43:12 +02:00
|
|
|
<DATE>1998-09-08</DATE>
|
1998-05-13 07:34:00 +02:00
|
|
|
</REFSECT1INFO>
|
|
|
|
<TITLE>
|
|
|
|
Description
|
|
|
|
</TITLE>
|
|
|
|
<PARA>
|
1998-09-16 16:43:12 +02:00
|
|
|
<command>CLUSTER</command> instructs <productname>Postgres</productname>
|
|
|
|
to cluster the class specified
|
1998-05-13 07:34:00 +02:00
|
|
|
by <replaceable class="parameter">table</replaceable> approximately
|
|
|
|
based on the index specified by
|
|
|
|
<replaceable class="parameter">indexname</replaceable>. The index must
|
1998-09-16 16:43:12 +02:00
|
|
|
already have been defined on
|
|
|
|
<replaceable class="parameter">table</replaceable>.
|
1998-05-13 07:34:00 +02:00
|
|
|
</PARA>
|
|
|
|
<para>
|
|
|
|
When a class is clustered, it is physically reordered
|
|
|
|
based on the index information. The clustering is static.
|
|
|
|
In other words, as the class is updated, the changes are
|
|
|
|
not clustered. No attempt is made to keep new instances or
|
1998-09-16 16:43:12 +02:00
|
|
|
updated tuples clustered. If one wishes, one can
|
1998-05-13 07:34:00 +02:00
|
|
|
recluster manually by issuing the command again.
|
|
|
|
</para>
|
|
|
|
|
1998-09-16 16:43:12 +02:00
|
|
|
<REFSECT2 ID="R2-SQL-CLUSTER-3">
|
|
|
|
<REFSECT2INFO>
|
|
|
|
<DATE>1998-09-08</DATE>
|
|
|
|
</REFSECT2INFO>
|
|
|
|
<TITLE>
|
|
|
|
Notes
|
|
|
|
</TITLE>
|
|
|
|
|
1998-05-13 07:34:00 +02:00
|
|
|
<para>
|
|
|
|
The table is actually copied to a temporary table in index
|
|
|
|
order, then renamed back to the original name. For this
|
|
|
|
reason, all grant permissions and other indexes are lost
|
|
|
|
when clustering is performed.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
In cases where you are accessing single rows randomly
|
|
|
|
within a table, the actual order of the data in the heap
|
|
|
|
table is unimportant. However, if you tend to access some
|
|
|
|
data more than others, and there is an index that groups
|
1998-09-16 16:43:12 +02:00
|
|
|
them together, you will benefit from using <command>CLUSTER</command>.
|
1998-05-13 07:34:00 +02:00
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
1998-09-16 16:43:12 +02:00
|
|
|
<command>CLUSTER</command> is also helpful in cases where you use an
|
1998-05-13 07:34:00 +02:00
|
|
|
index to pull out several rows from a table. If you are
|
|
|
|
requesting a range of indexed values from a table, or a
|
|
|
|
single indexed value that has multiple rows that match,
|
1998-09-16 16:43:12 +02:00
|
|
|
<command>CLUSTER</command> will help because once the index identifies the
|
1998-05-13 07:34:00 +02:00
|
|
|
heap page for the first row that matches, all other rows
|
|
|
|
that match are probably already on the same heap page,
|
|
|
|
saving disk accesses and speeding up the query.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
There are two ways to cluster data. The first is with the
|
1998-09-16 16:43:12 +02:00
|
|
|
<command>CLUSTER</command> command, which reorders the original table with
|
1998-05-13 07:34:00 +02:00
|
|
|
the ordering of the index you specify. This can be slow
|
|
|
|
on large tables because the rows are fetched from the heap
|
|
|
|
in index order, and if the heap table is unordered, the
|
|
|
|
entries are on random pages, so there is one disk page
|
1998-09-16 16:43:12 +02:00
|
|
|
retrieved for every row moved. <productname>Postgres</productname> has a cache,
|
1998-05-13 07:34:00 +02:00
|
|
|
but the majority of a big table will not fit in the cache.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
1998-09-16 16:43:12 +02:00
|
|
|
Another way to cluster data is to use
|
|
|
|
<programlisting>
|
|
|
|
SELECT ... INTO TABLE <replaceable class="parameter">temp</replaceable> FROM ... ORDER BY ...
|
|
|
|
</programlisting>
|
|
|
|
This uses the <productname>Postgres</productname> sorting code in
|
1998-05-13 07:34:00 +02:00
|
|
|
ORDER BY to match the index, and is much faster for
|
|
|
|
unordered data. You then drop the old table, use
|
1998-09-16 16:43:12 +02:00
|
|
|
<command>ALTER TABLE/RENAME</command>
|
|
|
|
to rename <replaceable class="parameter">temp</replaceable> to the old name, and
|
|
|
|
recreate any indexes. The only problem is that <acronym>OID</acronym>s
|
|
|
|
will not be preserved. From then on, <command>CLUSTER</command> should be
|
1998-05-13 07:34:00 +02:00
|
|
|
fast because most of the heap data has already been
|
|
|
|
ordered, and the existing index is used.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
|
|
|
|
<REFSECT1 ID="R1-SQL-CLUSTER-2">
|
|
|
|
<TITLE>
|
|
|
|
Usage
|
|
|
|
</TITLE>
|
|
|
|
<PARA>
|
|
|
|
Cluster the employees relation on the basis of its salary attribute:
|
|
|
|
</PARA>
|
|
|
|
<ProgramListing>
|
1998-09-16 16:43:12 +02:00
|
|
|
CLUSTER emp_ind ON emp
|
1998-05-13 07:34:00 +02:00
|
|
|
</ProgramListing>
|
|
|
|
</REFSECT1>
|
|
|
|
|
|
|
|
<REFSECT1 ID="R1-SQL-CLUSTER-3">
|
|
|
|
<TITLE>
|
|
|
|
Compatibility
|
|
|
|
</TITLE>
|
|
|
|
<PARA>
|
|
|
|
</PARA>
|
|
|
|
|
|
|
|
<REFSECT2 ID="R2-SQL-CLUSTER-4">
|
|
|
|
<REFSECT2INFO>
|
1998-09-16 16:43:12 +02:00
|
|
|
<DATE>1998-09-08</DATE>
|
1998-05-13 07:34:00 +02:00
|
|
|
</REFSECT2INFO>
|
|
|
|
<TITLE>
|
|
|
|
SQL92
|
|
|
|
</TITLE>
|
|
|
|
<PARA>
|
1998-09-16 16:43:12 +02:00
|
|
|
There is no <command>CLUSTER</command> statement in SQL92.
|
1998-05-13 07:34:00 +02:00
|
|
|
</PARA>
|
|
|
|
</refsect2>
|
|
|
|
</refsect1>
|
|
|
|
</REFENTRY>
|
|
|
|
|
|
|
|
|
|
|
|
<!-- Keep this comment at the end of the file
|
|
|
|
Local variables:
|
|
|
|
mode: sgml
|
|
|
|
sgml-omittag:t
|
|
|
|
sgml-shorttag:t
|
|
|
|
sgml-minimize-attributes:nil
|
|
|
|
sgml-always-quote-attributes:t
|
|
|
|
sgml-indent-step:1
|
|
|
|
sgml-indent-data:t
|
|
|
|
sgml-parent-document:nil
|
|
|
|
sgml-default-dtd-file:"../reference.ced"
|
|
|
|
sgml-exposed-tags:nil
|
|
|
|
sgml-local-catalogs:"/usr/lib/sgml/catalog"
|
|
|
|
sgml-local-ecat-files:nil
|
|
|
|
End:
|
|
|
|
-->
|