Drop the rule against included index columns duplicating key columns.

The initial version of the included-index-column feature stated that included columns couldn't be the same as any key column of the index. While it'd be pretty silly to do that, since the included column would be entirely redundant, we've never prohibited redundant index columns before so it's not very consistent to do so here. Moreover, the prohibition was itself badly implemented, so that it failed to reject columns that were effectively identical but not spelled quite alike, as reported by Aditya Toshniwal. (Moreover, it's not hard to imagine that for some non-btree index types, such cases would be non-silly anyhow: the index might use a lossy representation for key columns but be able to support retrieval of the original form of included columns.) Hence, let's just drop the prohibition. In passing, do some copy-editing on the documentation for the included-column feature. Yugo Nagata; documentation and test corrections by me Discussion: https://postgr.es/m/CAM9w-_mhBCys4fQNfaiQKTRrVWtoFrZ-wXmDuE9Nj5y-Y7aDKQ@mail.gmail.com
2018-07-18 14:43:03 -04:00 · 2018-07-18 14:43:03 -04:00 · 701fd0bbc9
parent 3cb646264e
commit 701fd0bbc9
8 changed files with 58 additions and 64 deletions
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@ -3753,15 +3753,16 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
      <entry><type>int2</type></entry>
      <entry></entry>
      <entry>The total number of columns in the index (duplicates
-      <literal>pg_class.relnatts</literal>). This number includes both key and included attributes.</entry>
+      <literal>pg_class.relnatts</literal>); this number includes both key and included attributes</entry>
     </row>

      <row>
      <entry><structfield>indnkeyatts</structfield></entry>
      <entry><type>int2</type></entry>
      <entry></entry>
-      <entry>The number of key columns in the index. "Key columns" are ordinary
-      index columns (as opposed to "included" columns).</entry>
+      <entry>The number of <firstterm>key columns</firstterm> in the index,
+       not counting any <firstterm>included columns</firstterm>, which are
+       merely stored and do not participate in the index semantics</entry>
     </row>

     <row>
@ -3867,7 +3868,8 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
       This is an array of <structfield>indnatts</structfield> values that
       indicate which table columns this index indexes.  For example a value
       of <literal>1 3</literal> would mean that the first and the third table
-       columns make up the index key.  A zero in this array indicates that the
+       columns make up the index entries.  Key columns come before non-key
+       (included) columns.  A zero in this array indicates that the
       corresponding index attribute is an expression over the table columns,
       rather than a simple column reference.
      </entry>
@ -3878,9 +3880,10 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
      <entry><type>oidvector</type></entry>
      <entry><literal><link linkend="catalog-pg-collation"><structname>pg_collation</structname></link>.oid</literal></entry>
      <entry>
-       For each column in the index key, this contains the OID of the
-       collation to use for the index, or zero if the column is not
-       of a collatable data type.
+       For each column in the index key
+       (<structfield>indnkeyatts</structfield> values), this contains the OID
+       of the collation to use for the index, or zero if the column is not of
+       a collatable data type.
      </entry>
     </row>

@ -3889,8 +3892,9 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
      <entry><type>oidvector</type></entry>
      <entry><literal><link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link>.oid</literal></entry>
      <entry>
-       For each column in the index key, this contains the OID of
-       the operator class to use.  See
+       For each column in the index key
+       (<structfield>indnkeyatts</structfield> values), this contains the OID
+       of the operator class to use.  See
       <link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link> for details.
      </entry>
     </row>
@ -3900,7 +3904,7 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
      <entry><type>int2vector</type></entry>
      <entry></entry>
      <entry>
-       This is an array of <structfield>indnatts</structfield> values that
+       This is an array of <structfield>indnkeyatts</structfield> values that
       store per-column flag bits.  The meaning of the bits is defined by
       the index's access method.
      </entry>
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@ -112,10 +112,10 @@ typedef struct IndexAmRoutine
    bool        ampredlocks;
    /* does AM support parallel scan? */
    bool        amcanparallel;
-    /* type of data stored in index, or InvalidOid if variable */
-    Oid         amkeytype;
    /* does AM support columns included with clause INCLUDE? */
    bool        amcaninclude;
+    /* type of data stored in index, or InvalidOid if variable */
+    Oid         amkeytype;

    /* interface functions */
    ambuild_function ambuild;
@ -987,8 +987,9 @@ amparallelrescan (IndexScanDesc scan);
   using <firstterm>unique indexes</firstterm>, which are indexes that disallow
   multiple entries with identical keys.  An access method that supports this
   feature sets <structfield>amcanunique</structfield> true.
-   (At present, only b-tree supports it.) Columns listed in the
-    <literal>INCLUDE</literal> clause are not used to enforce uniqueness.
+   (At present, only b-tree supports it.)  Columns listed in the
+   <literal>INCLUDE</literal> clause are not considered when enforcing
+   uniqueness.
  </para>

  <para>
--- a/doc/src/sgml/indices.sgml
+++ b/doc/src/sgml/indices.sgml
@ -639,7 +639,7 @@ CREATE INDEX test3_desc_index ON test3 (id DESC NULLS LAST);
   or the uniqueness of the combined values of more than one column.
 <synopsis>
 CREATE UNIQUE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> (<replaceable>column</replaceable> <optional>, ...</optional>)
-[ INCLUDE (<replaceable>column</replaceable> <optional>, ...</optional>) ];
+<optional> INCLUDE (<replaceable>column</replaceable> <optional>, ...</optional>) </optional>;
 </synopsis>
   Currently, only B-tree indexes can be declared unique.
  </para>
@ -648,9 +648,9 @@ CREATE UNIQUE INDEX <replaceable>name</replaceable> ON <replaceable>table</repla
   When an index is declared unique, multiple table rows with equal
   indexed values are not allowed.  Null values are not considered
   equal.  A multicolumn unique index will only reject cases where all
-   indexed columns are equal in multiple rows.  Columns listed in the
-   <literal>INCLUDE</literal> clause aren't used to enforce constraints
-   (UNIQUE, PRIMARY KEY, etc).
+   indexed columns are equal.  Columns listed in
+   the <literal>INCLUDE</literal> clause, if any, aren't considered when
+   determining whether index entries are equal.
  </para>

  <para>
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@ -149,25 +149,28 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
      <listitem>
       <para>
        The optional <literal>INCLUDE</literal> clause specifies a
-        list of columns which will be included as a non-key part in the index.
-        Columns listed in this clause cannot also be present as index key columns.
-        The <literal>INCLUDE</literal> columns exist solely to
-        allow more queries to benefit from <firstterm>index-only scans</firstterm>
-        by including the values of the specified columns in the index.  These values
-        would otherwise have to be obtained by reading the table's heap.
+        list of columns which will be included in the index
+        as <firstterm>non-key</firstterm> columns.  A non-key column cannot
+        be used in an index scan search qualification, and it is disregarded
+        for purposes of any uniqueness or exclusion constraint enforced by
+        the index.  However, an index-only scan can return the contents of
+        non-key columns without having to visit the index's table, since
+        they are available directly from the index entry.  Thus, addition of
+        non-key columns allows index-only scans to be used for queries that
+        otherwise could not use them.
       </para>

       <para>
-        In <literal>UNIQUE</literal> indexes, uniqueness is only enforced
-        for key columns.  Columns listed in the <literal>INCLUDE</literal>
-        clause have no effect on uniqueness enforcement.  Other constraints
-        (<literal>PRIMARY KEY</literal> and <literal>EXCLUDE</literal>) work
-        the same way.
+        It's wise to be conservative about adding non-key columns to an
+        index, especially wide columns.  If an index tuple exceeds the
+        maximum size allowed for the index type, data insertion will fail.
+        In any case, non-key columns duplicate data from the index's table
+        and bloat the size of the index, thus potentially slowing searches.
       </para>

       <para>
        Columns listed in the <literal>INCLUDE</literal> clause don't need
-        appropriate operator classes; the clause can contain non-key index
+        appropriate operator classes; the clause can include
        columns whose data types don't have operator classes defined for
        a given access method.
       </para>
@ -181,15 +184,8 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
        Currently, only the B-tree index access method supports this feature.
        In B-tree indexes, the values of columns listed in the
        <literal>INCLUDE</literal> clause are included in leaf tuples which
-        are linked to the heap tuples, but are not included into pivot tuples
-        used for tree navigation.  Therefore, moving columns from the list of
-        key columns to the <literal>INCLUDE</literal> clause can slightly
-        reduce index size and improve the tree branching factor.
-       </para>
-
-       <para>
-        Indexes with columns listed in the <literal>INCLUDE</literal> clause
-        are also called <quote>covering indexes</quote>.
+        correspond to heap tuples, but are not included in upper-level
+        index entries used for tree navigation.
       </para>
      </listitem>
     </varlistentry>
@ -376,7 +372,7 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class=
       columns is updated and so the recheck is not worth the additional cost
       of executing the function.
     </para>
-       
+
     <para>
       Functional indexes are used frequently for the case where the function
       returns a subset of the argument. Examples of this would be accessing
@ -789,8 +785,8 @@ CREATE UNIQUE INDEX title_idx ON films (title);

  <para>
   To create a unique B-tree index on the column <literal>title</literal>
-   and included columns <literal>director</literal> and <literal>rating</literal>
-   in the table <literal>films</literal>:
+   with included columns <literal>director</literal>
+   and <literal>rating</literal> in the table <literal>films</literal>:
 <programlisting>
 CREATE UNIQUE INDEX title_idx ON films (title) INCLUDE (director, rating);
 </programlisting>
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@ -869,9 +869,8 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
      one or more columns on which the uniqueness is not enforced.
      Note that although the constraint is not enforced on the included columns,
      it still depends on them.  Consequently, some operations on these columns
-      (e.g. <literal>DROP COLUMN</literal>) can cause cascade constraint and
-      index deletion.  See paragraph about <literal>INCLUDE</literal> in
-      <xref linkend="sql-createindex"/> for more information.
+      (e.g. <literal>DROP COLUMN</literal>) can cause cascaded constraint and
+      index deletion.
     </para>
    </listitem>
   </varlistentry>
@ -916,9 +915,8 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
      of columns to be specified which will be included in the non-key portion
      of the index.  Although uniqueness is not enforced on the included columns,
      the constraint still depends on them. Consequently, some operations on the
-      included columns (e.g. <literal>DROP COLUMN</literal>) can cause cascade
-      constraint and index deletion. See paragraph about <literal>INCLUDE</literal>
-      in <xref linkend="sql-createindex"/> for more information.
+      included columns (e.g. <literal>DROP COLUMN</literal>) can cause cascaded
+      constraint and index deletion.
     </para>
    </listitem>
   </varlistentry>
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@ -369,11 +369,6 @@ DefineIndex(Oid relationId,
 	Snapshot	snapshot;
 	int			i;

-	if (list_intersection(stmt->indexParams, stmt->indexIncludingParams) != NIL)
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
-				 errmsg("included columns must not intersect with key columns")));
-
 	/*
 	 * count key attributes in index
 	 */
@ -596,7 +591,7 @@ DefineIndex(Oid relationId,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("access method \"%s\" does not support unique indexes",
 						accessMethodName)));
-	if (list_length(stmt->indexIncludingParams) > 0 && !amRoutine->amcaninclude)
+	if (stmt->indexIncludingParams != NIL && !amRoutine->amcaninclude)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("access method \"%s\" does not support included columns",
--- a/src/test/regress/expected/index_including.out
+++ b/src/test/regress/expected/index_including.out
@ -7,17 +7,17 @@
 -- Regular index with included columns
 CREATE TABLE tbl_include_reg (c1 int, c2 int, c3 int, c4 box);
 INSERT INTO tbl_include_reg SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-CREATE INDEX tbl_include_reg_idx ON tbl_include_reg using btree (c1, c2) INCLUDE (c3,c4);
-- must fail because of intersection of key and included columns
-CREATE INDEX tbl_include_reg_idx ON tbl_include_reg using btree (c1, c2) INCLUDE (c1,c3);
-ERROR:  included columns must not intersect with key columns
+CREATE INDEX tbl_include_reg_idx ON tbl_include_reg (c1, c2) INCLUDE (c3, c4);
+-- duplicate column is pretty pointless, but we allow it anyway
+CREATE INDEX ON tbl_include_reg (c1, c2) INCLUDE (c1, c3);
 SELECT pg_get_indexdef(i.indexrelid)
 FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid
 WHERE i.indrelid = 'tbl_include_reg'::regclass ORDER BY c.relname;
-                                         pg_get_indexdef                                          
--------------------------------------------------------------------------------------------------
+                                                pg_get_indexdef                                                
+---------------------------------------------------------------------------------------------------------------
+ CREATE INDEX tbl_include_reg_c1_c2_c11_c3_idx ON public.tbl_include_reg USING btree (c1, c2) INCLUDE (c1, c3)
 CREATE INDEX tbl_include_reg_idx ON public.tbl_include_reg USING btree (c1, c2) INCLUDE (c3, c4)
-(1 row)
+(2 rows)

 -- Unique index and unique constraint
 CREATE TABLE tbl_include_unique1 (c1 int, c2 int, c3 int, c4 box);
--- a/src/test/regress/sql/index_including.sql
+++ b/src/test/regress/sql/index_including.sql
@ -8,9 +8,9 @@
 -- Regular index with included columns
 CREATE TABLE tbl_include_reg (c1 int, c2 int, c3 int, c4 box);
 INSERT INTO tbl_include_reg SELECT x, 2*x, 3*x, box('4,4,4,4') FROM generate_series(1,10) AS x;
-CREATE INDEX tbl_include_reg_idx ON tbl_include_reg using btree (c1, c2) INCLUDE (c3,c4);
-- must fail because of intersection of key and included columns
-CREATE INDEX tbl_include_reg_idx ON tbl_include_reg using btree (c1, c2) INCLUDE (c1,c3);
+CREATE INDEX tbl_include_reg_idx ON tbl_include_reg (c1, c2) INCLUDE (c3, c4);
+-- duplicate column is pretty pointless, but we allow it anyway
+CREATE INDEX ON tbl_include_reg (c1, c2) INCLUDE (c1, c3);
 SELECT pg_get_indexdef(i.indexrelid)
 FROM pg_index i JOIN pg_class c ON i.indexrelid = c.oid
 WHERE i.indrelid = 'tbl_include_reg'::regclass ORDER BY c.relname;