Implement multivariate n-distinct coefficients

Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations than individual table columns.  Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too.  All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.

This commit only adds support for collection of n-distinct coefficients
on user-specified sets of columns in a single table.  This is useful to
estimate the number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve.  A new
special pseudo-type pg_ndistinct is used.

(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
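
As a rough, editorial illustration (not part of the commit itself), the
estimation problem being addressed looks roughly like this; the table and
statistics names are made up and the estimated group counts are approximate:

-- two correlated columns: b is fully determined by a
CREATE TABLE t (a int, b int);
INSERT INTO t SELECT i % 100, (i % 100) % 50 FROM generate_series(1, 1000000) s(i);
ANALYZE t;
-- without extended statistics the planner assumes independence and
-- estimates roughly 100 * 50 = 5000 groups, although only 100 exist
EXPLAIN SELECT a, b FROM t GROUP BY a, b;
-- an ndistinct statistics object lets ANALYZE record the combined group count
CREATE STATISTICS t_ab ON (a, b) FROM t;
ANALYZE t;
EXPLAIN SELECT a, b FROM t GROUP BY a, b;   -- estimate drops to roughly 100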

Author: Tomas Vondra.  Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
    Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
    https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
This commit is contained in:
Alvaro Herrera 2017-03-24 14:06:10 -03:00
parent f120b614e0
commit 7b504eb282
77 changed files with 3599 additions and 63 deletions

View File

@ -295,6 +295,11 @@
<entry>planner statistics</entry>
</row>
<row>
<entry><link linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link></entry>
<entry>extended planner statistics</entry>
</row>
<row>
<entry><link linkend="catalog-pg-subscription"><structname>pg_subscription</structname></link></entry>
<entry>logical replication subscriptions</entry>
@ -4247,6 +4252,98 @@
</table>
</sect1>
<sect1 id="catalog-pg-statistic-ext">
<title><structname>pg_statistic_ext</structname></title>
<indexterm zone="catalog-pg-statistic-ext">
<primary>pg_statistic_ext</primary>
</indexterm>
<para>
The catalog <structname>pg_statistic_ext</structname>
holds extended planner statistics.
</para>
<table>
<title><structname>pg_statistic_ext</> Columns</title>
<tgroup cols="4">
<thead>
<row>
<entry>Name</entry>
<entry>Type</entry>
<entry>References</entry>
<entry>Description</entry>
</row>
</thead>
<tbody>
<row>
<entry><structfield>starelid</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
<entry>The table that the described columns belong to</entry>
</row>
<row>
<entry><structfield>staname</structfield></entry>
<entry><type>name</type></entry>
<entry></entry>
<entry>Name of the statistic.</entry>
</row>
<row>
<entry><structfield>stanamespace</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal><link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.oid</literal></entry>
<entry>
The OID of the namespace that contains this statistic
</entry>
</row>
<row>
<entry><structfield>staowner</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal><link linkend="catalog-pg-authid"><structname>pg_authid</structname></link>.oid</literal></entry>
<entry>Owner of the statistic</entry>
</row>
<row>
<entry><structfield>staenabled</structfield></entry>
<entry><type>char[]</type></entry>
<entry></entry>
<entry>
An array with the modes of the enabled statistic types, encoded as
<literal>d</literal> for ndistinct coefficients.
</entry>
</row>
<row>
<entry><structfield>stakeys</structfield></entry>
<entry><type>int2vector</type></entry>
<entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attnum</literal></entry>
<entry>
This is an array of values that indicate which table columns this
statistic covers. For example a value of <literal>1 3</literal> would
mean that the first and the third table columns make up the statistic key.
</entry>
</row>
<row>
<entry><structfield>standistinct</structfield></entry>
<entry><type>pg_ndistinct</type></entry>
<entry></entry>
<entry>
N-distinct coefficients, serialized as <structname>pg_ndistinct</> type.
</entry>
</row>
</tbody>
</tgroup>
</table>
</sect1>
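
As an editorial aside (not part of the patch), the new catalog can be
inspected directly; using the s1 statistics created in the CREATE STATISTICS
example further down, the output would look roughly like this (values
illustrative):

SELECT staname, stakeys, staenabled, standistinct IS NOT NULL AS built
FROM pg_statistic_ext
WHERE starelid = 't1'::regclass;
--  staname | stakeys | staenabled | built
-- ---------+---------+------------+-------
--  s1      | 1 2     | {d}        | t
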
<sect1 id="catalog-pg-namespace">
<title><structname>pg_namespace</structname></title>

View File

@ -16720,6 +16720,10 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
<primary>pg_get_serial_sequence</primary>
</indexterm>
<indexterm>
<primary>pg_get_statisticsextdef</primary>
</indexterm>
<indexterm>
<primary>pg_get_triggerdef</primary>
</indexterm>
@ -16889,6 +16893,11 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
<entry>get name of the sequence that a <type>serial</type>, <type>smallserial</type> or <type>bigserial</type> column
uses</entry>
</row>
<row>
<entry><literal><function>pg_get_statisticsextdef(<parameter>statext_oid</parameter>)</function></literal></entry>
<entry><type>text</type></entry>
<entry>get <command>CREATE STATISTICS</> command for extended statistics objects</entry>
</row>
<row>
<entry><function>pg_get_triggerdef</function>(<parameter>trigger_oid</parameter>)</entry>
<entry><type>text</type></entry>
@ -17034,19 +17043,20 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
<para>
<function>pg_get_constraintdef</function>,
<function>pg_get_indexdef</function>, <function>pg_get_ruledef</function>,
and <function>pg_get_triggerdef</function>, respectively reconstruct the
creating command for a constraint, index, rule, or trigger. (Note that this
is a decompiled reconstruction, not the original text of the command.)
<function>pg_get_expr</function> decompiles the internal form of an
individual expression, such as the default value for a column. It can be
useful when examining the contents of system catalogs. If the expression
might contain Vars, specify the OID of the relation they refer to as the
second parameter; if no Vars are expected, zero is sufficient.
<function>pg_get_viewdef</function> reconstructs the <command>SELECT</>
query that defines a view. Most of these functions come in two variants,
one of which can optionally <quote>pretty-print</> the result. The
pretty-printed format is more readable, but the default format is more
likely to be interpreted the same way by future versions of
<function>pg_get_statisticsextdef</function>, and
<function>pg_get_triggerdef</function>, respectively reconstruct the
creating command for a constraint, index, rule, extended statistics object,
or trigger. (Note that this is a decompiled reconstruction, not the
original text of the command.) <function>pg_get_expr</function> decompiles
the internal form of an individual expression, such as the default value
for a column. It can be useful when examining the contents of system
catalogs. If the expression might contain Vars, specify the OID of the
relation they refer to as the second parameter; if no Vars are expected,
zero is sufficient. <function>pg_get_viewdef</function> reconstructs the
<command>SELECT</> query that defines a view. Most of these functions come
in two variants, one of which can optionally <quote>pretty-print</> the
result. The pretty-printed format is more readable, but the default format
is more likely to be interpreted the same way by future versions of
<productname>PostgreSQL</>; avoid using pretty-printed output for dump
purposes. Passing <literal>false</> for the pretty-print parameter yields
the same result as the variant that does not have the parameter at all.
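
A small, hedged usage sketch (not part of the patch): the new function
reconstructs the defining command for an extended statistics object, here for
the s1 object from the CREATE STATISTICS example; the output shown is
approximate:

SELECT pg_get_statisticsextdef(oid)
FROM pg_statistic_ext
WHERE staname = 's1';
--         pg_get_statisticsextdef
-- ----------------------------------------
--  CREATE STATISTICS s1 ON (a, b) FROM t1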

View File

@ -34,6 +34,7 @@ Complete list of usable sgml source files in this directory.
<!ENTITY alterSequence SYSTEM "alter_sequence.sgml">
<!ENTITY alterSubscription SYSTEM "alter_subscription.sgml">
<!ENTITY alterSystem SYSTEM "alter_system.sgml">
<!ENTITY alterStatistics SYSTEM "alter_statistics.sgml">
<!ENTITY alterTable SYSTEM "alter_table.sgml">
<!ENTITY alterTableSpace SYSTEM "alter_tablespace.sgml">
<!ENTITY alterTSConfig SYSTEM "alter_tsconfig.sgml">
@ -80,6 +81,7 @@ Complete list of usable sgml source files in this directory.
<!ENTITY createSchema SYSTEM "create_schema.sgml">
<!ENTITY createSequence SYSTEM "create_sequence.sgml">
<!ENTITY createServer SYSTEM "create_server.sgml">
<!ENTITY createStatistics SYSTEM "create_statistics.sgml">
<!ENTITY createSubscription SYSTEM "create_subscription.sgml">
<!ENTITY createTable SYSTEM "create_table.sgml">
<!ENTITY createTableAs SYSTEM "create_table_as.sgml">
@ -126,6 +128,7 @@ Complete list of usable sgml source files in this directory.
<!ENTITY dropSchema SYSTEM "drop_schema.sgml">
<!ENTITY dropSequence SYSTEM "drop_sequence.sgml">
<!ENTITY dropServer SYSTEM "drop_server.sgml">
<!ENTITY dropStatistics SYSTEM "drop_statistics.sgml">
<!ENTITY dropSubscription SYSTEM "drop_subscription.sgml">
<!ENTITY dropTable SYSTEM "drop_table.sgml">
<!ENTITY dropTableSpace SYSTEM "drop_tablespace.sgml">

View File

@ -0,0 +1,115 @@
<!--
doc/src/sgml/ref/alter_statistics.sgml
PostgreSQL documentation
-->
<refentry id="SQL-ALTERSTATISTICS">
<indexterm zone="sql-alterstatistics">
<primary>ALTER STATISTICS</primary>
</indexterm>
<refmeta>
<refentrytitle>ALTER STATISTICS</refentrytitle>
<manvolnum>7</manvolnum>
<refmiscinfo>SQL - Language Statements</refmiscinfo>
</refmeta>
<refnamediv>
<refname>ALTER STATISTICS</refname>
<refpurpose>
change the definition of an extended statistics object
</refpurpose>
</refnamediv>
<refsynopsisdiv>
<synopsis>
ALTER STATISTICS <replaceable class="parameter">name</replaceable> OWNER TO { <replaceable class="PARAMETER">new_owner</replaceable> | CURRENT_USER | SESSION_USER }
ALTER STATISTICS <replaceable class="parameter">name</replaceable> RENAME TO <replaceable class="parameter">new_name</replaceable>
ALTER STATISTICS <replaceable class="parameter">name</replaceable> SET SCHEMA <replaceable class="parameter">new_schema</replaceable>
</synopsis>
</refsynopsisdiv>
<refsect1>
<title>Description</title>
<para>
<command>ALTER STATISTICS</command> changes the parameters of an existing
extended statistics. Any parameters not specifically set in the
<command>ALTER STATISTICS</command> command retain their prior settings.
</para>
<para>
You must own the statistics to use <command>ALTER STATISTICS</>.
To change a statistics' schema, you must also have <literal>CREATE</>
privilege on the new schema.
To alter the owner, you must also be a direct or indirect member of the new
owning role, and that role must have <literal>CREATE</literal> privilege on
the statistics' schema. (These restrictions enforce that altering the owner
doesn't do anything you couldn't do by dropping and recreating the statistics.
However, a superuser can alter ownership of any statistics anyway.)
</para>
</refsect1>
<refsect1>
<title>Parameters</title>
<para>
<variablelist>
<varlistentry>
<term><replaceable class="parameter">name</replaceable></term>
<listitem>
<para>
The name (optionally schema-qualified) of the statistics to be altered.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="PARAMETER">new_owner</replaceable></term>
<listitem>
<para>
The user name of the new owner of the statistics.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="parameter">new_name</replaceable></term>
<listitem>
<para>
The new name for the statistics.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="parameter">new_schema</replaceable></term>
<listitem>
<para>
The new schema for the statistics.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
</refsect1>
<refsect1>
<title>Compatibility</title>
<para>
There's no <command>ALTER STATISTICS</command> command in the SQL standard.
</para>
</refsect1>
<refsect1>
<title>See Also</title>
<simplelist type="inline">
<member><xref linkend="sql-createstatistics"></member>
<member><xref linkend="sql-dropstatistics"></member>
</simplelist>
</refsect1>
</refentry>
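
For illustration only (not part of the patch), the three forms documented
above could be used as follows; the role stats_admin and the schema
reporting are hypothetical names:

ALTER STATISTICS s1 RENAME TO s1_ab;
ALTER STATISTICS s1_ab OWNER TO stats_admin;
ALTER STATISTICS s1_ab SET SCHEMA reporting;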

View File

@ -119,9 +119,12 @@ ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
<para>
This form drops a column from a table. Indexes and
table constraints involving the column will be automatically
dropped as well. You will need to say <literal>CASCADE</> if
anything outside the table depends on the column, for example,
foreign key references or views.
dropped as well.
Multivariate statistics referencing the dropped column will also be
removed if the removal of the column would cause the statistics to
contain data for only a single column.
You will need to say <literal>CASCADE</> if anything outside the table
depends on the column, for example, foreign key references or views.
If <literal>IF EXISTS</literal> is specified and the column
does not exist, no error is thrown. In this case a notice
is issued instead.
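
A brief editorial sketch (not from the patch) of the behaviour described
above, assuming a hypothetical table t2 with a two-column statistics object:

CREATE TABLE t2 (a int, b int, c int);
CREATE STATISTICS s2 ON (a, b) FROM t2;
-- dropping column b would leave s2 covering only a single column,
-- so the statistics object is removed together with the column
ALTER TABLE t2 DROP COLUMN b;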

View File

@ -51,6 +51,7 @@ COMMENT ON
SCHEMA <replaceable class="PARAMETER">object_name</replaceable> |
SEQUENCE <replaceable class="PARAMETER">object_name</replaceable> |
SERVER <replaceable class="PARAMETER">object_name</replaceable> |
STATISTICS <replaceable class="PARAMETER">object_name</replaceable> |
TABLE <replaceable class="PARAMETER">object_name</replaceable> |
TABLESPACE <replaceable class="PARAMETER">object_name</replaceable> |
TEXT SEARCH CONFIGURATION <replaceable class="PARAMETER">object_name</replaceable> |
@ -125,8 +126,8 @@ COMMENT ON
The name of the object to be commented. Names of tables,
aggregates, collations, conversions, domains, foreign tables, functions,
indexes, operators, operator classes, operator families, sequences,
text search objects, types, and views can be schema-qualified.
When commenting on a column,
statistics, text search objects, types, and views can be
schema-qualified. When commenting on a column,
<replaceable class="parameter">relation_name</replaceable> must refer
to a table, view, composite type, or foreign table.
</para>
@ -327,6 +328,7 @@ COMMENT ON RULE my_rule ON my_table IS 'Logs updates of employee records';
COMMENT ON SCHEMA my_schema IS 'Departmental data';
COMMENT ON SEQUENCE my_sequence IS 'Used to generate primary keys';
COMMENT ON SERVER myserver IS 'my foreign server';
COMMENT ON STATISTICS my_statistics IS 'Improves planner row estimations';
COMMENT ON TABLE my_schema.my_table IS 'Employee Information';
COMMENT ON TABLESPACE my_tablespace IS 'Tablespace for indexes';
COMMENT ON TEXT SEARCH CONFIGURATION my_config IS 'Special word filtering';

View File

@ -0,0 +1,155 @@
<!--
doc/src/sgml/ref/create_statistics.sgml
PostgreSQL documentation
-->
<refentry id="SQL-CREATESTATISTICS">
<indexterm zone="sql-createstatistics">
<primary>CREATE STATISTICS</primary>
</indexterm>
<refmeta>
<refentrytitle>CREATE STATISTICS</refentrytitle>
<manvolnum>7</manvolnum>
<refmiscinfo>SQL - Language Statements</refmiscinfo>
</refmeta>
<refnamediv>
<refname>CREATE STATISTICS</refname>
<refpurpose>define extended statistics</refpurpose>
</refnamediv>
<refsynopsisdiv>
<synopsis>
CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_name</replaceable> ON (
<replaceable class="PARAMETER">column_name</replaceable>, <replaceable class="PARAMETER">column_name</replaceable> [, ...])
FROM <replaceable class="PARAMETER">table_name</replaceable>
</synopsis>
</refsynopsisdiv>
<refsect1 id="SQL-CREATESTATISTICS-description">
<title>Description</title>
<para>
<command>CREATE STATISTICS</command> will create a new extended statistics
object on the specified table.
The statistics will be created in the current database and
will be owned by the user issuing the command.
</para>
<para>
If a schema name is given (for example, <literal>CREATE STATISTICS
myschema.mystat ...</>) then the statistics is created in the specified
schema. Otherwise it is created in the current schema. The name of
the statistics must be distinct from the name of any other statistics in the
same schema.
</para>
</refsect1>
<refsect1>
<title>Parameters</title>
<variablelist>
<varlistentry>
<term><literal>IF NOT EXISTS</></term>
<listitem>
<para>
Do not throw an error if a statistics with the same name already exists.
A notice is issued in this case. Note that only the name of the
statistics object is considered here. The definition of the statistics is
not considered.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="PARAMETER">statistics_name</replaceable></term>
<listitem>
<para>
The name (optionally schema-qualified) of the statistics to be created.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="PARAMETER">column_name</replaceable></term>
<listitem>
<para>
The name of a column to be included in the statistics.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="PARAMETER">table_name</replaceable></term>
<listitem>
<para>
The name (optionally schema-qualified) of the table the statistics should
be created on.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>Notes</title>
<para>
You must be the owner of a table to create or change statistics on it.
</para>
</refsect1>
<refsect1 id="SQL-CREATESTATISTICS-examples">
<title>Examples</title>
<para>
Create table <structname>t1</> with two functionally dependent columns, i.e.
knowledge of a value in the first column is sufficient for determining the
value in the other column. Then functional dependencies are built on those
columns:
<programlisting>
CREATE TABLE t1 (
a int,
b int
);
INSERT INTO t1 SELECT i/100, i/500
FROM generate_series(1,1000000) s(i);
CREATE STATISTICS s1 ON (a, b) FROM t1;
ANALYZE t1;
-- valid combination of values
EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0);
-- invalid combination of values
EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 1);
</programlisting>
</para>
</refsect1>
<refsect1>
<title>Compatibility</title>
<para>
There's no <command>CREATE STATISTICS</command> command in the SQL standard.
</para>
</refsect1>
<refsect1>
<title>See Also</title>
<simplelist type="inline">
<member><xref linkend="sql-alterstatistics"></member>
<member><xref linkend="sql-dropstatistics"></member>
</simplelist>
</refsect1>
</refentry>
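
As a hedged addition (not part of the committed page): since only ndistinct
coefficients are collected by this commit, the most visible effect of s1 is
on group-count estimates, as described in the commit message; roughly:

-- with s1 in place and the table analyzed, the planner can use the
-- multivariate ndistinct coefficient instead of assuming independence
EXPLAIN SELECT a, b FROM t1 GROUP BY a, b;
EXPLAIN SELECT DISTINCT a, b FROM t1;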

View File

@ -0,0 +1,98 @@
<!--
doc/src/sgml/ref/drop_statistics.sgml
PostgreSQL documentation
-->
<refentry id="SQL-DROPSTATISTICS">
<indexterm zone="sql-dropstatistics">
<primary>DROP STATISTICS</primary>
</indexterm>
<refmeta>
<refentrytitle>DROP STATISTICS</refentrytitle>
<manvolnum>7</manvolnum>
<refmiscinfo>SQL - Language Statements</refmiscinfo>
</refmeta>
<refnamediv>
<refname>DROP STATISTICS</refname>
<refpurpose>remove extended statistics</refpurpose>
</refnamediv>
<refsynopsisdiv>
<synopsis>
DROP STATISTICS [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable> [, ...]
</synopsis>
</refsynopsisdiv>
<refsect1>
<title>Description</title>
<para>
<command>DROP STATISTICS</command> removes statistics from the database.
Only the statistics owner, the schema owner, or a superuser can drop a
statistics object.
</para>
</refsect1>
<refsect1>
<title>Parameters</title>
<variablelist>
<varlistentry>
<term><literal>IF EXISTS</literal></term>
<listitem>
<para>
Do not throw an error if the statistics do not exist. A notice is
issued in this case.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="PARAMETER">name</replaceable></term>
<listitem>
<para>
The name (optionally schema-qualified) of the statistics to drop.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>Examples</title>
<para>
To destroy two statistics objects on different schemas, without failing
if they don't exist:
<programlisting>
DROP STATISTICS IF EXISTS
accounting.users_uid_creation,
public.grants_user_role;
</programlisting>
</para>
</refsect1>
<refsect1>
<title>Compatibility</title>
<para>
There's no <command>DROP STATISTICS</command> command in the SQL standard.
</para>
</refsect1>
<refsect1>
<title>See Also</title>
<simplelist type="inline">
<member><xref linkend="sql-alterstatistics"></member>
<member><xref linkend="sql-createstatistics"></member>
</simplelist>
</refsect1>
</refentry>

View File

@ -60,6 +60,7 @@
&alterSchema;
&alterSequence;
&alterServer;
&alterStatistics;
&alterSubscription;
&alterSystem;
&alterTable;
@ -108,6 +109,7 @@
&createSchema;
&createSequence;
&createServer;
&createStatistics;
&createSubscription;
&createTable;
&createTableAs;
@ -154,6 +156,7 @@
&dropSchema;
&dropSequence;
&dropServer;
&dropStatistics;
&dropSubscription;
&dropTable;
&dropTableSpace;

View File

@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global
SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \
main nodes optimizer port postmaster regex replication rewrite \
storage tcop tsearch utils $(top_builddir)/src/timezone
statistics storage tcop tsearch utils $(top_builddir)/src/timezone
include $(srcdir)/common.mk

View File

@ -33,6 +33,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
pg_statistic_ext.h \
pg_statistic.h pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \
pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \

View File

@ -48,6 +48,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
@ -3302,6 +3303,8 @@ static const char *const no_priv_msg[MAX_ACL_KIND] =
gettext_noop("permission denied for collation %s"),
/* ACL_KIND_CONVERSION */
gettext_noop("permission denied for conversion %s"),
/* ACL_KIND_STATISTICS */
gettext_noop("permission denied for statistics %s"),
/* ACL_KIND_TABLESPACE */
gettext_noop("permission denied for tablespace %s"),
/* ACL_KIND_TSDICTIONARY */
@ -3352,6 +3355,8 @@ static const char *const not_owner_msg[MAX_ACL_KIND] =
gettext_noop("must be owner of collation %s"),
/* ACL_KIND_CONVERSION */
gettext_noop("must be owner of conversion %s"),
/* ACL_KIND_STATISTICS */
gettext_noop("must be owner of statistics %s"),
/* ACL_KIND_TABLESPACE */
gettext_noop("must be owner of tablespace %s"),
/* ACL_KIND_TSDICTIONARY */
@ -3467,6 +3472,10 @@ pg_aclmask(AclObjectKind objkind, Oid table_oid, AttrNumber attnum, Oid roleid,
mask, how, NULL);
case ACL_KIND_NAMESPACE:
return pg_namespace_aclmask(table_oid, roleid, mask, how);
case ACL_KIND_STATISTICS:
elog(ERROR, "grantable rights not supported for statistics");
/* not reached, but keep compiler quiet */
return ACL_NO_RIGHTS;
case ACL_KIND_TABLESPACE:
return pg_tablespace_aclmask(table_oid, roleid, mask, how);
case ACL_KIND_FDW:
@ -5103,6 +5112,32 @@ pg_subscription_ownercheck(Oid sub_oid, Oid roleid)
return has_privs_of_role(roleid, ownerId);
}
/*
* Ownership check for an extended statistics object (specified by OID).
*/
bool
pg_statistics_ownercheck(Oid stat_oid, Oid roleid)
{
HeapTuple tuple;
Oid ownerId;
/* Superusers bypass all permission checking. */
if (superuser_arg(roleid))
return true;
tuple = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stat_oid));
if (!HeapTupleIsValid(tuple))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("statistics with OID %u do not exist", stat_oid)));
ownerId = ((Form_pg_statistic_ext) GETSTRUCT(tuple))->staowner;
ReleaseSysCache(tuple);
return has_privs_of_role(roleid, ownerId);
}
/*
* Check whether specified role has CREATEROLE privilege (or is a superuser)
*

View File

@ -51,6 +51,7 @@
#include "catalog/pg_publication.h"
#include "catalog/pg_publication_rel.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
@ -154,6 +155,7 @@ static const Oid object_classes[] = {
RewriteRelationId, /* OCLASS_REWRITE */
TriggerRelationId, /* OCLASS_TRIGGER */
NamespaceRelationId, /* OCLASS_SCHEMA */
StatisticExtRelationId, /* OCLASS_STATISTIC_EXT */
TSParserRelationId, /* OCLASS_TSPARSER */
TSDictionaryRelationId, /* OCLASS_TSDICT */
TSTemplateRelationId, /* OCLASS_TSTEMPLATE */
@ -1263,6 +1265,10 @@ doDeletion(const ObjectAddress *object, int flags)
DropTransformById(object->objectId);
break;
case OCLASS_STATISTIC_EXT:
RemoveStatisticsById(object->objectId);
break;
default:
elog(ERROR, "unrecognized object class: %u",
object->classId);
@ -2377,6 +2383,9 @@ getObjectClass(const ObjectAddress *object)
case NamespaceRelationId:
return OCLASS_SCHEMA;
case StatisticExtRelationId:
return OCLASS_STATISTIC_EXT;
case TSParserRelationId:
return OCLASS_TSPARSER;

View File

@ -52,6 +52,7 @@
#include "catalog/pg_opclass.h"
#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription_rel.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
@ -1609,7 +1610,10 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
heap_close(attr_rel, RowExclusiveLock);
if (attnum > 0)
{
RemoveStatistics(relid, attnum);
RemoveStatisticsExt(relid, attnum);
}
relation_close(rel, NoLock);
}
@ -1860,6 +1864,7 @@ heap_drop_with_catalog(Oid relid)
* delete statistics
*/
RemoveStatistics(relid, 0);
RemoveStatisticsExt(relid, 0);
/*
* delete attribute tuples
@ -2771,6 +2776,75 @@ RemoveStatistics(Oid relid, AttrNumber attnum)
}
/*
* RemoveStatisticsExt --- remove entries in pg_statistic_ext for a relation
*
* If attnum is zero, remove all entries for rel; else remove only the
* one(s) involving that column.
*/
void
RemoveStatisticsExt(Oid relid, AttrNumber attnum)
{
Relation pgstatisticext;
SysScanDesc scan;
ScanKeyData key;
HeapTuple tuple;
/*
* Scan pg_statistic_ext to delete relevant tuples
*/
pgstatisticext = heap_open(StatisticExtRelationId, RowExclusiveLock);
ScanKeyInit(&key,
Anum_pg_statistic_ext_starelid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(relid));
scan = systable_beginscan(pgstatisticext,
StatisticExtRelidIndexId,
true, NULL, 1, &key);
while (HeapTupleIsValid(tuple = systable_getnext(scan)))
{
bool delete = false;
if (attnum == 0)
delete = true;
else if (attnum != 0)
{
Form_pg_statistic_ext staForm;
int i;
/*
* Decode the stakeys array and delete any stats that involve the
* specified column.
*/
staForm = (Form_pg_statistic_ext) GETSTRUCT(tuple);
for (i = 0; i < staForm->stakeys.dim1; i++)
{
if (staForm->stakeys.values[i] == attnum)
{
delete = true;
break;
}
}
}
if (delete)
{
CatalogTupleDelete(pgstatisticext, &tuple->t_self);
deleteDependencyRecordsFor(StatisticExtRelationId,
HeapTupleGetOid(tuple),
false);
}
}
systable_endscan(scan);
heap_close(pgstatisticext, RowExclusiveLock);
}
/*
* RelationTruncateIndexes - truncate all indexes associated
* with the heap relation to zero tuples.

View File

@ -2085,6 +2085,62 @@ ConversionIsVisible(Oid conid)
return visible;
}
/*
* get_statistics_oid - find a statistics by possibly qualified name
*
* If not found, returns InvalidOid if missing_ok, else throws error
*/
Oid
get_statistics_oid(List *names, bool missing_ok)
{
char *schemaname;
char *stats_name;
Oid namespaceId;
Oid stats_oid = InvalidOid;
ListCell *l;
/* deconstruct the name list */
DeconstructQualifiedName(names, &schemaname, &stats_name);
if (schemaname)
{
/* use exact schema given */
namespaceId = LookupExplicitNamespace(schemaname, missing_ok);
if (missing_ok && !OidIsValid(namespaceId))
stats_oid = InvalidOid;
else
stats_oid = GetSysCacheOid2(STATEXTNAMENSP,
PointerGetDatum(stats_name),
ObjectIdGetDatum(namespaceId));
}
else
{
/* search for it in search path */
recomputeNamespacePath();
foreach(l, activeSearchPath)
{
namespaceId = lfirst_oid(l);
if (namespaceId == myTempNamespace)
continue; /* do not look in temp namespace */
stats_oid = GetSysCacheOid2(STATEXTNAMENSP,
PointerGetDatum(stats_name),
ObjectIdGetDatum(namespaceId));
if (OidIsValid(stats_oid))
break;
}
}
if (!OidIsValid(stats_oid) && !missing_ok)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("statistics \"%s\" do not exist",
NameListToString(names))));
return stats_oid;
}
/*
* get_ts_parser_oid - find a TS parser by possibly qualified name
*

View File

@ -48,6 +48,7 @@
#include "catalog/pg_publication.h"
#include "catalog/pg_publication_rel.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
@ -478,6 +479,18 @@ static const ObjectPropertyType ObjectProperty[] =
InvalidAttrNumber,
ACL_KIND_SUBSCRIPTION,
true
},
{
StatisticExtRelationId,
StatisticExtOidIndexId,
STATEXTOID,
STATEXTNAMENSP,
Anum_pg_statistic_ext_staname,
Anum_pg_statistic_ext_stanamespace,
Anum_pg_statistic_ext_staowner,
InvalidAttrNumber, /* no ACL (same as relation) */
ACL_KIND_STATISTICS,
true
}
};
@ -696,6 +709,10 @@ static const struct object_type_map
/* OCLASS_TRANSFORM */
{
"transform", OBJECT_TRANSFORM
},
/* OBJECT_STATISTIC_EXT */
{
"statistics", OBJECT_STATISTIC_EXT
}
};
@ -974,6 +991,12 @@ get_object_address(ObjectType objtype, Node *object,
address = get_object_address_defacl(castNode(List, object),
missing_ok);
break;
case OBJECT_STATISTIC_EXT:
address.classId = StatisticExtRelationId;
address.objectId = get_statistics_oid(castNode(List, object),
missing_ok);
address.objectSubId = 0;
break;
default:
elog(ERROR, "unrecognized objtype: %d", (int) objtype);
/* placate compiler, in case it thinks elog might return */
@ -2083,6 +2106,7 @@ pg_get_object_address(PG_FUNCTION_ARGS)
case OBJECT_ATTRIBUTE:
case OBJECT_COLLATION:
case OBJECT_CONVERSION:
case OBJECT_STATISTIC_EXT:
case OBJECT_TSPARSER:
case OBJECT_TSDICTIONARY:
case OBJECT_TSTEMPLATE:
@ -2370,6 +2394,10 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser")));
break;
case OBJECT_STATISTIC_EXT:
if (!pg_statistics_ownercheck(address.objectId, roleid))
aclcheck_error_type(ACLCHECK_NOT_OWNER, address.objectId);
break;
default:
elog(ERROR, "unrecognized object type: %d",
(int) objtype);
@ -3857,6 +3885,10 @@ getObjectTypeDescription(const ObjectAddress *object)
appendStringInfoString(&buffer, "subscription");
break;
case OCLASS_STATISTIC_EXT:
appendStringInfoString(&buffer, "statistics");
break;
default:
appendStringInfo(&buffer, "unrecognized %u", object->classId);
break;
@ -4880,6 +4912,29 @@ getObjectIdentityParts(const ObjectAddress *object,
break;
}
case OCLASS_STATISTIC_EXT:
{
HeapTuple tup;
Form_pg_statistic_ext formStatistic;
char *schema;
tup = SearchSysCache1(STATEXTOID,
ObjectIdGetDatum(object->objectId));
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for statistics %u",
object->objectId);
formStatistic = (Form_pg_statistic_ext) GETSTRUCT(tup);
schema = get_namespace_name_or_temp(formStatistic->stanamespace);
appendStringInfoString(&buffer,
quote_qualified_identifier(schema,
NameStr(formStatistic->staname)));
if (objname)
*objname = list_make2(schema,
pstrdup(NameStr(formStatistic->staname)));
ReleaseSysCache(tup);
}
break;
default:
appendStringInfo(&buffer, "unrecognized object %u %u %d",
object->classId,

View File

@ -39,6 +39,7 @@
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_shdepend.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_ts_config.h"
@ -1415,6 +1416,7 @@ shdepReassignOwned(List *roleids, Oid newrole)
case OperatorFamilyRelationId:
case OperatorClassRelationId:
case ExtensionRelationId:
case StatisticExtRelationId:
case TableSpaceRelationId:
case DatabaseRelationId:
case TSConfigRelationId:

View File

@ -186,6 +186,16 @@ CREATE OR REPLACE VIEW pg_sequences AS
WHERE NOT pg_is_other_temp_schema(N.oid)
AND relkind = 'S';
CREATE VIEW pg_stats_ext AS
SELECT
N.nspname AS schemaname,
C.relname AS tablename,
S.staname AS staname,
S.stakeys AS attnums,
length(s.standistinct) AS ndistbytes
FROM (pg_statistic_ext S JOIN pg_class C ON (C.oid = S.starelid))
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace);
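
An editorial usage sketch (not part of system_views.sql): the new view shows
which extended statistics exist and whether their ndistinct data has been
built:

SELECT schemaname, tablename, staname, attnums, ndistbytes
FROM pg_stats_ext
WHERE tablename = 't1';
-- ndistbytes is NULL until ANALYZE has built the statistics
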
CREATE VIEW pg_stats WITH (security_barrier) AS
SELECT
nspname AS schemaname,

View File

@ -18,8 +18,8 @@ OBJS = amcmds.o aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \
event_trigger.o explain.o extension.o foreigncmds.o functioncmds.o \
indexcmds.o lockcmds.o matview.o operatorcmds.o opclasscmds.o \
policy.o portalcmds.o prepare.o proclang.o publicationcmds.o \
schemacmds.o seclabel.o sequence.o subscriptioncmds.o tablecmds.o \
tablespace.o trigger.o tsearchcmds.o typecmds.o user.o vacuum.o \
vacuumlazy.o variable.o view.o
schemacmds.o seclabel.o sequence.o statscmds.o subscriptioncmds.o \
tablecmds.o tablespace.o trigger.o tsearchcmds.o typecmds.o user.o \
vacuum.o vacuumlazy.o variable.o view.o
include $(top_srcdir)/src/backend/common.mk

View File

@ -33,6 +33,7 @@
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_dict.h"
#include "catalog/pg_ts_parser.h"
@ -120,6 +121,10 @@ report_namespace_conflict(Oid classId, const char *name, Oid nspOid)
Assert(OidIsValid(nspOid));
msgfmt = gettext_noop("conversion \"%s\" already exists in schema \"%s\"");
break;
case StatisticExtRelationId:
Assert(OidIsValid(nspOid));
msgfmt = gettext_noop("statistics \"%s\" already exists in schema \"%s\"");
break;
case TSParserRelationId:
Assert(OidIsValid(nspOid));
msgfmt = gettext_noop("text search parser \"%s\" already exists in schema \"%s\"");
@ -373,6 +378,7 @@ ExecRenameStmt(RenameStmt *stmt)
case OBJECT_OPCLASS:
case OBJECT_OPFAMILY:
case OBJECT_LANGUAGE:
case OBJECT_STATISTIC_EXT:
case OBJECT_TSCONFIGURATION:
case OBJECT_TSDICTIONARY:
case OBJECT_TSPARSER:
@ -489,6 +495,7 @@ ExecAlterObjectSchemaStmt(AlterObjectSchemaStmt *stmt,
case OBJECT_OPERATOR:
case OBJECT_OPCLASS:
case OBJECT_OPFAMILY:
case OBJECT_STATISTIC_EXT:
case OBJECT_TSCONFIGURATION:
case OBJECT_TSDICTIONARY:
case OBJECT_TSPARSER:
@ -803,6 +810,7 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt)
case OBJECT_OPERATOR:
case OBJECT_OPCLASS:
case OBJECT_OPFAMILY:
case OBJECT_STATISTIC_EXT:
case OBJECT_TABLESPACE:
case OBJECT_TSDICTIONARY:
case OBJECT_TSCONFIGURATION:

View File

@ -17,6 +17,7 @@
#include <math.h>
#include "access/multixact.h"
#include "access/sysattr.h"
#include "access/transam.h"
#include "access/tupconvert.h"
#include "access/tuptoaster.h"
@ -28,6 +29,7 @@
#include "catalog/pg_collation.h"
#include "catalog/pg_inherits_fn.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_statistic_ext.h"
#include "commands/dbcommands.h"
#include "commands/tablecmds.h"
#include "commands/vacuum.h"
@ -39,13 +41,17 @@
#include "parser/parse_relation.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/acl.h"
#include "utils/attoptcache.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@ -566,6 +572,10 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
update_attstats(RelationGetRelid(Irel[ind]), false,
thisdata->attr_cnt, thisdata->vacattrstats);
}
/* Build extended statistics (if there are any). */
BuildRelationExtStatistics(onerel, totalrows, numrows, rows, attr_cnt,
vacattrstats);
}
/*
@ -1681,19 +1691,6 @@ ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
/*
* Extra information used by the default analysis routines
*/
typedef struct
{
Oid eqopr; /* '=' operator for datatype, if any */
Oid eqfunc; /* and associated function */
Oid ltopr; /* '<' operator for datatype, if any */
} StdAnalyzeData;
typedef struct
{
Datum value; /* a data value */
int tupno; /* position index for tuple it came from */
} ScalarItem;
typedef struct
{
int count; /* # of duplicates */

View File

@ -286,6 +286,13 @@ does_not_exist_skipping(ObjectType objtype, Node *object)
msg = gettext_noop("schema \"%s\" does not exist, skipping");
name = strVal((Value *) object);
break;
case OBJECT_STATISTIC_EXT:
if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
{
msg = gettext_noop("extended statistics \"%s\" do not exist, skipping");
name = NameListToString(castNode(List, object));
}
break;
case OBJECT_TSPARSER:
if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
{

View File

@ -112,6 +112,7 @@ static event_trigger_support_data event_trigger_support[] = {
{"SCHEMA", true},
{"SEQUENCE", true},
{"SERVER", true},
{"STATISTICS", true},
{"SUBSCRIPTION", true},
{"TABLE", true},
{"TABLESPACE", false},
@ -1108,6 +1109,7 @@ EventTriggerSupportsObjectType(ObjectType obtype)
case OBJECT_SCHEMA:
case OBJECT_SEQUENCE:
case OBJECT_SUBSCRIPTION:
case OBJECT_STATISTIC_EXT:
case OBJECT_TABCONSTRAINT:
case OBJECT_TABLE:
case OBJECT_TRANSFORM:
@ -1173,6 +1175,7 @@ EventTriggerSupportsObjectClass(ObjectClass objclass)
case OCLASS_PUBLICATION:
case OCLASS_PUBLICATION_REL:
case OCLASS_SUBSCRIPTION:
case OCLASS_STATISTIC_EXT:
return true;
}

View File

@ -0,0 +1,296 @@
/*-------------------------------------------------------------------------
*
* statscmds.c
* Commands for creating and altering extended statistics
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/commands/statscmds.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/relscan.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_statistic_ext.h"
#include "commands/defrem.h"
#include "miscadmin.h"
#include "statistics/statistics.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
/* used for sorting the attnums in CreateStatistics */
static int
compare_int16(const void *a, const void *b)
{
return memcmp(a, b, sizeof(int16));
}
/*
* CREATE STATISTICS
*/
ObjectAddress
CreateStatistics(CreateStatsStmt *stmt)
{
int i;
ListCell *l;
int16 attnums[STATS_MAX_DIMENSIONS];
int numcols = 0;
ObjectAddress address = InvalidObjectAddress;
char *namestr;
NameData staname;
Oid statoid;
Oid namespaceId;
HeapTuple htup;
Datum values[Natts_pg_statistic_ext];
bool nulls[Natts_pg_statistic_ext];
int2vector *stakeys;
Relation statrel;
Relation rel;
Oid relid;
ObjectAddress parentobject,
childobject;
Datum types[1]; /* only ndistinct defined now */
int ntypes;
ArrayType *staenabled;
bool build_ndistinct;
bool requested_type = false;
Assert(IsA(stmt, CreateStatsStmt));
/* resolve the pieces of the name (namespace etc.) */
namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr);
namestrcpy(&staname, namestr);
/*
* If if_not_exists was given and the statistics already exists, bail out.
*/
if (SearchSysCacheExists2(STATEXTNAMENSP,
PointerGetDatum(&staname),
ObjectIdGetDatum(namespaceId)))
{
if (stmt->if_not_exists)
{
ereport(NOTICE,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("statistics \"%s\" already exist, skipping",
namestr)));
return InvalidObjectAddress;
}
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("statistics \"%s\" already exist", namestr)));
}
rel = heap_openrv(stmt->relation, AccessExclusiveLock);
relid = RelationGetRelid(rel);
if (rel->rd_rel->relkind != RELKIND_RELATION &&
rel->rd_rel->relkind != RELKIND_MATVIEW)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("relation \"%s\" is not a table or materialized view",
RelationGetRelationName(rel))));
/*
* Transform column names to array of attnums. While at it, enforce some
* constraints.
*/
foreach(l, stmt->keys)
{
char *attname = strVal(lfirst(l));
HeapTuple atttuple;
Form_pg_attribute attForm;
TypeCacheEntry *type;
atttuple = SearchSysCacheAttName(relid, attname);
if (!HeapTupleIsValid(atttuple))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column \"%s\" referenced in statistics does not exist",
attname)));
attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
/* Disallow use of system attributes in extended stats */
if (attForm->attnum < 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("statistic creation on system columns is not supported")));
/* Disallow data types without a less-than operator */
type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
if (type->lt_opr == InvalidOid)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("only scalar types can be used in extended statistics")));
/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
if (numcols >= STATS_MAX_DIMENSIONS)
ereport(ERROR,
(errcode(ERRCODE_TOO_MANY_COLUMNS),
errmsg("cannot have more than %d keys in statistics",
STATS_MAX_DIMENSIONS)));
attnums[numcols] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum;
ReleaseSysCache(atttuple);
numcols++;
}
/*
* Check that at least two columns were specified in the statement. The
* upper bound was already checked in the loop above.
*/
if (numcols < 2)
ereport(ERROR,
(errcode(ERRCODE_TOO_MANY_COLUMNS),
errmsg("statistics require at least 2 columns")));
/*
* Sort the attnums, which makes detecting duplicates somewhat easier, and
* it does not hurt (it does not affect the efficiency, unlike for
* indexes, for example).
*/
qsort(attnums, numcols, sizeof(int16), compare_int16);
/*
* Look for duplicates in the list of columns. The attnums are sorted so
* just check consecutive elements.
*/
for (i = 1; i < numcols; i++)
if (attnums[i] == attnums[i - 1])
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("duplicate column name in statistics definition")));
stakeys = buildint2vector(attnums, numcols);
/*
* Parse the statistics options. Currently only statistics types are
* recognized.
*/
build_ndistinct = false;
foreach(l, stmt->options)
{
DefElem *opt = (DefElem *) lfirst(l);
if (strcmp(opt->defname, "ndistinct") == 0)
{
build_ndistinct = defGetBoolean(opt);
requested_type = true;
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unrecognized STATISTICS option \"%s\"",
opt->defname)));
}
/* If no statistic type was specified, build them all. */
if (!requested_type)
build_ndistinct = true;
/* construct the char array of enabled statistic types */
ntypes = 0;
if (build_ndistinct)
types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
Assert(ntypes > 0);
staenabled = construct_array(types, ntypes, CHAROID, 1, true, 'c');
/*
* Everything seems fine, so let's build the pg_statistic_ext tuple.
*/
memset(values, 0, sizeof(values));
memset(nulls, false, sizeof(nulls));
values[Anum_pg_statistic_ext_starelid - 1] = ObjectIdGetDatum(relid);
values[Anum_pg_statistic_ext_staname - 1] = NameGetDatum(&staname);
values[Anum_pg_statistic_ext_stanamespace - 1] = ObjectIdGetDatum(namespaceId);
values[Anum_pg_statistic_ext_staowner - 1] = ObjectIdGetDatum(GetUserId());
values[Anum_pg_statistic_ext_stakeys - 1] = PointerGetDatum(stakeys);
values[Anum_pg_statistic_ext_staenabled - 1] = PointerGetDatum(staenabled);
/* no statistics built yet */
nulls[Anum_pg_statistic_ext_standistinct - 1] = true;
/* insert it into pg_statistic_ext */
statrel = heap_open(StatisticExtRelationId, RowExclusiveLock);
htup = heap_form_tuple(statrel->rd_att, values, nulls);
CatalogTupleInsert(statrel, htup);
statoid = HeapTupleGetOid(htup);
heap_freetuple(htup);
heap_close(statrel, RowExclusiveLock);
relation_close(rel, NoLock);
/*
* Add a dependency on a table, so that stats get dropped on DROP TABLE.
*/
ObjectAddressSet(parentobject, RelationRelationId, relid);
ObjectAddressSet(childobject, StatisticExtRelationId, statoid);
recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
/*
* Also add dependency on the schema. This is required to ensure that we
* drop the statistics on DROP SCHEMA. This is not handled automatically
* by DROP TABLE because the statistics are not an object in the table's
* schema.
*/
ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId);
recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
ObjectAddressSet(address, StatisticExtRelationId, statoid);
/*
* Invalidate relcache so that others see the new statistics.
*/
CacheInvalidateRelcache(rel);
return address;
}
/*
* Guts of statistics deletion.
*/
void
RemoveStatisticsById(Oid statsOid)
{
Relation relation;
Oid relid;
Relation rel;
HeapTuple tup;
Form_pg_statistic_ext statext;
/*
* Delete the pg_statistic_ext tuple.
*/
relation = heap_open(StatisticExtRelationId, RowExclusiveLock);
tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid));
if (!HeapTupleIsValid(tup)) /* should not happen */
elog(ERROR, "cache lookup failed for statistics %u", statsOid);
statext = (Form_pg_statistic_ext) GETSTRUCT(tup);
relid = statext->starelid;
rel = heap_open(relid, AccessExclusiveLock);
simple_heap_delete(relation, &tup->t_self);
CacheInvalidateRelcache(rel);
ReleaseSysCache(tup);
heap_close(relation, RowExclusiveLock);
heap_close(rel, NoLock);
}
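
To summarize the checks performed by CreateStatistics above, here is a hedged
sketch of statements it rejects (error texts paraphrased):

CREATE STATISTICS s_bad ON (a) FROM t1;        -- statistics require at least 2 columns
CREATE STATISTICS s_bad ON (a, a) FROM t1;     -- duplicate column name in statistics definition
CREATE STATISTICS s_bad ON (a, xmin) FROM t1;  -- system columns are not supported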

View File

@ -3337,6 +3337,20 @@ _copyIndexStmt(const IndexStmt *from)
return newnode;
}
static CreateStatsStmt *
_copyCreateStatsStmt(const CreateStatsStmt *from)
{
CreateStatsStmt *newnode = makeNode(CreateStatsStmt);
COPY_NODE_FIELD(defnames);
COPY_NODE_FIELD(relation);
COPY_NODE_FIELD(keys);
COPY_NODE_FIELD(options);
COPY_SCALAR_FIELD(if_not_exists);
return newnode;
}
static CreateFunctionStmt *
_copyCreateFunctionStmt(const CreateFunctionStmt *from)
{
@ -5050,6 +5064,9 @@ copyObject(const void *from)
case T_IndexStmt:
retval = _copyIndexStmt(from);
break;
case T_CreateStatsStmt:
retval = _copyCreateStatsStmt(from);
break;
case T_CreateFunctionStmt:
retval = _copyCreateFunctionStmt(from);
break;

View File

@ -1334,6 +1334,18 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b)
return true;
}
static bool
_equalCreateStatsStmt(const CreateStatsStmt *a, const CreateStatsStmt *b)
{
COMPARE_NODE_FIELD(defnames);
COMPARE_NODE_FIELD(relation);
COMPARE_NODE_FIELD(keys);
COMPARE_NODE_FIELD(options);
COMPARE_SCALAR_FIELD(if_not_exists);
return true;
}
static bool
_equalCreateFunctionStmt(const CreateFunctionStmt *a, const CreateFunctionStmt *b)
{
@ -3236,6 +3248,9 @@ equal(const void *a, const void *b)
case T_IndexStmt:
retval = _equalIndexStmt(a, b);
break;
case T_CreateStatsStmt:
retval = _equalCreateStatsStmt(a, b);
break;
case T_CreateFunctionStmt:
retval = _equalCreateFunctionStmt(a, b);
break;

View File

@ -2202,6 +2202,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_NODE_FIELD(lateral_vars);
WRITE_BITMAPSET_FIELD(lateral_referencers);
WRITE_NODE_FIELD(indexlist);
WRITE_NODE_FIELD(statlist);
WRITE_UINT_FIELD(pages);
WRITE_FLOAT_FIELD(tuples, "%.0f");
WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
@ -2274,6 +2275,18 @@ _outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node)
appendStringInfo(str, " %d", list_length(node->rinfos[i]));
}
static void
_outStatisticExtInfo(StringInfo str, const StatisticExtInfo *node)
{
WRITE_NODE_TYPE("STATISTICEXTINFO");
/* NB: this isn't a complete set of fields */
WRITE_OID_FIELD(statOid);
/* don't write rel, leads to infinite recursion in plan tree dump */
WRITE_CHAR_FIELD(kind);
WRITE_BITMAPSET_FIELD(keys);
}
static void
_outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
{
@ -2577,6 +2590,18 @@ _outIndexStmt(StringInfo str, const IndexStmt *node)
WRITE_BOOL_FIELD(if_not_exists);
}
static void
_outCreateStatsStmt(StringInfo str, const CreateStatsStmt *node)
{
WRITE_NODE_TYPE("CREATESTATSSTMT");
WRITE_NODE_FIELD(defnames);
WRITE_NODE_FIELD(relation);
WRITE_NODE_FIELD(keys);
WRITE_NODE_FIELD(options);
WRITE_BOOL_FIELD(if_not_exists);
}
static void
_outNotifyStmt(StringInfo str, const NotifyStmt *node)
{
@ -3936,6 +3961,9 @@ outNode(StringInfo str, const void *obj)
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;
case T_StatisticExtInfo:
_outStatisticExtInfo(str, obj);
break;
case T_ExtensibleNode:
_outExtensibleNode(str, obj);
@ -3953,6 +3981,9 @@ outNode(StringInfo str, const void *obj)
case T_IndexStmt:
_outIndexStmt(str, obj);
break;
case T_CreateStatsStmt:
_outCreateStatsStmt(str, obj);
break;
case T_NotifyStmt:
_outNotifyStmt(str, obj);
break;

View File

@ -29,6 +29,7 @@
#include "catalog/heap.h"
#include "catalog/partition.h"
#include "catalog/pg_am.h"
#include "catalog/pg_statistic_ext.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
@ -40,8 +41,11 @@
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "statistics/statistics.h"
#include "storage/bufmgr.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
@ -63,7 +67,7 @@ static List *get_relation_constraints(PlannerInfo *root,
bool include_notnull);
static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
Relation heapRelation);
static List *get_relation_statistics(RelOptInfo *rel, Relation relation);
/*
* get_relation_info -
@ -398,6 +402,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
rel->indexlist = indexinfos;
rel->statlist = get_relation_statistics(rel, relation);
/* Grab foreign-table info using the relcache, while we have it */
if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{
@ -1251,6 +1257,65 @@ get_relation_constraints(PlannerInfo *root,
return result;
}
/*
* get_relation_statistics
* Retrieve extended statistics defined on the table.
*
* Returns a List (possibly empty) of StatisticExtInfo objects describing
* the statistics. Note that this doesn't load the actual statistics data,
* just the identifying metadata. Only stats actually built are considered.
*/
static List *
get_relation_statistics(RelOptInfo *rel, Relation relation)
{
List *statoidlist;
List *stainfos = NIL;
ListCell *l;
statoidlist = RelationGetStatExtList(relation);
foreach(l, statoidlist)
{
Oid statOid = lfirst_oid(l);
Form_pg_statistic_ext staForm;
HeapTuple htup;
Bitmapset *keys = NULL;
int i;
htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
if (!htup)
elog(ERROR, "cache lookup failed for statistics %u", statOid);
staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
/*
* First, build the array of columns covered. This is ultimately
* wasted if no stats are actually built, but it doesn't seem worth
* troubling over that case.
*/
for (i = 0; i < staForm->stakeys.dim1; i++)
keys = bms_add_member(keys, staForm->stakeys.values[i]);
/* add one StatisticExtInfo for each kind built */
if (statext_is_kind_built(htup, STATS_EXT_NDISTINCT))
{
StatisticExtInfo *info = makeNode(StatisticExtInfo);
info->statOid = statOid;
info->rel = rel;
info->kind = STATS_EXT_NDISTINCT;
info->keys = bms_copy(keys);
stainfos = lcons(info, stainfos);
}
ReleaseSysCache(htup);
bms_free(keys);
}
list_free(statoidlist);
return stainfos;
}
/*
* relation_excluded_by_constraints

View File

@ -257,7 +257,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
ConstraintsSetStmt CopyStmt CreateAsStmt CreateCastStmt
CreateDomainStmt CreateExtensionStmt CreateGroupStmt CreateOpClassStmt
CreateOpFamilyStmt AlterOpFamilyStmt CreatePLangStmt
CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt
CreateSchemaStmt CreateSeqStmt CreateStmt CreateStatsStmt CreateTableSpaceStmt
CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt
CreateAssertStmt CreateTransformStmt CreateTrigStmt CreateEventTrigStmt
CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatePolicyStmt
@ -874,6 +874,7 @@ stmt :
| CreateSeqStmt
| CreateStmt
| CreateSubscriptionStmt
| CreateStatsStmt
| CreateTableSpaceStmt
| CreateTransformStmt
| CreateTrigStmt
@ -3747,6 +3748,35 @@ OptConsTableSpace: USING INDEX TABLESPACE name { $$ = $4; }
ExistingIndex: USING INDEX index_name { $$ = $3; }
;
/*****************************************************************************
*
* QUERY :
* CREATE STATISTICS stats_name WITH (options) ON (columns) FROM relname
*
*****************************************************************************/
CreateStatsStmt: CREATE STATISTICS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name
{
CreateStatsStmt *n = makeNode(CreateStatsStmt);
n->defnames = $3;
n->relation = $10;
n->keys = $7;
n->options = $4;
n->if_not_exists = false;
$$ = (Node *)n;
}
| CREATE STATISTICS IF_P NOT EXISTS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name
{
CreateStatsStmt *n = makeNode(CreateStatsStmt);
n->defnames = $6;
n->relation = $13;
n->keys = $10;
n->options = $7;
n->if_not_exists = true;
$$ = (Node *)n;
}
;
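
For reference, a hedged sketch of statements accepted by this grammar as
committed here; ndistinct is currently the only recognized option:

CREATE STATISTICS s1 ON (a, b) FROM t1;
CREATE STATISTICS s2 WITH (ndistinct) ON (a, b, c) FROM t1;
CREATE STATISTICS IF NOT EXISTS s2 WITH (ndistinct) ON (a, b, c) FROM t1;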
/*****************************************************************************
*
@ -6042,6 +6072,7 @@ drop_type_any_name:
| FOREIGN TABLE { $$ = OBJECT_FOREIGN_TABLE; }
| COLLATION { $$ = OBJECT_COLLATION; }
| CONVERSION_P { $$ = OBJECT_CONVERSION; }
| STATISTICS { $$ = OBJECT_STATISTIC_EXT; }
| TEXT_P SEARCH PARSER { $$ = OBJECT_TSPARSER; }
| TEXT_P SEARCH DICTIONARY { $$ = OBJECT_TSDICTIONARY; }
| TEXT_P SEARCH TEMPLATE { $$ = OBJECT_TSTEMPLATE; }
@ -6119,7 +6150,7 @@ opt_restart_seqs:
* EXTENSION | EVENT TRIGGER | FOREIGN DATA WRAPPER |
* FOREIGN TABLE | INDEX | [PROCEDURAL] LANGUAGE |
* MATERIALIZED VIEW | POLICY | ROLE | SCHEMA | SEQUENCE |
* SERVER | TABLE | TABLESPACE |
* SERVER | STATISTICS | TABLE | TABLESPACE |
* TEXT SEARCH CONFIGURATION | TEXT SEARCH DICTIONARY |
* TEXT SEARCH PARSER | TEXT SEARCH TEMPLATE | TYPE |
* VIEW] <objname> |
@ -6288,6 +6319,7 @@ comment_type_any_name:
COLUMN { $$ = OBJECT_COLUMN; }
| INDEX { $$ = OBJECT_INDEX; }
| SEQUENCE { $$ = OBJECT_SEQUENCE; }
| STATISTICS { $$ = OBJECT_STATISTIC_EXT; }
| TABLE { $$ = OBJECT_TABLE; }
| VIEW { $$ = OBJECT_VIEW; }
| MATERIALIZED VIEW { $$ = OBJECT_MATVIEW; }
@ -8428,6 +8460,15 @@ RenameStmt: ALTER AGGREGATE aggregate_with_argtypes RENAME TO name
n->missing_ok = false;
$$ = (Node *)n;
}
| ALTER STATISTICS any_name RENAME TO name
{
RenameStmt *n = makeNode(RenameStmt);
n->renameType = OBJECT_STATISTIC_EXT;
n->object = (Node *) $3;
n->newname = $6;
n->missing_ok = false;
$$ = (Node *)n;
}
| ALTER TEXT_P SEARCH PARSER any_name RENAME TO name
{
RenameStmt *n = makeNode(RenameStmt);
@ -8643,6 +8684,15 @@ AlterObjectSchemaStmt:
n->missing_ok = true;
$$ = (Node *)n;
}
| ALTER STATISTICS any_name SET SCHEMA name
{
AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt);
n->objectType = OBJECT_STATISTIC_EXT;
n->object = (Node *) $3;
n->newschema = $6;
n->missing_ok = false;
$$ = (Node *)n;
}
| ALTER TEXT_P SEARCH PARSER any_name SET SCHEMA name
{
AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt);
@ -8906,6 +8956,14 @@ AlterOwnerStmt: ALTER AGGREGATE aggregate_with_argtypes OWNER TO RoleSpec
n->newowner = $6;
$$ = (Node *)n;
}
| ALTER STATISTICS any_name OWNER TO RoleSpec
{
AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
n->objectType = OBJECT_STATISTIC_EXT;
n->object = (Node *) $3;
n->newowner = $6;
$$ = (Node *)n;
}
| ALTER TEXT_P SEARCH DICTIONARY any_name OWNER TO RoleSpec
{
AlterOwnerStmt *n = makeNode(AlterOwnerStmt);

View File

@ -0,0 +1,17 @@
#-------------------------------------------------------------------------
#
# Makefile--
# Makefile for statistics
#
# IDENTIFICATION
# src/backend/statistics/Makefile
#
#-------------------------------------------------------------------------
subdir = src/backend/statistics
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
OBJS = extended_stats.o mvdistinct.o
include $(top_srcdir)/src/backend/common.mk

View File

@ -0,0 +1,34 @@
Extended statistics
===================
When estimating various quantities (e.g. condition selectivities) the default
approach relies on the assumption of independence. In practice that's often
not true, resulting in estimation errors.
Extended statistics track different types of dependencies between the columns,
hopefully improving the estimates and producing better plans.
Currently we only have one type of extended statistics - ndistinct
coefficients, and we use it to improve estimates of grouping queries. See
README.ndistinct for details.
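As an illustration (with made-up numbers): if columns a and b each have 100
distinct values but are perfectly correlated so that only 100 distinct (a,b)
combinations actually occur, the independence assumption estimates
100 * 100 = 10000 groups for GROUP BY a, b, whereas an ndistinct coefficient
on (a,b) lets the planner use the much more accurate value of 100.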
Size of sample in ANALYZE
-------------------------
When performing ANALYZE, the number of rows to sample is determined as
(300 * statistics_target)
That works reasonably well for statistics on individual columns, but perhaps
it's not enough for extended statistics. Papers analyzing estimation errors
all use samples proportional to the table (usually finding that 1-3% of the
table is enough to build accurate stats).
The requested accuracy (number of MCV items or histogram bins) should also
be considered when determining the sample size, and in extended statistics
those are not necessarily limited by statistics_target.
This however merits further discussion, because collecting the sample is quite
expensive and increasing it further would make ANALYZE even more painful.
Judging by the experiments with the current implementation, the fixed size
seems to work reasonably well for now, so we leave this as future work.
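For a rough sense of scale (assuming the default statistics target of 100):
ANALYZE then samples 300 * 100 = 30000 rows regardless of table size, while
the 1-3% guideline from the literature would instead suggest 100000-300000
rows for a table with 10 million rows.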

View File

@ -0,0 +1,389 @@
/*-------------------------------------------------------------------------
*
* extended_stats.c
* POSTGRES extended statistics
*
* Generic code supporting statistic objects created via CREATE STATISTICS.
*
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/statistics/extended_stats.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "catalog/indexing.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_statistic_ext.h"
#include "nodes/relation.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/syscache.h"
/*
* Used internally to refer to an individual pg_statistic_ext entry.
*/
typedef struct StatExtEntry
{
Oid statOid; /* OID of pg_statistic_ext entry */
Bitmapset *columns; /* attribute numbers covered by the statistics */
List *types; /* 'char' list of enabled statistic kinds */
} StatExtEntry;
static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
int natts, VacAttrStats **vacattrstats);
static void statext_store(Relation pg_stext, Oid statOid,
MVNDistinct *ndistinct,
VacAttrStats **stats);
/*
* Compute requested extended stats, using the rows sampled for the plain
* (single-column) stats.
*
* This fetches a list of stats from pg_statistic_ext, computes the stats
* and serializes them back into the catalog (as bytea values).
*/
void
BuildRelationExtStatistics(Relation onerel, double totalrows,
int numrows, HeapTuple *rows,
int natts, VacAttrStats **vacattrstats)
{
Relation pg_stext;
ListCell *lc;
List *stats;
pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock);
stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
foreach(lc, stats)
{
StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
MVNDistinct *ndistinct = NULL;
VacAttrStats **stats;
ListCell *lc2;
/* filter only the interesting vacattrstats records */
stats = lookup_var_attr_stats(onerel, stat->columns,
natts, vacattrstats);
/* check allowed number of dimensions */
Assert(bms_num_members(stat->columns) >= 2 &&
bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
/* compute statistic of each type */
foreach(lc2, stat->types)
{
char t = (char) lfirst_int(lc2);
if (t == STATS_EXT_NDISTINCT)
ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
stat->columns, stats);
}
/* store the statistics in the catalog */
statext_store(pg_stext, stat->statOid, ndistinct, stats);
}
heap_close(pg_stext, RowExclusiveLock);
}
/*
* statext_is_kind_built
* Is this stat kind built in the given pg_statistic_ext tuple?
*/
bool
statext_is_kind_built(HeapTuple htup, char type)
{
AttrNumber attnum;
switch (type)
{
case STATS_EXT_NDISTINCT:
attnum = Anum_pg_statistic_ext_standistinct;
break;
default:
elog(ERROR, "unexpected statistics type requested: %d", type);
}
return !heap_attisnull(htup, attnum);
}
/*
* Return a list (of StatExtEntry) of statistics for the given relation.
*/
static List *
fetch_statentries_for_relation(Relation pg_statext, Oid relid)
{
SysScanDesc scan;
ScanKeyData skey;
HeapTuple htup;
List *result = NIL;
/*
* Prepare to scan pg_statistic_ext for entries having starelid = this
* rel.
*/
ScanKeyInit(&skey,
Anum_pg_statistic_ext_starelid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(relid));
scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
NULL, 1, &skey);
while (HeapTupleIsValid(htup = systable_getnext(scan)))
{
StatExtEntry *entry;
Datum datum;
bool isnull;
int i;
ArrayType *arr;
char *enabled;
Form_pg_statistic_ext staForm;
entry = palloc0(sizeof(StatExtEntry));
entry->statOid = HeapTupleGetOid(htup);
staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
for (i = 0; i < staForm->stakeys.dim1; i++)
{
entry->columns = bms_add_member(entry->columns,
staForm->stakeys.values[i]);
}
/* decode the staenabled char array into a list of chars */
datum = SysCacheGetAttr(STATEXTOID, htup,
Anum_pg_statistic_ext_staenabled, &isnull);
Assert(!isnull);
arr = DatumGetArrayTypeP(datum);
if (ARR_NDIM(arr) != 1 ||
ARR_HASNULL(arr) ||
ARR_ELEMTYPE(arr) != CHAROID)
elog(ERROR, "staenabled is not a 1-D char array");
enabled = (char *) ARR_DATA_PTR(arr);
for (i = 0; i < ARR_DIMS(arr)[0]; i++)
{
Assert(enabled[i] == STATS_EXT_NDISTINCT);
entry->types = lappend_int(entry->types, (int) enabled[i]);
}
result = lappend(result, entry);
}
systable_endscan(scan);
return result;
}
/*
* Using 'vacattrstats' of size 'natts' as input data, return a newly built
* VacAttrStats array which includes only the items corresponding to attributes
* indicated by 'attrs'.
*/
static VacAttrStats **
lookup_var_attr_stats(Relation rel, Bitmapset *attrs, int natts,
VacAttrStats **vacattrstats)
{
int i = 0;
int x = -1;
VacAttrStats **stats;
Bitmapset *matched = NULL;
stats = (VacAttrStats **)
palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
/* lookup VacAttrStats info for the requested columns (same attnum) */
while ((x = bms_next_member(attrs, x)) >= 0)
{
int j;
stats[i] = NULL;
for (j = 0; j < natts; j++)
{
if (x == vacattrstats[j]->tupattnum)
{
stats[i] = vacattrstats[j];
break;
}
}
if (!stats[i])
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("extended statistics could not be collected for column \"%s\" of relation %s.%s",
NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname),
get_namespace_name(rel->rd_rel->relnamespace),
RelationGetRelationName(rel)),
errhint("Consider ALTER TABLE \"%s\".\"%s\" ALTER \"%s\" SET STATISTICS -1",
get_namespace_name(rel->rd_rel->relnamespace),
RelationGetRelationName(rel),
NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname))));
/*
* Check that we found a non-dropped column and that the attnum
* matches.
*/
Assert(!stats[i]->attr->attisdropped);
matched = bms_add_member(matched, stats[i]->tupattnum);
i++;
}
if (bms_subset_compare(matched, attrs) != BMS_EQUAL)
elog(ERROR, "could not find all attributes in attribute stats array");
bms_free(matched);
return stats;
}
/*
* statext_store
* Serializes the statistics and stores them into the pg_statistic_ext tuple.
*/
static void
statext_store(Relation pg_stext, Oid statOid,
MVNDistinct *ndistinct,
VacAttrStats **stats)
{
HeapTuple stup,
oldtup;
Datum values[Natts_pg_statistic_ext];
bool nulls[Natts_pg_statistic_ext];
bool replaces[Natts_pg_statistic_ext];
memset(nulls, 1, Natts_pg_statistic_ext * sizeof(bool));
memset(replaces, 0, Natts_pg_statistic_ext * sizeof(bool));
memset(values, 0, Natts_pg_statistic_ext * sizeof(Datum));
/*
* Construct a new pg_statistic_ext tuple, replacing the calculated stats.
*/
if (ndistinct != NULL)
{
bytea *data = statext_ndistinct_serialize(ndistinct);
nulls[Anum_pg_statistic_ext_standistinct - 1] = (data == NULL);
values[Anum_pg_statistic_ext_standistinct - 1] = PointerGetDatum(data);
}
/* always replace the value (either by bytea or NULL) */
replaces[Anum_pg_statistic_ext_standistinct - 1] = true;
/* there should already be a pg_statistic_ext tuple */
oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
if (!HeapTupleIsValid(oldtup))
elog(ERROR, "cache lookup failed for extended statistics %u", statOid);
/* replace it */
stup = heap_modify_tuple(oldtup,
RelationGetDescr(pg_stext),
values,
nulls,
replaces);
ReleaseSysCache(oldtup);
CatalogTupleUpdate(pg_stext, &stup->t_self, stup);
heap_freetuple(stup);
}
/* initialize multi-dimensional sort */
MultiSortSupport
multi_sort_init(int ndims)
{
MultiSortSupport mss;
Assert(ndims >= 2);
mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
+sizeof(SortSupportData) * ndims);
mss->ndims = ndims;
return mss;
}
/*
* Prepare sort support info using the given sort operator
* at the position 'sortdim'
*/
void
multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper)
{
SortSupport ssup = &mss->ssup[sortdim];
ssup->ssup_cxt = CurrentMemoryContext;
ssup->ssup_collation = DEFAULT_COLLATION_OID;
ssup->ssup_nulls_first = false;
PrepareSortSupportFromOrderingOp(oper, ssup);
}
/* compare all the dimensions in the selected order */
int
multi_sort_compare(const void *a, const void *b, void *arg)
{
MultiSortSupport mss = (MultiSortSupport) arg;
SortItem *ia = (SortItem *) a;
SortItem *ib = (SortItem *) b;
int i;
for (i = 0; i < mss->ndims; i++)
{
int compare;
compare = ApplySortComparator(ia->values[i], ia->isnull[i],
ib->values[i], ib->isnull[i],
&mss->ssup[i]);
if (compare != 0)
return compare;
}
/* equal by default */
return 0;
}
/* compare selected dimension */
int
multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
MultiSortSupport mss)
{
return ApplySortComparator(a->values[dim], a->isnull[dim],
b->values[dim], b->isnull[dim],
&mss->ssup[dim]);
}
int
multi_sort_compare_dims(int start, int end,
const SortItem *a, const SortItem *b,
MultiSortSupport mss)
{
int dim;
for (dim = start; dim <= end; dim++)
{
int r = ApplySortComparator(a->values[dim], a->isnull[dim],
b->values[dim], b->isnull[dim],
&mss->ssup[dim]);
if (r != 0)
return r;
}
return 0;
}

View File

@ -0,0 +1,671 @@
/*-------------------------------------------------------------------------
*
* mvdistinct.c
* POSTGRES multivariate ndistinct coefficients
*
* Estimating number of groups in a combination of columns (e.g. for GROUP BY)
* is tricky, and the estimation error is often significant.
* The multivariate ndistinct coefficients address this by storing ndistinct
* estimates for combinations of the user-specified columns. So for example
* given a statistics object on three columns (a,b,c), this module estimates
* and stores n-distinct for (a,b), (a,c), (b,c) and (a,b,c). The per-column
* estimates are already available in pg_statistic.
*
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/statistics/mvdistinct.c
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_statistic_ext.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "lib/stringinfo.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
static double ndistinct_for_combination(double totalrows, int numrows,
HeapTuple *rows, VacAttrStats **stats,
int k, int *combination);
static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
static int n_choose_k(int n, int k);
static int num_combinations(int n);
/* Combination generator API */
/* internal state for generator of k-combinations of n elements */
typedef struct CombinationGenerator
{
int k; /* size of the combination */
int n; /* total number of elements */
int current; /* index of the next combination to return */
int ncombinations; /* number of combinations (size of array) */
int *combinations; /* array of pre-built combinations */
} CombinationGenerator;
static CombinationGenerator *generator_init(int n, int k);
static void generator_free(CombinationGenerator *state);
static int *generator_next(CombinationGenerator *state);
static void generate_combinations(CombinationGenerator *state);
/*
* statext_ndistinct_build
* Compute ndistinct coefficient for the combination of attributes.
*
* This computes the ndistinct estimate using the same estimator used
* in analyze.c and then computes the coefficient.
*/
MVNDistinct *
statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
Bitmapset *attrs, VacAttrStats **stats)
{
MVNDistinct *result;
int k;
int itemcnt;
int numattrs = bms_num_members(attrs);
int numcombs = num_combinations(numattrs);
result = palloc(offsetof(MVNDistinct, items) +
numcombs * sizeof(MVNDistinctItem));
result->magic = STATS_NDISTINCT_MAGIC;
result->type = STATS_NDISTINCT_TYPE_BASIC;
result->nitems = numcombs;
itemcnt = 0;
for (k = 2; k <= numattrs; k++)
{
int *combination;
CombinationGenerator *generator;
/* generate combinations of K out of N elements */
generator = generator_init(numattrs, k);
while ((combination = generator_next(generator)))
{
MVNDistinctItem *item = &result->items[itemcnt];
int j;
item->attrs = NULL;
for (j = 0; j < k; j++)
item->attrs = bms_add_member(item->attrs,
stats[combination[j]]->attr->attnum);
item->ndistinct =
ndistinct_for_combination(totalrows, numrows, rows,
stats, k, combination);
itemcnt++;
Assert(itemcnt <= result->nitems);
}
generator_free(generator);
}
/* must consume exactly the whole output array */
Assert(itemcnt == result->nitems);
return result;
}
/*
* statext_ndistinct_load
* Load the ndistinct value for the indicated pg_statistic_ext tuple
*/
MVNDistinct *
statext_ndistinct_load(Oid mvoid)
{
bool isnull = false;
Datum ndist;
HeapTuple htup;
htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid));
if (!htup)
elog(ERROR, "cache lookup failed for statistics %u", mvoid);
ndist = SysCacheGetAttr(STATEXTOID, htup,
Anum_pg_statistic_ext_standistinct, &isnull);
if (isnull)
elog(ERROR,
"requested statistic kind %c not yet built for statistics %u",
STATS_EXT_NDISTINCT, mvoid);
ReleaseSysCache(htup);
return statext_ndistinct_deserialize(DatumGetByteaP(ndist));
}
/*
* statext_ndistinct_serialize
* serialize ndistinct to the on-disk bytea format
*/
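/*
 * A sketch of the on-disk layout, as implied by the code below: after the
 * varlena header comes the fixed part of MVNDistinct (magic, type, nitems),
 * followed, for each item, by the ndistinct estimate (a double), the number
 * of attributes (an int) and that many AttrNumber values.
 */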
bytea *
statext_ndistinct_serialize(MVNDistinct *ndistinct)
{
int i;
bytea *output;
char *tmp;
Size len;
Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
/*
* Base size is the MVNDistinct header, plus the fixed part of each item
* (its ndistinct value) and an int holding its attribute count.
*/
len = VARHDRSZ + offsetof(MVNDistinct, items) +
ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + sizeof(int));
/* and also include space for the actual attribute numbers */
for (i = 0; i < ndistinct->nitems; i++)
{
int nmembers;
nmembers = bms_num_members(ndistinct->items[i].attrs);
Assert(nmembers >= 2);
len += sizeof(AttrNumber) * nmembers;
}
output = (bytea *) palloc(len);
SET_VARSIZE(output, len);
tmp = VARDATA(output);
/* Store the base struct values */
memcpy(tmp, ndistinct, offsetof(MVNDistinct, items));
tmp += offsetof(MVNDistinct, items);
/*
* store number of attributes and attribute numbers for each ndistinct
* entry
*/
for (i = 0; i < ndistinct->nitems; i++)
{
MVNDistinctItem item = ndistinct->items[i];
int nmembers = bms_num_members(item.attrs);
int x;
memcpy(tmp, &item.ndistinct, sizeof(double));
tmp += sizeof(double);
memcpy(tmp, &nmembers, sizeof(int));
tmp += sizeof(int);
x = -1;
while ((x = bms_next_member(item.attrs, x)) >= 0)
{
AttrNumber value = (AttrNumber) x;
memcpy(tmp, &value, sizeof(AttrNumber));
tmp += sizeof(AttrNumber);
}
Assert(tmp <= ((char *) output + len));
}
return output;
}
/*
* statext_ndistinct_deserialize
* Read an on-disk bytea format MVNDistinct to in-memory format
*/
MVNDistinct *
statext_ndistinct_deserialize(bytea *data)
{
int i;
Size expected_size;
MVNDistinct *ndistinct;
char *tmp;
if (data == NULL)
return NULL;
if (VARSIZE_ANY_EXHDR(data) < offsetof(MVNDistinct, items))
elog(ERROR, "invalid MVNDistinct size %ld (expected at least %ld)",
VARSIZE_ANY_EXHDR(data), offsetof(MVNDistinct, items));
/* read the MVNDistinct header */
ndistinct = (MVNDistinct *) palloc(sizeof(MVNDistinct));
/* initialize pointer to the data part (skip the varlena header) */
tmp = VARDATA_ANY(data);
/* get the header and perform basic sanity checks */
memcpy(ndistinct, tmp, offsetof(MVNDistinct, items));
tmp += offsetof(MVNDistinct, items);
if (ndistinct->magic != STATS_NDISTINCT_MAGIC)
elog(ERROR, "invalid ndistinct magic %d (expected %d)",
ndistinct->magic, STATS_NDISTINCT_MAGIC);
if (ndistinct->type != STATS_NDISTINCT_TYPE_BASIC)
elog(ERROR, "invalid ndistinct type %d (expected %d)",
ndistinct->type, STATS_NDISTINCT_TYPE_BASIC);
Assert(ndistinct->nitems > 0);
/* what minimum bytea size do we expect for those parameters */
expected_size = offsetof(MVNDistinct, items) +
ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) +
sizeof(AttrNumber) * 2);
if (VARSIZE_ANY_EXHDR(data) < expected_size)
elog(ERROR, "invalid dependencies size %ld (expected at least %ld)",
VARSIZE_ANY_EXHDR(data), expected_size);
/* allocate space for the ndistinct items */
ndistinct = repalloc(ndistinct, offsetof(MVNDistinct, items) +
(ndistinct->nitems * sizeof(MVNDistinctItem)));
for (i = 0; i < ndistinct->nitems; i++)
{
MVNDistinctItem *item = &ndistinct->items[i];
int nelems;
item->attrs = NULL;
/* ndistinct value */
memcpy(&item->ndistinct, tmp, sizeof(double));
tmp += sizeof(double);
/* number of attributes */
memcpy(&nelems, tmp, sizeof(int));
tmp += sizeof(int);
Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS));
while (nelems-- > 0)
{
AttrNumber attno;
memcpy(&attno, tmp, sizeof(AttrNumber));
tmp += sizeof(AttrNumber);
item->attrs = bms_add_member(item->attrs, attno);
}
/* still within the bytea */
Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
}
/* we should have consumed the whole bytea exactly */
Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
return ndistinct;
}
/*
* pg_ndistinct_in
* input routine for type pg_ndistinct
*
* pg_ndistinct is real enough to be a table column, but it has no
* operations of its own, and disallows input (just like pg_node_tree).
*/
Datum
pg_ndistinct_in(PG_FUNCTION_ARGS)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot accept a value of type %s", "pg_ndistinct")));
PG_RETURN_VOID(); /* keep compiler quiet */
}
/*
* pg_ndistinct_out
* output routine for type pg_ndistinct
*
* Produces a human-readable representation of the value.
*/
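/*
 * With made-up numbers, the result looks roughly like
 * [{(b 3 4), 11.000000}, {(b 3 6), 50.000000}, {(b 4 6), 75.000000}]
 * where each "(b ...)" part is the attribute-number bitmapset as printed
 * by outBitmapset().
 */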
Datum
pg_ndistinct_out(PG_FUNCTION_ARGS)
{
bytea *data = PG_GETARG_BYTEA_PP(0);
MVNDistinct *ndist = statext_ndistinct_deserialize(data);
int i;
StringInfoData str;
initStringInfo(&str);
appendStringInfoChar(&str, '[');
for (i = 0; i < ndist->nitems; i++)
{
MVNDistinctItem item = ndist->items[i];
if (i > 0)
appendStringInfoString(&str, ", ");
appendStringInfoChar(&str, '{');
outBitmapset(&str, item.attrs);
appendStringInfo(&str, ", %f}", item.ndistinct);
}
appendStringInfoChar(&str, ']');
PG_RETURN_CSTRING(str.data);
}
/*
* pg_ndistinct_recv
* binary input routine for type pg_ndistinct
*/
Datum
pg_ndistinct_recv(PG_FUNCTION_ARGS)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot accept a value of type %s", "pg_ndistinct")));
PG_RETURN_VOID(); /* keep compiler quiet */
}
/*
* pg_ndistinct_send
* binary output routine for type pg_ndistinct
*
* n-distinct is serialized into a bytea value, so let's send that.
*/
Datum
pg_ndistinct_send(PG_FUNCTION_ARGS)
{
return byteasend(fcinfo);
}
/*
* ndistinct_for_combination
* Estimates number of distinct values in a combination of columns.
*
* This uses the same ndistinct estimator as compute_scalar_stats() in
* ANALYZE, i.e.,
* n*d / (n - f1 + f1*n/N)
*
* except that instead of values in a single column we are dealing with
* combination of multiple columns.
*/
static double
ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
VacAttrStats **stats, int k, int *combination)
{
int i,
j;
int f1,
cnt,
d;
bool *isnull;
Datum *values;
SortItem *items;
MultiSortSupport mss;
mss = multi_sort_init(k);
/*
* In order to determine the number of distinct elements, create separate
* values[]/isnull[] arrays with all the data we have, then sort them
* using the specified column combination as dimensions. We could try to
* sort in place, but it'd probably be more complex and bug-prone.
*/
items = (SortItem *) palloc(numrows * sizeof(SortItem));
values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
isnull = (bool *) palloc0(sizeof(bool) * numrows * k);
for (i = 0; i < numrows; i++)
{
items[i].values = &values[i * k];
items[i].isnull = &isnull[i * k];
}
/*
* For each dimension, set up sort-support and fill in the values from
* the sample data.
*/
for (i = 0; i < k; i++)
{
VacAttrStats *colstat = stats[combination[i]];
TypeCacheEntry *type;
type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
if (type->lt_opr == InvalidOid) /* shouldn't happen */
elog(ERROR, "cache lookup failed for ordering operator for type %u",
colstat->attrtypid);
/* prepare the sort function for this dimension */
multi_sort_add_dimension(mss, i, type->lt_opr);
/* accumulate all the data for this dimension into the arrays */
for (j = 0; j < numrows; j++)
{
items[j].values[i] =
heap_getattr(rows[j],
colstat->attr->attnum,
colstat->tupDesc,
&items[j].isnull[i]);
}
}
/* We can sort the array now ... */
qsort_arg((void *) items, numrows, sizeof(SortItem),
multi_sort_compare, mss);
/* ... and count the number of distinct combinations */
f1 = 0;
cnt = 1;
d = 1;
for (i = 1; i < numrows; i++)
{
if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
{
if (cnt == 1)
f1 += 1;
d++;
cnt = 0;
}
cnt += 1;
}
if (cnt == 1)
f1 += 1;
return estimate_ndistinct(totalrows, numrows, d, f1);
}
/* The Duj1 estimator (already used in analyze.c). */
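/*
 * In the notation of the formula above (a sketch matching the parameters):
 * numrows is n (the sample size), totalrows is N (the table size), d is the
 * number of distinct combinations seen in the sample, and f1 is the number
 * of combinations seen exactly once.  The result is clamped to [d, N] and
 * rounded to the nearest integer.
 */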
static double
estimate_ndistinct(double totalrows, int numrows, int d, int f1)
{
double numer,
denom,
ndistinct;
numer = (double) numrows * (double) d;
denom = (double) (numrows - f1) +
(double) f1 *(double) numrows / totalrows;
ndistinct = numer / denom;
/* Clamp to sane range in case of roundoff error */
if (ndistinct < (double) d)
ndistinct = (double) d;
if (ndistinct > totalrows)
ndistinct = totalrows;
return floor(ndistinct + 0.5);
}
/*
* n_choose_k
* computes binomial coefficients using an algorithm that is both
* efficient and resistant to overflow
*/
static int
n_choose_k(int n, int k)
{
int d,
r;
Assert((k > 0) && (n >= k));
/* use symmetry of the binomial coefficients */
k = Min(k, n - k);
r = 1;
for (d = 1; d <= k; ++d)
{
r *= n--;
r /= d;
}
return r;
}
/*
* num_combinations
* number of combinations, excluding single-value combinations
*/
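/*
 * This equals the sum of C(n,k) for k = 2..n, i.e. 2^n - (n + 1).  For
 * example, n = 3 gives 4 combinations: (a,b), (a,c), (b,c) and (a,b,c).
 */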
static int
num_combinations(int n)
{
int k;
int ncombs = 1;
for (k = 1; k <= n; k++)
ncombs *= 2;
ncombs -= (n + 1);
return ncombs;
}
/*
* generator_init
* initialize the generator of combinations
*
* The generator produces combinations of K elements in the interval (0..N).
* We prebuild all the combinations in this method, which is simpler than
* generating them on the fly.
*/
static CombinationGenerator *
generator_init(int n, int k)
{
CombinationGenerator *state;
Assert((n >= k) && (k > 0));
/* allocate the generator state as a single chunk of memory */
state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator));
state->ncombinations = n_choose_k(n, k);
/* pre-allocate space for all combinations */
state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
state->current = 0;
state->k = k;
state->n = n;
/* now actually pre-generate all the combinations of K elements */
generate_combinations(state);
/* make sure we got the expected number of combinations */
Assert(state->current == state->ncombinations);
/* reset the number, so we start with the first one */
state->current = 0;
return state;
}
/*
* generator_next
* returns the next combination from the prebuilt list
*
* Returns a combination of K array indexes (0 .. N), as specified to
* generator_init(), or NULL when there are no more combinations.
*/
static int *
generator_next(CombinationGenerator *state)
{
if (state->current == state->ncombinations)
return NULL;
return &state->combinations[state->k * state->current++];
}
/*
* generator_free
* free the internal state of the generator
*
* Releases the generator internal state (pre-built combinations).
*/
static void
generator_free(CombinationGenerator *state)
{
pfree(state->combinations);
pfree(state);
}
/*
* generate_combinations_recurse
* given a prefix, generate all possible combinations
*
* Given a prefix (first few elements of the combination), generate following
* elements recursively. We generate the combinations in lexicographic order,
* which eliminates permutations of the same combination.
*/
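/*
 * For example, with n = 3 and k = 2 the generated combinations are
 * (0,1), (0,2), (1,2), in that (lexicographic) order.
 */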
static void
generate_combinations_recurse(CombinationGenerator *state,
int index, int start, int *current)
{
/* If we haven't filled all the elements, simply recurse. */
if (index < state->k)
{
int i;
/*
* The values have to be in ascending order, so make sure we start
* with the value passed by parameter.
*/
for (i = start; i < state->n; i++)
{
current[index] = i;
generate_combinations_recurse(state, (index + 1), (i + 1), current);
}
return;
}
else
{
/* we got a valid combination, add it to the array */
memcpy(&state->combinations[(state->k * state->current)],
current, state->k * sizeof(int));
state->current++;
}
}
/*
* generate_combinations
* generate all k-combinations of N elements
*/
static void
generate_combinations(CombinationGenerator *state)
{
int *current = (int *) palloc0(sizeof(int) * state->k);
generate_combinations_recurse(state, 0, 0, current);
pfree(current);
}

View File

@ -1623,6 +1623,10 @@ ProcessUtilitySlow(ParseState *pstate,
commandCollected = true;
break;
case T_CreateStatsStmt:
address = CreateStatistics((CreateStatsStmt *) parsetree);
break;
case T_AlterCollationStmt:
address = AlterCollation((AlterCollationStmt *) parsetree);
break;
@ -1992,6 +1996,8 @@ AlterObjectTypeCommandTag(ObjectType objtype)
break;
case OBJECT_SUBSCRIPTION:
tag = "ALTER SUBSCRIPTION";
case OBJECT_STATISTIC_EXT:
tag = "ALTER STATISTICS";
break;
default:
tag = "???";
@ -2286,6 +2292,8 @@ CreateCommandTag(Node *parsetree)
break;
case OBJECT_PUBLICATION:
tag = "DROP PUBLICATION";
case OBJECT_STATISTIC_EXT:
tag = "DROP STATISTICS";
break;
default:
tag = "???";
@ -2689,6 +2697,10 @@ CreateCommandTag(Node *parsetree)
tag = "EXECUTE";
break;
case T_CreateStatsStmt:
tag = "CREATE STATISTICS";
break;
case T_DeallocateStmt:
{
DeallocateStmt *stmt = (DeallocateStmt *) parsetree;

View File

@ -35,6 +35,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
@ -317,6 +318,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
const Oid *excludeOps,
bool attrsOnly, bool showTblSpc,
int prettyFlags, bool missing_ok);
static char *pg_get_statisticsext_worker(Oid statextid, bool missing_ok);
static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
bool attrsOnly);
static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
@ -1421,6 +1423,85 @@ pg_get_indexdef_worker(Oid indexrelid, int colno,
return buf.data;
}
/*
* pg_get_statisticsextdef
* Get the definition of an extended statistics object
*/
Datum
pg_get_statisticsextdef(PG_FUNCTION_ARGS)
{
Oid statextid = PG_GETARG_OID(0);
char *res;
res = pg_get_statisticsext_worker(statextid, true);
if (res == NULL)
PG_RETURN_NULL();
PG_RETURN_TEXT_P(string_to_text(res));
}
/*
* Internal workhorse to decompile an extended statistics object.
*/
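/*
 * The returned text has the form (with hypothetical names)
 *   CREATE STATISTICS s1 ON (a, b) FROM t1
 */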
static char *
pg_get_statisticsext_worker(Oid statextid, bool missing_ok)
{
Form_pg_statistic_ext statextrec;
Form_pg_class pgclassrec;
HeapTuple statexttup;
HeapTuple pgclasstup;
StringInfoData buf;
int colno;
statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
if (!HeapTupleIsValid(statexttup))
{
if (missing_ok)
return NULL;
elog(ERROR, "cache lookup failed for extended statistics %u", statextid);
}
statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup);
pgclasstup = SearchSysCache1(RELOID, ObjectIdGetDatum(statextrec->starelid));
if (!HeapTupleIsValid(pgclasstup))
{
ReleaseSysCache(statexttup);
elog(ERROR, "cache lookup failed for relation %u", statextrec->starelid);
}
pgclassrec = (Form_pg_class) GETSTRUCT(pgclasstup);
initStringInfo(&buf);
appendStringInfo(&buf, "CREATE STATISTICS %s ON (",
quote_identifier(NameStr(statextrec->staname)));
for (colno = 0; colno < statextrec->stakeys.dim1; colno++)
{
AttrNumber attnum = statextrec->stakeys.values[colno];
char *attname;
if (colno > 0)
appendStringInfoString(&buf, ", ");
attname = get_relid_attribute_name(statextrec->starelid, attnum);
appendStringInfoString(&buf, quote_identifier(attname));
}
appendStringInfo(&buf, ") FROM %s",
quote_identifier(NameStr(pgclassrec->relname)));
ReleaseSysCache(statexttup);
ReleaseSysCache(pgclasstup);
return buf.data;
}
/*
* pg_get_partkeydef
*

View File

@ -110,6 +110,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "mb/pg_wchar.h"
@ -126,6 +127,7 @@
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
#include "statistics/statistics.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/date.h"
@ -164,6 +166,8 @@ static double eqjoinsel_inner(Oid operator,
static double eqjoinsel_semi(Oid operator,
VariableStatData *vardata1, VariableStatData *vardata2,
RelOptInfo *inner_rel);
static bool estimate_multivariate_ndistinct(PlannerInfo *root,
RelOptInfo *rel, List **varinfos, double *ndistinct);
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound);
@ -3398,25 +3402,25 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
{
GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
RelOptInfo *rel = varinfo1->rel;
double reldistinct = varinfo1->ndistinct;
double reldistinct = 1;
double relmaxndistinct = reldistinct;
int relvarcount = 1;
List *newvarinfos = NIL;
List *relvarinfos = NIL;
/*
* Get the product of numdistinct estimates of the Vars for this rel.
* Also, construct new varinfos list of remaining Vars.
* Split the list of varinfos in two - one for the current rel,
* one for remaining Vars on other rels.
*/
relvarinfos = lcons(varinfo1, relvarinfos);
for_each_cell(l, lnext(list_head(varinfos)))
{
GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
if (varinfo2->rel == varinfo1->rel)
{
reldistinct *= varinfo2->ndistinct;
if (relmaxndistinct < varinfo2->ndistinct)
relmaxndistinct = varinfo2->ndistinct;
relvarcount++;
/* varinfos on current rel */
relvarinfos = lcons(varinfo2, relvarinfos);
}
else
{
@ -3425,6 +3429,43 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
}
}
/*
* Get the numdistinct estimate for the Vars of this rel. We
* iteratively search for multivariate n-distinct with maximum number
* of vars; assuming that each var group is independent of the others,
* we multiply them together.  Any relvarinfos left over once no more
* multivariate matches can be found are assumed independent as well, so
* their individual ndistinct estimates are multiplied in too.
*/
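/*
 * A worked example with made-up numbers: for GROUP BY a, b, c with an
 * ndistinct statistic on (a, b) estimating 1000 groups and a per-column
 * estimate of 50 for c, the first pass through the loop consumes (a, b),
 * the second falls back to the per-column path, and reldistinct ends up
 * as 1000 * 50 = 50000.
 */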
while (relvarinfos)
{
double mvndistinct;
if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
&mvndistinct))
{
reldistinct *= mvndistinct;
if (relmaxndistinct < mvndistinct)
relmaxndistinct = mvndistinct;
relvarcount++; /* inaccurate, but doesn't matter */
}
else
{
foreach (l, relvarinfos)
{
GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
reldistinct *= varinfo2->ndistinct;
if (relmaxndistinct < varinfo2->ndistinct)
relmaxndistinct = varinfo2->ndistinct;
relvarcount++;
}
/* we're done with this relation */
relvarinfos = NIL;
}
}
/*
* Sanity check --- don't divide by zero if empty relation.
*/
@ -3667,6 +3708,132 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
*-------------------------------------------------------------------------
*/
/*
* Find applicable ndistinct statistics for the given list of VarInfos (which
* must all belong to the given rel), and update *ndistinct to the estimate of
* the MVNDistinctItem that best matches.  If a match is found, *varinfos is
* updated to remove the matched varinfos from the list.
*
* Varinfos that aren't for simple Vars are ignored.
*
* Return TRUE if we're able to find a match, FALSE otherwise.
*/
static bool
estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
List **varinfos, double *ndistinct)
{
ListCell *lc;
Bitmapset *attnums = NULL;
int nmatches;
Oid statOid = InvalidOid;
MVNDistinct *stats;
Bitmapset *matched = NULL;
/* bail out immediately if the table has no extended statistics */
if (!rel->statlist)
return false;
/* Determine the attnums we're looking for */
foreach(lc, *varinfos)
{
GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
Assert(varinfo->rel == rel);
if (IsA(varinfo->var, Var))
{
attnums = bms_add_member(attnums,
((Var *) varinfo->var)->varattno);
}
}
/* look for the ndistinct statistics matching the most vars */
nmatches = 1; /* we require at least two matches */
foreach(lc, rel->statlist)
{
StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
Bitmapset *shared;
/* skip statistics of other kinds */
if (info->kind != STATS_EXT_NDISTINCT)
continue;
/* compute attnums shared by the vars and the statistic */
shared = bms_intersect(info->keys, attnums);
/*
* Does this statistic match more columns than the current best one?
* If so, use this one instead.
*
* XXX This should break ties using name of the statistic, or
* something like that, to make the outcome stable.
*/
if (bms_num_members(shared) > nmatches)
{
statOid = info->statOid;
nmatches = bms_num_members(shared);
matched = shared;
}
}
/* No match? */
if (statOid == InvalidOid)
return false;
Assert(nmatches > 1 && matched != NULL);
stats = statext_ndistinct_load(statOid);
/*
* If we have a match, search it for the specific item that matches (there
* must be one), and construct the output values.
*/
if (stats)
{
int i;
List *newlist = NIL;
MVNDistinctItem *item = NULL;
/* Find the specific item that exactly matches the combination */
for (i = 0; i < stats->nitems; i++)
{
MVNDistinctItem *tmpitem = &stats->items[i];
if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
{
item = tmpitem;
break;
}
}
/* make sure we found an item */
if (!item)
elog(ERROR, "corrupt MVNDistinct entry");
/* Form the output varinfo list, keeping only unmatched ones */
foreach(lc, *varinfos)
{
GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
AttrNumber attnum;
if (!IsA(varinfo->var, Var))
{
newlist = lappend(newlist, varinfo);
continue;
}
attnum = ((Var *) varinfo->var)->varattno;
if (!bms_is_member(attnum, matched))
newlist = lappend(newlist, varinfo);
}
*varinfos = newlist;
*ndistinct = item->ndistinct;
return true;
}
return false;
}
/*
* convert_to_scalar
* Convert non-NULL values of the indicated types to the comparison

View File

@ -56,6 +56,7 @@
#include "catalog/pg_publication.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_trigger.h"
@ -4451,6 +4452,82 @@ RelationGetIndexList(Relation relation)
return result;
}
/*
* RelationGetStatExtList
* get a list of OIDs of extended statistics on this relation
*
* The statistics list is created only if someone requests it, in a way
* similar to RelationGetIndexList(). We scan pg_statistic_ext to find
* relevant statistics, and add the list to the relcache entry so that we
* won't have to compute it again. Note that shared cache inval of a
* relcache entry will delete the old list and set rd_statvalid to 0,
* so that we must recompute the statistics list on next request. This
* handles creation or deletion of a statistic.
*
* The returned list is guaranteed to be sorted in order by OID, although
* this is not currently needed.
*
* Since shared cache inval causes the relcache's copy of the list to go away,
* we return a copy of the list palloc'd in the caller's context. The caller
* may list_free() the returned list after scanning it. This is necessary
* since the caller will typically be doing syscache lookups on the relevant
* statistics, and syscache lookup could cause SI messages to be processed!
*/
List *
RelationGetStatExtList(Relation relation)
{
Relation indrel;
SysScanDesc indscan;
ScanKeyData skey;
HeapTuple htup;
List *result;
List *oldlist;
MemoryContext oldcxt;
/* Quick exit if we already computed the list. */
if (relation->rd_statvalid != 0)
return list_copy(relation->rd_statlist);
/*
* We build the list we intend to return (in the caller's context) while
* doing the scan. After successfully completing the scan, we copy that
* list into the relcache entry. This avoids cache-context memory leakage
* if we get some sort of error partway through.
*/
result = NIL;
/* Prepare to scan pg_statistic_ext for entries having starelid = this rel. */
ScanKeyInit(&skey,
Anum_pg_statistic_ext_starelid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(RelationGetRelid(relation)));
indrel = heap_open(StatisticExtRelationId, AccessShareLock);
indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
NULL, 1, &skey);
while (HeapTupleIsValid(htup = systable_getnext(indscan)))
/* TODO maybe include only already built statistics? */
result = insert_ordered_oid(result, HeapTupleGetOid(htup));
systable_endscan(indscan);
heap_close(indrel, AccessShareLock);
/* Now save a copy of the completed list in the relcache entry. */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
oldlist = relation->rd_statlist;
relation->rd_statlist = list_copy(result);
relation->rd_statvalid = true;
MemoryContextSwitchTo(oldcxt);
/* Don't leak the old list, if there is one */
list_free(oldlist);
return result;
}
/*
* insert_ordered_oid
* Insert a new Oid into a sorted list of Oids, preserving ordering
@ -5560,6 +5637,8 @@ load_relcache_init_file(bool shared)
rel->rd_pkattr = NULL;
rel->rd_idattr = NULL;
rel->rd_pubactions = NULL;
rel->rd_statvalid = false;
rel->rd_statlist = NIL;
rel->rd_createSubid = InvalidSubTransactionId;
rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
rel->rd_amcache = NULL;

View File

@ -61,6 +61,7 @@
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_replication_origin.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_subscription_rel.h"
#include "catalog/pg_tablespace.h"
@ -726,6 +727,28 @@ static const struct cachedesc cacheinfo[] = {
},
32
},
{StatisticExtRelationId, /* STATEXTNAMENSP */
StatisticExtNameIndexId,
2,
{
Anum_pg_statistic_ext_staname,
Anum_pg_statistic_ext_stanamespace,
0,
0
},
4
},
{StatisticExtRelationId, /* STATEXTOID */
StatisticExtOidIndexId,
1,
{
ObjectIdAttributeNumber,
0,
0,
0
},
4
},
{StatisticRelationId, /* STATRELATTINH */
StatisticRelidAttnumInhIndexId,
3,

View File

@ -272,6 +272,10 @@ getSchemaData(Archive *fout, int *numTablesPtr)
write_msg(NULL, "reading indexes\n");
getIndexes(fout, tblinfo, numTables);
if (g_verbose)
write_msg(NULL, "reading extended statistics\n");
getExtendedStatistics(fout, tblinfo, numTables);
if (g_verbose)
write_msg(NULL, "reading constraints\n");
getConstraints(fout, tblinfo, numTables);

View File

@ -3540,7 +3540,8 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass)
strcmp(te->desc, "TRIGGER") == 0 ||
strcmp(te->desc, "ROW SECURITY") == 0 ||
strcmp(te->desc, "POLICY") == 0 ||
strcmp(te->desc, "USER MAPPING") == 0)
strcmp(te->desc, "USER MAPPING") == 0 ||
strcmp(te->desc, "STATISTICS") == 0)
{
/* these object types don't have separate owners */
}

View File

@ -192,6 +192,7 @@ static void dumpAttrDef(Archive *fout, AttrDefInfo *adinfo);
static void dumpSequence(Archive *fout, TableInfo *tbinfo);
static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo);
static void dumpIndex(Archive *fout, IndxInfo *indxinfo);
static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo);
static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo);
static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo);
static void dumpTSParser(Archive *fout, TSParserInfo *prsinfo);
@ -6582,6 +6583,99 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
destroyPQExpBuffer(query);
}
/*
* getExtendedStatistics
* get information about extended statistics on a dumpable table
* or materialized view.
*
* Note: extended statistics data is not returned directly to the caller, but
* it does get entered into the DumpableObject tables.
*/
void
getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables)
{
int i,
j;
PQExpBuffer query;
PGresult *res;
StatsExtInfo *statsextinfo;
int ntups;
int i_tableoid;
int i_oid;
int i_staname;
int i_stadef;
/* Extended statistics were new in v10 */
if (fout->remoteVersion < 100000)
return;
query = createPQExpBuffer();
for (i = 0; i < numTables; i++)
{
TableInfo *tbinfo = &tblinfo[i];
/* Only plain tables and materialized views can have extended statistics. */
if (tbinfo->relkind != RELKIND_RELATION &&
tbinfo->relkind != RELKIND_MATVIEW)
continue;
/*
* Ignore extended statistics of tables whose definitions are not to
* be dumped.
*/
if (!(tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION))
continue;
if (g_verbose)
write_msg(NULL, "reading extended statistics for table \"%s.%s\"\n",
tbinfo->dobj.namespace->dobj.name,
tbinfo->dobj.name);
/* Make sure we are in proper schema so stadef is right */
selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name);
resetPQExpBuffer(query);
appendPQExpBuffer(query,
"SELECT "
"tableoid, "
"oid, "
"staname, "
"pg_catalog.pg_get_statisticsextdef(oid) AS stadef "
"FROM pg_statistic_ext "
"WHERE starelid = '%u' "
"ORDER BY staname", tbinfo->dobj.catId.oid);
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
ntups = PQntuples(res);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
i_staname = PQfnumber(res, "staname");
i_stadef = PQfnumber(res, "stadef");
statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo));
for (j = 0; j < ntups; j++)
{
statsextinfo[j].dobj.objType = DO_STATSEXT;
statsextinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid));
statsextinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid));
AssignDumpId(&statsextinfo[j].dobj);
statsextinfo[j].dobj.name = pg_strdup(PQgetvalue(res, j, i_staname));
statsextinfo[j].dobj.namespace = tbinfo->dobj.namespace;
statsextinfo[j].statsexttable = tbinfo;
statsextinfo[j].statsextdef = pg_strdup(PQgetvalue(res, j, i_stadef));
}
PQclear(res);
}
destroyPQExpBuffer(query);
}
/*
* getConstraints
*
@ -9234,6 +9328,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
case DO_INDEX:
dumpIndex(fout, (IndxInfo *) dobj);
break;
case DO_STATSEXT:
dumpStatisticsExt(fout, (StatsExtInfo *) dobj);
break;
case DO_REFRESH_MATVIEW:
refreshMatViewData(fout, (TableDataInfo *) dobj);
break;
@ -15728,6 +15825,61 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo)
destroyPQExpBuffer(labelq);
}
/*
* dumpStatisticsExt
* write out to fout an extended statistics object
*/
static void
dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo)
{
DumpOptions *dopt = fout->dopt;
TableInfo *tbinfo = statsextinfo->statsexttable;
PQExpBuffer q;
PQExpBuffer delq;
PQExpBuffer labelq;
if (dopt->dataOnly)
return;
q = createPQExpBuffer();
delq = createPQExpBuffer();
labelq = createPQExpBuffer();
appendPQExpBuffer(labelq, "STATISTICS %s",
fmtId(statsextinfo->dobj.name));
appendPQExpBuffer(q, "%s;\n", statsextinfo->statsextdef);
appendPQExpBuffer(delq, "DROP STATISTICS %s.",
fmtId(tbinfo->dobj.namespace->dobj.name));
appendPQExpBuffer(delq, "%s;\n",
fmtId(statsextinfo->dobj.name));
if (statsextinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)
ArchiveEntry(fout, statsextinfo->dobj.catId,
statsextinfo->dobj.dumpId,
statsextinfo->dobj.name,
tbinfo->dobj.namespace->dobj.name,
NULL,
tbinfo->rolname, false,
"STATISTICS", SECTION_POST_DATA,
q->data, delq->data, NULL,
NULL, 0,
NULL, NULL);
/* Dump Statistics Comments */
if (statsextinfo->dobj.dump & DUMP_COMPONENT_COMMENT)
dumpComment(fout, labelq->data,
tbinfo->dobj.namespace->dobj.name,
tbinfo->rolname,
statsextinfo->dobj.catId, 0,
statsextinfo->dobj.dumpId);
destroyPQExpBuffer(q);
destroyPQExpBuffer(delq);
destroyPQExpBuffer(labelq);
}
/*
* dumpConstraint
* write out to fout a user-defined constraint
@ -17266,6 +17418,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
addObjectDependency(postDataBound, dobj->dumpId);
break;
case DO_INDEX:
case DO_STATSEXT:
case DO_REFRESH_MATVIEW:
case DO_TRIGGER:
case DO_EVENT_TRIGGER:

View File

@ -56,6 +56,7 @@ typedef enum
DO_TABLE,
DO_ATTRDEF,
DO_INDEX,
DO_STATSEXT,
DO_RULE,
DO_TRIGGER,
DO_CONSTRAINT,
@ -362,6 +363,13 @@ typedef struct _indxInfo
int relpages; /* relpages of the underlying table */
} IndxInfo;
typedef struct _statsExtInfo
{
DumpableObject dobj;
TableInfo *statsexttable; /* link to table the stats ext is for */
char *statsextdef;
} StatsExtInfo;
typedef struct _ruleInfo
{
DumpableObject dobj;
@ -682,6 +690,7 @@ extern void getOwnedSeqs(Archive *fout, TableInfo tblinfo[], int numTables);
extern InhInfo *getInherits(Archive *fout, int *numInherits);
extern PartInfo *getPartitions(Archive *fout, int *numPartitions);
extern void getIndexes(Archive *fout, TableInfo tblinfo[], int numTables);
extern void getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables);
extern void getConstraints(Archive *fout, TableInfo tblinfo[], int numTables);
extern RuleInfo *getRules(Archive *fout, int *numRules);
extern void getTriggers(Archive *fout, TableInfo tblinfo[], int numTables);

View File

@ -53,10 +53,11 @@ static const int dbObjectTypePriority[] =
18, /* DO_TABLE */
20, /* DO_ATTRDEF */
28, /* DO_INDEX */
29, /* DO_RULE */
30, /* DO_TRIGGER */
29, /* DO_STATSEXT */
30, /* DO_RULE */
31, /* DO_TRIGGER */
27, /* DO_CONSTRAINT */
31, /* DO_FK_CONSTRAINT */
32, /* DO_FK_CONSTRAINT */
2, /* DO_PROCLANG */
10, /* DO_CAST */
23, /* DO_TABLE_DATA */
@ -1291,6 +1292,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize)
"INDEX %s (ID %d OID %u)",
obj->name, obj->dumpId, obj->catId.oid);
return;
case DO_STATSEXT:
snprintf(buf, bufsize,
"STATISTICS %s (ID %d OID %u)",
obj->name, obj->dumpId, obj->catId.oid);
return;
case DO_REFRESH_MATVIEW:
snprintf(buf, bufsize,
"REFRESH MATERIALIZED VIEW %s (ID %d OID %u)",

View File

@ -2320,6 +2320,57 @@ describeOneTableDetails(const char *schemaname,
PQclear(result);
}
/* print any extended statistics */
if (pset.sversion >= 100000)
{
printfPQExpBuffer(&buf,
"SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n"
" (SELECT pg_catalog.string_agg(pg_catalog.quote_ident(attname::text),', ') \n"
" FROM ((SELECT pg_catalog.unnest(stakeys) AS attnum) s\n"
" JOIN pg_catalog.pg_attribute a ON (starelid = a.attrelid AND\n"
"a.attnum = s.attnum AND not attisdropped))) AS columns,\n"
" (staenabled::char[] @> '{d}'::char[]) AS ndist_enabled\n"
"FROM pg_catalog.pg_statistic_ext stat WHERE starelid = '%s'\n"
"ORDER BY 1;",
oid);
result = PSQLexec(buf.data);
if (!result)
goto error_return;
else
tuples = PQntuples(result);
if (tuples > 0)
{
printTableAddFooter(&cont, _("Statistics:"));
for (i = 0; i < tuples; i++)
{
int cnt = 0;
printfPQExpBuffer(&buf, " ");
/* statistics name (qualified with namespace) */
appendPQExpBuffer(&buf, "\"%s.%s\" WITH (",
PQgetvalue(result, i, 1),
PQgetvalue(result, i, 2));
/* options */
if (strcmp(PQgetvalue(result, i, 5), "t") == 0)
{
appendPQExpBufferStr(&buf, "ndistinct");
cnt++;
}
appendPQExpBuffer(&buf, ") ON (%s)",
PQgetvalue(result, i, 4));
printTableAddFooter(&cont, buf.data);
}
}
PQclear(result);
}
/* print rules */
if (tableinfo.hasrules && tableinfo.relkind != RELKIND_MATVIEW)
{

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201703241
#define CATALOG_VERSION_NO 201703242
#endif

View File

@ -147,6 +147,7 @@ typedef enum ObjectClass
OCLASS_REWRITE, /* pg_rewrite */
OCLASS_TRIGGER, /* pg_trigger */
OCLASS_SCHEMA, /* pg_namespace */
OCLASS_STATISTIC_EXT, /* pg_statistic_ext */
OCLASS_TSPARSER, /* pg_ts_parser */
OCLASS_TSDICT, /* pg_ts_dict */
OCLASS_TSTEMPLATE, /* pg_ts_template */

View File

@ -119,6 +119,7 @@ extern void RemoveAttrDefault(Oid relid, AttrNumber attnum,
DropBehavior behavior, bool complain, bool internal);
extern void RemoveAttrDefaultById(Oid attrdefId);
extern void RemoveStatistics(Oid relid, AttrNumber attnum);
extern void RemoveStatisticsExt(Oid relid, AttrNumber attnum);
extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno,
bool relhasoids);

View File

@ -182,6 +182,13 @@ DECLARE_UNIQUE_INDEX(pg_largeobject_loid_pn_index, 2683, on pg_largeobject using
DECLARE_UNIQUE_INDEX(pg_largeobject_metadata_oid_index, 2996, on pg_largeobject_metadata using btree(oid oid_ops));
#define LargeObjectMetadataOidIndexId 2996
DECLARE_UNIQUE_INDEX(pg_statistic_ext_oid_index, 3380, on pg_statistic_ext using btree(oid oid_ops));
#define StatisticExtOidIndexId 3380
DECLARE_UNIQUE_INDEX(pg_statistic_ext_name_index, 3997, on pg_statistic_ext using btree(staname name_ops, stanamespace oid_ops));
#define StatisticExtNameIndexId 3997
DECLARE_INDEX(pg_statistic_ext_relid_index, 3379, on pg_statistic_ext using btree(starelid oid_ops));
#define StatisticExtRelidIndexId 3379
DECLARE_UNIQUE_INDEX(pg_namespace_nspname_index, 2684, on pg_namespace using btree(nspname name_ops));
#define NamespaceNameIndexId 2684
DECLARE_UNIQUE_INDEX(pg_namespace_oid_index, 2685, on pg_namespace using btree(oid oid_ops));

View File

@ -141,6 +141,8 @@ extern Oid get_collation_oid(List *collname, bool missing_ok);
extern Oid get_conversion_oid(List *conname, bool missing_ok);
extern Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding);
extern Oid get_statistics_oid(List *names, bool missing_ok);
/* initialization & transaction cleanup code */
extern void InitializeSearchPath(void);
extern void AtEOXact_Namespace(bool isCommit, bool parallel);

View File

@ -254,6 +254,10 @@ DATA(insert ( 23 18 78 e f ));
/* pg_node_tree can be coerced to, but not from, text */
DATA(insert ( 194 25 0 i b ));
/* pg_ndistinct can be coerced to, but not from, bytea and text */
DATA(insert ( 3361 17 0 i b ));
DATA(insert ( 3361 25 0 i i ));
/*
* Datetime category
*/

View File

@ -1983,6 +1983,8 @@ DESCR("select statement of a view");
DATA(insert OID = 1642 ( pg_get_userbyid PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 19 "26" _null_ _null_ _null_ _null_ _null_ pg_get_userbyid _null_ _null_ _null_ ));
DESCR("role name by OID (with fallback)");
DATA(insert OID = 1643 ( pg_get_indexdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_indexdef _null_ _null_ _null_ ));
DESCR("extended statistics description");
DATA(insert OID = 3415 ( pg_get_statisticsextdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_statisticsextdef _null_ _null_ _null_ ));
DESCR("index description");
DATA(insert OID = 3352 ( pg_get_partkeydef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_partkeydef _null_ _null_ _null_ ));
DESCR("partition key description");
@ -2758,6 +2760,15 @@ DESCR("current user privilege on any column by rel name");
DATA(insert OID = 3029 ( has_any_column_privilege PGNSP PGUID 12 10 0 0 0 f f f f t f s s 2 0 16 "26 25" _null_ _null_ _null_ _null_ _null_ has_any_column_privilege_id _null_ _null_ _null_ ));
DESCR("current user privilege on any column by rel oid");
DATA(insert OID = 3355 ( pg_ndistinct_in PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3361 "2275" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_in _null_ _null_ _null_ ));
DESCR("I/O");
DATA(insert OID = 3356 ( pg_ndistinct_out PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_out _null_ _null_ _null_ ));
DESCR("I/O");
DATA(insert OID = 3357 ( pg_ndistinct_recv PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 3361 "2281" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_recv _null_ _null_ _null_ ));
DESCR("I/O");
DATA(insert OID = 3358 ( pg_ndistinct_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_send _null_ _null_ _null_ ));
DESCR("I/O");
DATA(insert OID = 1928 ( pg_stat_get_numscans PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ ));
DESCR("statistics: number of scans done for table/index");
DATA(insert OID = 1929 ( pg_stat_get_tuples_returned PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ ));

View File

@ -0,0 +1,75 @@
/*-------------------------------------------------------------------------
*
* pg_statistic_ext.h
* definition of the system "extended statistic" relation (pg_statistic_ext)
* along with the relation's initial contents.
*
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/catalog/pg_statistic_ext.h
*
* NOTES
* the genbki.pl script reads this file and generates .bki
* information from the DATA() statements.
*
*-------------------------------------------------------------------------
*/
#ifndef PG_STATISTIC_EXT_H
#define PG_STATISTIC_EXT_H
#include "catalog/genbki.h"
/* ----------------
* pg_statistic_ext definition. cpp turns this into
* typedef struct FormData_pg_statistic_ext
* ----------------
*/
#define StatisticExtRelationId 3381
CATALOG(pg_statistic_ext,3381)
{
/* These fields form the unique key for the entry: */
Oid starelid; /* relation containing attributes */
NameData staname; /* statistics name */
Oid stanamespace; /* OID of namespace containing this statistics */
Oid staowner; /* statistics owner */
/*
* variable-length fields start here, but we allow direct access to
* stakeys
*/
int2vector stakeys; /* array of column keys */
#ifdef CATALOG_VARLEN
char staenabled[1] BKI_FORCE_NOT_NULL; /* statistic types
* requested to build */
pg_ndistinct standistinct; /* ndistinct coefficients (serialized) */
#endif
} FormData_pg_statistic_ext;
/* ----------------
* Form_pg_statistic_ext corresponds to a pointer to a tuple with
* the format of pg_statistic_ext relation.
* ----------------
*/
typedef FormData_pg_statistic_ext *Form_pg_statistic_ext;
/* ----------------
* compiler constants for pg_statistic_ext
* ----------------
*/
#define Natts_pg_statistic_ext 7
#define Anum_pg_statistic_ext_starelid 1
#define Anum_pg_statistic_ext_staname 2
#define Anum_pg_statistic_ext_stanamespace 3
#define Anum_pg_statistic_ext_staowner 4
#define Anum_pg_statistic_ext_stakeys 5
#define Anum_pg_statistic_ext_staenabled 6
#define Anum_pg_statistic_ext_standistinct 7
#define STATS_EXT_NDISTINCT 'd'
#endif /* PG_STATISTIC_EXT_H */

View File

@ -364,6 +364,10 @@ DATA(insert OID = 194 ( pg_node_tree PGNSP PGUID -1 f b S f t \054 0 0 0 pg_node
DESCR("string representing an internal node tree");
#define PGNODETREEOID 194
DATA(insert OID = 3361 ( pg_ndistinct PGNSP PGUID -1 f b S f t \054 0 0 0 pg_ndistinct_in pg_ndistinct_out pg_ndistinct_recv pg_ndistinct_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ ));
DESCR("multivariate ndistinct coefficients");
#define PGNDISTINCTOID 3361
DATA(insert OID = 32 ( pg_ddl_command PGNSP PGUID SIZEOF_POINTER t p P f t \054 0 0 0 pg_ddl_command_in pg_ddl_command_out pg_ddl_command_recv pg_ddl_command_send - - - ALIGNOF_POINTER p f 0 -1 0 0 _null_ _null_ _null_ ));
DESCR("internal type for passing CollectedCommand");
#define PGDDLCOMMANDOID 32

View File

@ -53,6 +53,7 @@ DECLARE_TOAST(pg_proc, 2836, 2837);
DECLARE_TOAST(pg_rewrite, 2838, 2839);
DECLARE_TOAST(pg_seclabel, 3598, 3599);
DECLARE_TOAST(pg_statistic, 2840, 2841);
DECLARE_TOAST(pg_statistic_ext, 3439, 3440);
DECLARE_TOAST(pg_trigger, 2336, 2337);
/* shared catalogs */

View File

@ -77,6 +77,10 @@ extern ObjectAddress DefineOperator(List *names, List *parameters);
extern void RemoveOperatorById(Oid operOid);
extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt);
/* commands/statscmds.c */
extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt);
extern void RemoveStatisticsById(Oid statsOid);
/* commands/aggregatecmds.c */
extern ObjectAddress DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle,
List *parameters);

View File

@ -279,6 +279,7 @@ typedef enum NodeTag
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
T_StatisticExtInfo,
/*
* TAGS FOR MEMORY NODES (memnodes.h)
@ -424,6 +425,7 @@ typedef enum NodeTag
T_CreateSubscriptionStmt,
T_AlterSubscriptionStmt,
T_DropSubscriptionStmt,
T_CreateStatsStmt,
T_AlterCollationStmt,
/*

View File

@ -1593,6 +1593,7 @@ typedef enum ObjectType
OBJECT_SCHEMA,
OBJECT_SEQUENCE,
OBJECT_SUBSCRIPTION,
OBJECT_STATISTIC_EXT,
OBJECT_TABCONSTRAINT,
OBJECT_TABLE,
OBJECT_TABLESPACE,
@ -2656,6 +2657,20 @@ typedef struct IndexStmt
bool if_not_exists; /* just do nothing if index already exists? */
} IndexStmt;
/* ----------------------
* Create Statistics Statement
* ----------------------
*/
typedef struct CreateStatsStmt
{
NodeTag type;
List *defnames; /* qualified name (list of Value strings) */
RangeVar *relation; /* relation to build statistics on */
List *keys; /* String nodes naming referenced columns */
List *options; /* list of DefElem */
bool if_not_exists; /* do nothing if statistics already exists */
} CreateStatsStmt;
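A hedged sketch of the statement this node represents, using placeholder names (s1, t1) and assuming the optional WITH clause goes between the statistics name and ON, as the psql \d output in the new regression tests suggests: defnames carries the possibly-qualified statistics name, relation the FROM table, keys the parenthesized column list, options the WITH list, and if_not_exists the optional IF NOT EXISTS clause.
CREATE STATISTICS IF NOT EXISTS s1
  WITH (ndistinct)
  ON (a, b)
  FROM t1;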
/* ----------------------
* Create Function Statement
* ----------------------

View File

@ -529,6 +529,7 @@ typedef struct RelOptInfo
List *lateral_vars; /* LATERAL Vars and PHVs referenced by rel */
Relids lateral_referencers; /* rels that reference me laterally */
List *indexlist; /* list of IndexOptInfo */
List *statlist; /* list of StatisticExtInfo */
BlockNumber pages; /* size estimates derived from pg_class */
double tuples;
double allvisfrac;
@ -668,6 +669,24 @@ typedef struct ForeignKeyOptInfo
List *rinfos[INDEX_MAX_KEYS];
} ForeignKeyOptInfo;
/*
* StatisticExtInfo
* Information about extended statistics for planning/optimization
*
* This contains information about one statistics object usable by the
* planner: the pg_statistic_ext row it came from (statOid), the table it
* belongs to (rel), the kind of statistics built (kind), and the columns
* it covers (keys).
*/
typedef struct StatisticExtInfo
{
NodeTag type;
Oid statOid; /* OID of the statistics row */
RelOptInfo *rel; /* back-link to statistics object's table */
char kind; /* statistic kind of this entry */
Bitmapset *keys; /* attnums of the columns covered */
} StatisticExtInfo;
/*
* EquivalenceClasses

View File

@ -0,0 +1,64 @@
/*-------------------------------------------------------------------------
*
* extended_stats_internal.h
* POSTGRES extended statistics internal declarations
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/include/statistics/extended_stats_internal.h
*
*-------------------------------------------------------------------------
*/
#ifndef EXTENDED_STATS_INTERNAL_H
#define EXTENDED_STATS_INTERNAL_H
#include "utils/sortsupport.h"
#include "statistics/statistics.h"
typedef struct
{
Oid eqopr; /* '=' operator for datatype, if any */
Oid eqfunc; /* and associated function */
Oid ltopr; /* '<' operator for datatype, if any */
} StdAnalyzeData;
typedef struct
{
Datum value; /* a data value */
int tupno; /* position index for tuple it came from */
} ScalarItem;
/* multi-sort */
typedef struct MultiSortSupportData
{
int ndims; /* number of dimensions supported by the sort */
SortSupportData ssup[1]; /* sort support data for each dimension */
} MultiSortSupportData;
typedef MultiSortSupportData *MultiSortSupport;
typedef struct SortItem
{
Datum *values;
bool *isnull;
} SortItem;
extern MVNDistinct *statext_ndistinct_build(double totalrows,
int numrows, HeapTuple *rows,
Bitmapset *attrs, VacAttrStats **stats);
extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
extern MVNDistinct *statext_ndistinct_deserialize(bytea *data);
extern MultiSortSupport multi_sort_init(int ndims);
extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
Oid oper);
extern int multi_sort_compare(const void *a, const void *b, void *arg);
extern int multi_sort_compare_dim(int dim, const SortItem * a,
const SortItem * b, MultiSortSupport mss);
extern int multi_sort_compare_dims(int start, int end, const SortItem * a,
const SortItem * b, MultiSortSupport mss);
#endif /* EXTENDED_STATS_INTERNAL_H */

View File

@ -0,0 +1,47 @@
/*-------------------------------------------------------------------------
*
* statistics.h
* Extended statistics and selectivity estimation functions.
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/statistics/statistics.h
*
*-------------------------------------------------------------------------
*/
#ifndef STATISTICS_H
#define STATISTICS_H
#include "commands/vacuum.h"
#define STATS_MAX_DIMENSIONS 8 /* max number of attributes */
/* Multivariate distinct coefficients */
#define STATS_NDISTINCT_MAGIC 0xA352BFA4 /* struct identifier */
#define STATS_NDISTINCT_TYPE_BASIC 1 /* struct version */
/* MVNDistinctItem represents a single combination of columns */
typedef struct MVNDistinctItem
{
double ndistinct; /* ndistinct value for this combination */
Bitmapset *attrs; /* attr numbers of items */
} MVNDistinctItem;
/* A MVNDistinct object, comprising all possible combinations of columns */
typedef struct MVNDistinct
{
uint32 magic; /* magic constant marker */
uint32 type; /* type of ndistinct (BASIC) */
uint32 nitems; /* number of items in the statistic */
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER];
} MVNDistinct;
extern MVNDistinct *statext_ndistinct_load(Oid mvoid);
extern void BuildRelationExtStatistics(Relation onerel, double totalrows,
int numrows, HeapTuple *rows,
int natts, VacAttrStats **vacattrstats);
extern bool statext_is_kind_built(HeapTuple htup, char kind);
#endif /* STATISTICS_H */
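A usage sketch tying this together (placeholder names carried over from the earlier example; the exact numbers and output layout are indicative only): ANALYZE invokes BuildRelationExtStatistics, which builds an MVNDistinct per statistics object, serializes it via statext_ndistinct_serialize, and stores it in pg_statistic_ext.standistinct, whose output function renders one MVNDistinctItem per column combination.
INSERT INTO t1 SELECT i/100, i/100 FROM generate_series(1, 10000) i;
ANALYZE t1;
SELECT standistinct FROM pg_statistic_ext WHERE staname = 't1_ab';
--        standistinct
-- -------------------------
--  [{(b 1 2), 101.000000}]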

View File

@ -192,6 +192,7 @@ typedef enum AclObjectKind
ACL_KIND_OPFAMILY, /* pg_opfamily */
ACL_KIND_COLLATION, /* pg_collation */
ACL_KIND_CONVERSION, /* pg_conversion */
ACL_KIND_STATISTICS, /* pg_statistic_ext */
ACL_KIND_TABLESPACE, /* pg_tablespace */
ACL_KIND_TSDICTIONARY, /* pg_ts_dict */
ACL_KIND_TSCONFIGURATION, /* pg_ts_config */
@ -326,6 +327,7 @@ extern bool pg_event_trigger_ownercheck(Oid et_oid, Oid roleid);
extern bool pg_extension_ownercheck(Oid ext_oid, Oid roleid);
extern bool pg_publication_ownercheck(Oid pub_oid, Oid roleid);
extern bool pg_subscription_ownercheck(Oid sub_oid, Oid roleid);
extern bool pg_statistics_ownercheck(Oid stat_oid, Oid roleid);
extern bool has_createrole_privilege(Oid roleid);
extern bool has_bypassrls_privilege(Oid roleid);

View File

@ -92,6 +92,7 @@ typedef struct RelationData
bool rd_isvalid; /* relcache entry is valid */
char rd_indexvalid; /* state of rd_indexlist: 0 = not valid, 1 =
* valid, 2 = temporarily forced */
bool rd_statvalid; /* is rd_statlist valid? */
/*
* rd_createSubid is the ID of the highest subtransaction the rel has
@ -136,6 +137,9 @@ typedef struct RelationData
Oid rd_pkindex; /* OID of primary key, if any */
Oid rd_replidindex; /* OID of replica identity index, if any */
/* data managed by RelationGetStatExtList: */
List *rd_statlist; /* list of OIDs of extended stats */
/* data managed by RelationGetIndexAttrBitmap: */
Bitmapset *rd_indexattr; /* identifies columns used in indexes */
Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */

View File

@ -39,6 +39,7 @@ extern void RelationClose(Relation relation);
*/
extern List *RelationGetFKeyList(Relation relation);
extern List *RelationGetIndexList(Relation relation);
extern List *RelationGetStatExtList(Relation relation);
extern Oid RelationGetOidIndex(Relation relation);
extern Oid RelationGetPrimaryKeyIndex(Relation relation);
extern Oid RelationGetReplicaIndex(Relation relation);

View File

@ -86,6 +86,8 @@ enum SysCacheIdentifier
PUBLICATIONRELMAP,
RULERELNAME,
SEQRELID,
STATEXTNAMENSP,
STATEXTOID,
STATRELATTINH,
SUBSCRIPTIONOID,
SUBSCRIPTIONNAME,

View File

@ -496,6 +496,48 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD
ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4);
ERROR: function 2(integer,integer) does not exist in operator family "alt_opf18"
DROP OPERATOR FAMILY alt_opf18 USING btree;
--
-- Statistics
--
SET SESSION AUTHORIZATION regress_alter_user1;
CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER);
CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
ALTER STATISTICS alt_stat1 RENAME TO alt_stat2; -- failed (name conflict)
ERROR: statistics "alt_stat2" already exists in schema "alt_nsp1"
ALTER STATISTICS alt_stat1 RENAME TO alt_stat3; -- OK
ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2; -- failed (no role membership)
ERROR: must be member of role "regress_alter_user2"
ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- OK
ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- OK
SET SESSION AUTHORIZATION regress_alter_user2;
CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
ALTER STATISTICS alt_stat3 RENAME TO alt_stat4; -- failed (not owner)
ERROR: must be owner of statistics alt_stat3
ALTER STATISTICS alt_stat1 RENAME TO alt_stat4; -- OK
ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner)
ERROR: must be owner of statistics alt_stat3
ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership)
ERROR: must be member of role "regress_alter_user3"
ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2; -- failed (not owner)
ERROR: must be owner of statistics alt_stat3
ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- failed (name conflict)
ERROR: statistics "alt_stat2" already exists in schema "alt_nsp2"
RESET SESSION AUTHORIZATION;
SELECT nspname, staname, rolname
FROM pg_statistic_ext s, pg_namespace n, pg_authid a
WHERE s.stanamespace = n.oid AND s.staowner = a.oid
AND n.nspname in ('alt_nsp1', 'alt_nsp2')
ORDER BY nspname, staname;
nspname | staname | rolname
----------+-----------+---------------------
alt_nsp1 | alt_stat2 | regress_alter_user2
alt_nsp1 | alt_stat3 | regress_alter_user1
alt_nsp1 | alt_stat4 | regress_alter_user2
alt_nsp2 | alt_stat2 | regress_alter_user3
(4 rows)
--
-- Text Search Dictionary
--
@ -639,7 +681,7 @@ DROP LANGUAGE alt_lang3 CASCADE;
DROP LANGUAGE alt_lang4 CASCADE;
ERROR: language "alt_lang4" does not exist
DROP SCHEMA alt_nsp1 CASCADE;
NOTICE: drop cascades to 26 other objects
NOTICE: drop cascades to 27 other objects
DETAIL: drop cascades to function alt_func3(integer)
drop cascades to function alt_agg3(integer)
drop cascades to function alt_func4(integer)
@ -656,6 +698,7 @@ drop cascades to operator family alt_opc1 for access method hash
drop cascades to operator family alt_opc2 for access method hash
drop cascades to operator family alt_opf4 for access method hash
drop cascades to operator family alt_opf2 for access method hash
drop cascades to table alt_regress_1
drop cascades to text search dictionary alt_ts_dict3
drop cascades to text search dictionary alt_ts_dict4
drop cascades to text search dictionary alt_ts_dict2

View File

@ -39,6 +39,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL (
CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable;
CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT);
WARNING: tables were not subscribed, you will have to run ALTER SUBSCRIPTION ... REFRESH PUBLICATION to subscribe the tables
CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable;
-- test some error cases
SELECT pg_get_object_address('stone', '{}', '{}');
ERROR: unrecognized object type "stone"
@ -409,7 +410,8 @@ WITH objects (type, name, args) AS (VALUES
('access method', '{btree}', '{}'),
('publication', '{addr_pub}', '{}'),
('publication relation', '{addr_nsp, gentable}', '{addr_pub}'),
('subscription', '{addr_sub}', '{}')
('subscription', '{addr_sub}', '{}'),
('statistics', '{addr_nsp, gentable_stat}', '{}')
)
SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
-- test roundtrip through pg_identify_object_as_address
@ -457,6 +459,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
trigger | | | t on addr_nsp.gentable | t
operator family | pg_catalog | integer_ops | pg_catalog.integer_ops USING btree | t
policy | | | genpol on addr_nsp.gentable | t
statistics | addr_nsp | gentable_stat | addr_nsp.gentable_stat | t
collation | pg_catalog | "default" | pg_catalog."default" | t
transform | | | for integer on language sql | t
text search dictionary | addr_nsp | addr_ts_dict | addr_nsp.addr_ts_dict | t
@ -466,7 +469,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
subscription | | addr_sub | addr_sub | t
publication | | addr_pub | addr_pub | t
publication relation | | | gentable in publication addr_pub | t
(45 rows)
(46 rows)
---
--- Cleanup resources

View File

@ -823,11 +823,12 @@ WHERE c.castmethod = 'b' AND
text | character | 0 | i
character varying | character | 0 | i
pg_node_tree | text | 0 | i
pg_ndistinct | bytea | 0 | i
cidr | inet | 0 | i
xml | text | 0 | a
xml | character varying | 0 | a
xml | character | 0 | a
(7 rows)
(8 rows)
-- **************** pg_conversion ****************
-- Look for illegal values in pg_conversion fields.

View File

@ -2164,6 +2164,14 @@ pg_stats| SELECT n.nspname AS schemaname,
JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum))))
LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid))));
pg_stats_ext| SELECT n.nspname AS schemaname,
c.relname AS tablename,
s.staname,
s.stakeys AS attnums,
length((s.standistinct)::text) AS ndistbytes
FROM ((pg_statistic_ext s
JOIN pg_class c ON ((c.oid = s.starelid)))
LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)));
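A possible way to read the new pg_stats_ext view defined just above (names carried over from the earlier sketches; output omitted):
SELECT schemaname, tablename, staname, attnums, ndistbytes
  FROM pg_stats_ext
 WHERE tablename = 't1';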
pg_tables| SELECT n.nspname AS schemaname,
c.relname AS tablename,
pg_get_userbyid(c.relowner) AS tableowner,

View File

@ -142,6 +142,7 @@ pg_shdepend|t
pg_shdescription|t
pg_shseclabel|t
pg_statistic|t
pg_statistic_ext|t
pg_subscription|t
pg_subscription_rel|t
pg_tablespace|t

View File

@ -0,0 +1,155 @@
-- Generic extended statistics support
-- Ensure stats are dropped sanely
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
DROP STATISTICS ab1_a_b_stats;
CREATE SCHEMA regress_schema_2;
CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1;
DROP STATISTICS regress_schema_2.ab1_a_b_stats;
-- Ensure statistics are dropped when columns are
CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1;
CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1;
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
ALTER TABLE ab1 DROP COLUMN a;
\d ab1
Table "public.ab1"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
b | integer | | |
c | integer | | |
Statistics:
"public.ab1_b_c_stats" WITH (ndistinct) ON (b, c)
DROP TABLE ab1;
-- Ensure things work sanely with SET STATISTICS 0
CREATE TABLE ab1 (a INTEGER, b INTEGER);
ALTER TABLE ab1 ALTER a SET STATISTICS 0;
INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
ANALYZE ab1;
ERROR: extended statistics could not be collected for column "a" of relation public.ab1
HINT: Consider ALTER TABLE "public"."ab1" ALTER "a" SET STATISTICS -1
ALTER TABLE ab1 ALTER a SET STATISTICS -1;
ANALYZE ab1;
DROP TABLE ab1;
-- n-distinct tests
CREATE TABLE ndistinct (
filler1 TEXT,
filler2 NUMERIC,
a INT,
b INT,
filler3 DATE,
c INT,
d INT
);
-- unknown column
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
ERROR: column "unknown_column" referenced in statistics does not exist
-- single column
CREATE STATISTICS s10 ON (a) FROM ndistinct;
ERROR: statistics require at least 2 columns
-- single column, duplicated
CREATE STATISTICS s10 ON (a,a) FROM ndistinct;
ERROR: duplicate column name in statistics definition
-- two columns, one duplicated
CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
ERROR: duplicate column name in statistics definition
-- correct command
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
-- perfectly correlated groups
INSERT INTO ndistinct (a, b, c, filler1)
SELECT i/100, i/100, i/100, cash_words(i::money)
FROM generate_series(1,10000) s(i);
ANALYZE ndistinct;
SELECT staenabled, standistinct
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
staenabled | standistinct
------------+------------------------------------------------------------------------------------------------
{d} | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}]
(1 row)
EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
QUERY PLAN
-----------------------------
HashAggregate
Group Key: a, b
-> Seq Scan on ndistinct
(3 rows)
EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
QUERY PLAN
-----------------------------
HashAggregate
Group Key: a, b, c
-> Seq Scan on ndistinct
(3 rows)
EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
QUERY PLAN
-----------------------------
HashAggregate
Group Key: a, b, c, d
-> Seq Scan on ndistinct
(3 rows)
TRUNCATE TABLE ndistinct;
-- partially correlated groups
INSERT INTO ndistinct (a, b, c)
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
ANALYZE ndistinct;
SELECT staenabled, standistinct
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
staenabled | standistinct
------------+------------------------------------------------------------------------------------------------
{d} | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}]
(1 row)
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
QUERY PLAN
---------------------------------------------------------------------
HashAggregate (cost=230.00..232.01 rows=201 width=16)
Group Key: a, b
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
(3 rows)
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
QUERY PLAN
----------------------------------------------------------------------
HashAggregate (cost=255.00..257.01 rows=201 width=20)
Group Key: a, b, c
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
(3 rows)
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
QUERY PLAN
----------------------------------------------------------------------
HashAggregate (cost=280.00..290.00 rows=1000 width=24)
Group Key: a, b, c, d
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=16)
(3 rows)
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
QUERY PLAN
----------------------------------------------------------------------
HashAggregate (cost=255.00..265.00 rows=1000 width=20)
Group Key: b, c, d
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
(3 rows)
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
QUERY PLAN
---------------------------------------------------------------------
HashAggregate (cost=230.00..240.00 rows=1000 width=16)
Group Key: a, d
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
(3 rows)
DROP TABLE ndistinct;

View File

@ -59,7 +59,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR
-- Look for types that should have an array type according to their typtype,
-- but don't. We exclude composites here because we have not bothered to
-- make array types corresponding to the system catalogs' rowtypes.
-- NOTE: as of v10, this check finds pg_node_tree and smgr.
-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr.
SELECT p1.oid, p1.typname
FROM pg_type as p1
WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
@ -67,11 +67,12 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
(SELECT 1 FROM pg_type as p2
WHERE p2.typname = ('_' || p1.typname)::name AND
p2.typelem = p1.oid and p1.typarray = p2.oid);
oid | typname
-----+--------------
194 | pg_node_tree
210 | smgr
(2 rows)
oid | typname
------+--------------
194 | pg_node_tree
3361 | pg_ndistinct
210 | smgr
(3 rows)
-- Make sure typarray points to a varlena array type of our own base
SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype,

View File

@ -89,7 +89,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview
# ----------
# Another group of parallel tests
# ----------
test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan
test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan stats_ext
# rules cannot run concurrently with any test that creates a view
test: rules psql_crosstab amutils

View File

@ -130,6 +130,7 @@ test: misc_functions
test: sysviews
test: tsrf
test: tidscan
test: stats_ext
test: rules
test: psql_crosstab
test: select_parallel

View File

@ -433,6 +433,37 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD
ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4);
DROP OPERATOR FAMILY alt_opf18 USING btree;
--
-- Statistics
--
SET SESSION AUTHORIZATION regress_alter_user1;
CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER);
CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
ALTER STATISTICS alt_stat1 RENAME TO alt_stat2; -- failed (name conflict)
ALTER STATISTICS alt_stat1 RENAME TO alt_stat3; -- OK
ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2; -- failed (no role membership)
ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- OK
ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- OK
SET SESSION AUTHORIZATION regress_alter_user2;
CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
ALTER STATISTICS alt_stat3 RENAME TO alt_stat4; -- failed (not owner)
ALTER STATISTICS alt_stat1 RENAME TO alt_stat4; -- OK
ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner)
ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership)
ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2; -- failed (not owner)
ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- failed (name conflict)
RESET SESSION AUTHORIZATION;
SELECT nspname, staname, rolname
FROM pg_statistic_ext s, pg_namespace n, pg_authid a
WHERE s.stanamespace = n.oid AND s.staowner = a.oid
AND n.nspname in ('alt_nsp1', 'alt_nsp2')
ORDER BY nspname, staname;
--
-- Text Search Dictionary

View File

@ -41,6 +41,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL (
TO SQL WITH FUNCTION int4recv(internal));
CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable;
CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT);
CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable;
-- test some error cases
SELECT pg_get_object_address('stone', '{}', '{}');
@ -185,7 +186,8 @@ WITH objects (type, name, args) AS (VALUES
('access method', '{btree}', '{}'),
('publication', '{addr_pub}', '{}'),
('publication relation', '{addr_nsp, gentable}', '{addr_pub}'),
('subscription', '{addr_sub}', '{}')
('subscription', '{addr_sub}', '{}'),
('statistics', '{addr_nsp, gentable_stat}', '{}')
)
SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
-- test roundtrip through pg_identify_object_as_address

View File

@ -0,0 +1,102 @@
-- Generic extended statistics support
-- Ensure stats are dropped sanely
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
DROP STATISTICS ab1_a_b_stats;
CREATE SCHEMA regress_schema_2;
CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1;
DROP STATISTICS regress_schema_2.ab1_a_b_stats;
-- Ensure statistics are dropped when columns are
CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1;
CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1;
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
ALTER TABLE ab1 DROP COLUMN a;
\d ab1
DROP TABLE ab1;
-- Ensure things work sanely with SET STATISTICS 0
CREATE TABLE ab1 (a INTEGER, b INTEGER);
ALTER TABLE ab1 ALTER a SET STATISTICS 0;
INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
ANALYZE ab1;
ALTER TABLE ab1 ALTER a SET STATISTICS -1;
ANALYZE ab1;
DROP TABLE ab1;
-- n-distinct tests
CREATE TABLE ndistinct (
filler1 TEXT,
filler2 NUMERIC,
a INT,
b INT,
filler3 DATE,
c INT,
d INT
);
-- unknown column
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
-- single column
CREATE STATISTICS s10 ON (a) FROM ndistinct;
-- single column, duplicated
CREATE STATISTICS s10 ON (a,a) FROM ndistinct;
-- two columns, one duplicated
CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
-- correct command
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
-- perfectly correlated groups
INSERT INTO ndistinct (a, b, c, filler1)
SELECT i/100, i/100, i/100, cash_words(i::money)
FROM generate_series(1,10000) s(i);
ANALYZE ndistinct;
SELECT staenabled, standistinct
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
TRUNCATE TABLE ndistinct;
-- partially correlated groups
INSERT INTO ndistinct (a, b, c)
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
ANALYZE ndistinct;
SELECT staenabled, standistinct
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
DROP TABLE ndistinct;

View File

@ -53,7 +53,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR
-- Look for types that should have an array type according to their typtype,
-- but don't. We exclude composites here because we have not bothered to
-- make array types corresponding to the system catalogs' rowtypes.
-- NOTE: as of v10, this check finds pg_node_tree and smgr.
-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr.
SELECT p1.oid, p1.typname
FROM pg_type as p1