diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 0f8703af5a..f103d914a6 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -7385,8 +7385,22 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
        <literal>d</literal> for n-distinct statistics,
        <literal>f</literal> for functional dependency statistics, and
        <literal>m</literal> for most common values (MCV) list statistics
+       <literal>e</literal> for expression statistics
       </para></entry>
      </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>stxexprs</structfield> <type>pg_node_tree</type>
+      </para>
+      <para>
+       Expression trees (in <function>nodeToString()</function>
+       representation) for statistics object attributes that are not simple
+       column references.  This is a list with one element per expression.
+       Null if all statistics object attributes are simple references.
+      </para></entry>
+     </row>
+
     </tbody>
    </tgroup>
   </table>
@@ -7452,7 +7466,7 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
        (references <link linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link>.<structfield>oid</structfield>)
       </para>
       <para>
-       Extended statistic object containing the definition for this data
+       Extended statistics object containing the definition for this data
       </para></entry>
      </row>
 
@@ -7484,6 +7498,15 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
        <structname>pg_mcv_list</structname> type
       </para></entry>
      </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>stxexprs</structfield> <type>pg_node_tree</type>
+      </para>
+      <para>
+       A list of any expressions covered by this statistics object.
+      </para></entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
@@ -7637,6 +7660,16 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
        see <xref linkend="logical-replication-publication"/>.
       </para></entry>
      </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>stxdexpr</structfield> <type>pg_statistic[]</type>
+      </para>
+      <para>
+       Per-expression statistics, serialized as an array of
+       <structname>pg_statistic</structname> type
+      </para></entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
@@ -9444,6 +9477,11 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
       <entry>extended planner statistics</entry>
      </row>
 
+     <row>
+      <entry><link linkend="view-pg-stats-ext-exprs"><structname>pg_stats_ext_exprs</structname></link></entry>
+      <entry>extended planner statistics for expressions</entry>
+     </row>
+
      <row>
       <entry><link linkend="view-pg-tables"><structname>pg_tables</structname></link></entry>
       <entry>tables</entry>
@@ -12696,10 +12734,19 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
        (references <link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.<structfield>attname</structfield>)
       </para>
       <para>
-       Name of the column described by this row
+       Names of the columns included in the extended statistics object
       </para></entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>exprs</structfield> <type>text[]</type>
+      </para>
+      <para>
+       Expressions included in the extended statistics object
+      </para></entry>
+      </row>
+
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
        <structfield>inherited</structfield> <type>bool</type>
@@ -12851,7 +12898,8 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
 
   <para>
    The view <structname>pg_stats_ext</structname> provides access to
-   the information stored in the <link
+   information about each extended statistics object in the database,
+   combining information stored in the <link
    linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link>
    and <link linkend="catalog-pg-statistic-ext-data"><structname>pg_statistic_ext_data</structname></link>
    catalogs.  This view allows access only to rows of
@@ -12908,7 +12956,7 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
        (references <link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.<structfield>nspname</structfield>)
       </para>
       <para>
-       Name of schema containing extended statistic
+       Name of schema containing extended statistics object
       </para></entry>
      </row>
 
@@ -12918,7 +12966,7 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
        (references <link linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link>.<structfield>stxname</structfield>)
       </para>
       <para>
-       Name of extended statistics
+       Name of extended statistics object
       </para></entry>
      </row>
 
@@ -12928,7 +12976,7 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
        (references <link linkend="catalog-pg-authid"><structname>pg_authid</structname></link>.<structfield>rolname</structfield>)
       </para>
       <para>
-       Owner of the extended statistics
+       Owner of the extended statistics object
       </para></entry>
      </row>
 
@@ -12938,7 +12986,7 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
        (references <link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.<structfield>attname</structfield>)
       </para>
       <para>
-       Names of the columns the extended statistics is defined on
+       Names of the columns the extended statistics object is defined on
       </para></entry>
      </row>
 
@@ -12947,7 +12995,7 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
        <structfield>kinds</structfield> <type>char[]</type>
       </para>
       <para>
-       Types of extended statistics enabled for this record
+       Types of extended statistics object enabled for this record
       </para></entry>
      </row>
 
@@ -13032,6 +13080,237 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
 
  </sect1>
 
+ <sect1 id="view-pg-stats-ext-exprs">
+  <title><structname>pg_stats_ext_exprs</structname></title>
+
+  <indexterm zone="view-pg-stats-ext-exprs">
+   <primary>pg_stats_ext_exprs</primary>
+  </indexterm>
+
+  <para>
+   The view <structname>pg_stats_ext_exprs</structname> provides access to
+   information about all expressions included in extended statistics objects,
+   combining information stored in the <link
+   linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link>
+   and <link linkend="catalog-pg-statistic-ext-data"><structname>pg_statistic_ext_data</structname></link>
+   catalogs.  This view allows access only to rows of
+   <link linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link> and <link linkend="catalog-pg-statistic-ext-data"><structname>pg_statistic_ext_data</structname></link>
+   that correspond to tables the user has permission to read, and therefore
+   it is safe to allow public read access to this view.
+  </para>
+
+  <para>
+   <structname>pg_stats_ext_exprs</structname> is also designed to present
+   the information in a more readable format than the underlying catalogs
+   &mdash; at the cost that its schema must be extended whenever the structure
+   of statistics in <link linkend="catalog-pg-statistic"><structname>pg_statistic</structname></link> changes.
+  </para>
+
+  <table>
+   <title><structname>pg_stats_ext_exprs</structname> Columns</title>
+   <tgroup cols="1">
+    <thead>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       Column Type
+      </para>
+      <para>
+       Description
+      </para></entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>schemaname</structfield> <type>name</type>
+       (references <link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.<structfield>nspname</structfield>)
+      </para>
+      <para>
+       Name of schema containing table
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>tablename</structfield> <type>name</type>
+       (references <link linkend="catalog-pg-class"><structname>pg_class</structname></link>.<structfield>relname</structfield>)
+      </para>
+      <para>
+       Name of table the statistics object is defined on
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>statistics_schemaname</structfield> <type>name</type>
+       (references <link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.<structfield>nspname</structfield>)
+      </para>
+      <para>
+       Name of schema containing extended statistics object
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>statistics_name</structfield> <type>name</type>
+       (references <link linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link>.<structfield>stxname</structfield>)
+      </para>
+      <para>
+       Name of extended statistics object
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>statistics_owner</structfield> <type>name</type>
+       (references <link linkend="catalog-pg-authid"><structname>pg_authid</structname></link>.<structfield>rolname</structfield>)
+      </para>
+      <para>
+       Owner of the extended statistics object
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>expr</structfield> <type>text</type>
+      </para>
+      <para>
+       Expression included in the extended statistics object
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>null_frac</structfield> <type>float4</type>
+      </para>
+      <para>
+       Fraction of expression entries that are null
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>avg_width</structfield> <type>int4</type>
+      </para>
+      <para>
+       Average width in bytes of expression's entries
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>n_distinct</structfield> <type>float4</type>
+      </para>
+      <para>
+       If greater than zero, the estimated number of distinct values in the
+       expression.  If less than zero, the negative of the number of distinct
+       values divided by the number of rows.  (The negated form is used when
+       <command>ANALYZE</command> believes that the number of distinct values is
+       likely to increase as the table grows; the positive form is used when
+       the expression seems to have a fixed number of possible values.)  For
+       example, -1 indicates a unique expression in which the number of distinct
+       values is the same as the number of rows.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>most_common_vals</structfield> <type>anyarray</type>
+      </para>
+      <para>
+       A list of the most common values in the expression. (Null if
+       no values seem to be more common than any others.)
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>most_common_freqs</structfield> <type>float4[]</type>
+      </para>
+      <para>
+       A list of the frequencies of the most common values,
+       i.e., number of occurrences of each divided by total number of rows.
+       (Null when <structfield>most_common_vals</structfield> is.)
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>histogram_bounds</structfield> <type>anyarray</type>
+      </para>
+      <para>
+       A list of values that divide the expression's values into groups of
+       approximately equal population.  The values in
+       <structfield>most_common_vals</structfield>, if present, are omitted from this
+       histogram calculation.  (This expression is null if the expression data type
+       does not have a <literal>&lt;</literal> operator or if the
+       <structfield>most_common_vals</structfield> list accounts for the entire
+       population.)
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>correlation</structfield> <type>float4</type>
+      </para>
+      <para>
+       Statistical correlation between physical row ordering and
+       logical ordering of the expression values.  This ranges from -1 to +1.
+       When the value is near -1 or +1, an index scan on the expression will
+       be estimated to be cheaper than when it is near zero, due to reduction
+       of random access to the disk.  (This expression is null if the expression's
+       data type does not have a <literal>&lt;</literal> operator.)
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>most_common_elems</structfield> <type>anyarray</type>
+      </para>
+      <para>
+       A list of non-null element values most often appearing within values of
+       the expression. (Null for scalar types.)
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>most_common_elem_freqs</structfield> <type>float4[]</type>
+      </para>
+      <para>
+       A list of the frequencies of the most common element values, i.e., the
+       fraction of rows containing at least one instance of the given value.
+       Two or three additional values follow the per-element frequencies;
+       these are the minimum and maximum of the preceding per-element
+       frequencies, and optionally the frequency of null elements.
+       (Null when <structfield>most_common_elems</structfield> is.)
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>elem_count_histogram</structfield> <type>float4[]</type>
+      </para>
+      <para>
+       A histogram of the counts of distinct non-null element values within the
+       values of the expression, followed by the average number of distinct
+       non-null elements.  (Null for scalar types.)
+      </para></entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+  <para>
+   The maximum number of entries in the array fields can be controlled on a
+   column-by-column basis using the <link linkend="sql-altertable"><command>ALTER
+   TABLE SET STATISTICS</command></link> command, or globally by setting the
+   <xref linkend="guc-default-statistics-target"/> run-time parameter.
+  </para>
+
+ </sect1>
+
  <sect1 id="view-pg-tables">
   <title><structname>pg_tables</structname></title>
 
diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml
index 4363be50c3..988f4c573f 100644
--- a/doc/src/sgml/ref/create_statistics.sgml
+++ b/doc/src/sgml/ref/create_statistics.sgml
@@ -21,9 +21,13 @@ PostgreSQL documentation
 
  <refsynopsisdiv>
 <synopsis>
+CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="parameter">statistics_name</replaceable>
+    ON ( <replaceable class="parameter">expression</replaceable> )
+    FROM <replaceable class="parameter">table_name</replaceable>
+
 CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="parameter">statistics_name</replaceable>
     [ ( <replaceable class="parameter">statistics_kind</replaceable> [, ... ] ) ]
-    ON <replaceable class="parameter">column_name</replaceable>, <replaceable class="parameter">column_name</replaceable> [, ...]
+    ON { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) }, { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [, ...]
     FROM <replaceable class="parameter">table_name</replaceable>
 </synopsis>
 
@@ -39,6 +43,19 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="parameter">statistics_na
    database and will be owned by the user issuing the command.
   </para>
 
+  <para>
+   The <command>CREATE STATISTICS</command> command has two basic forms. The
+   first form allows univariate statistics for a single expression to be
+   collected, providing benefits similar to an expression index without the
+   overhead of index maintenance.  This form does not allow the statistics
+   kind to be specified, since the various statistics kinds refer only to
+   multivariate statistics.  The second form of the command allows
+   multivariate statistics on multiple columns and/or expressions to be
+   collected, optionally specifying which statistics kinds to include.  This
+   form will also automatically cause univariate statistics to be collected on
+   any expressions included in the list.
+  </para>
+
   <para>
    If a schema name is given (for example, <literal>CREATE STATISTICS
    myschema.mystat ...</literal>) then the statistics object is created in the
@@ -79,14 +96,16 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="parameter">statistics_na
     <term><replaceable class="parameter">statistics_kind</replaceable></term>
     <listitem>
      <para>
-      A statistics kind to be computed in this statistics object.
+      A multivariate statistics kind to be computed in this statistics object.
       Currently supported kinds are
       <literal>ndistinct</literal>, which enables n-distinct statistics,
       <literal>dependencies</literal>, which enables functional
       dependency statistics, and <literal>mcv</literal> which enables
       most-common values lists.
       If this clause is omitted, all supported statistics kinds are
-      included in the statistics object.
+      included in the statistics object. Univariate expression statistics are
+      built automatically if the statistics definition includes any complex
+      expressions rather than just simple column references.
       For more information, see <xref linkend="planner-stats-extended"/>
       and <xref linkend="multivariate-statistics-examples"/>.
      </para>
@@ -98,8 +117,22 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="parameter">statistics_na
     <listitem>
      <para>
       The name of a table column to be covered by the computed statistics.
-      At least two column names must be given;  the order of the column names
-      is insignificant.
+      This is only allowed when building multivariate statistics.  At least
+      two column names or expressions must be specified, and their order is
+      not significant.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">expression</replaceable></term>
+    <listitem>
+     <para>
+      An expression to be covered by the computed statistics.  This may be
+      used to build univariate statistics on a single expression, or as part
+      of a list of multiple column names and/or expressions to build
+      multivariate statistics.  In the latter case, separate univariate
+      statistics are built automatically for each expression in the list.
      </para>
     </listitem>
    </varlistentry>
@@ -125,6 +158,13 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="parameter">statistics_na
    reading it.  Once created, however, the ownership of the statistics
    object is independent of the underlying table(s).
   </para>
+
+  <para>
+   Expression statistics are per-expression and are similar to creating an
+   index on the expression, except that they avoid the overhead of index
+   maintenance. Expression statistics are built automatically for each
+   expression in the statistics object definition.
+  </para>
  </refsect1>
 
  <refsect1 id="sql-createstatistics-examples">
@@ -196,6 +236,72 @@ EXPLAIN ANALYZE SELECT * FROM t2 WHERE (a = 1) AND (b = 2);
    in the table, allowing it to generate better estimates in both cases.
   </para>
 
+  <para>
+   Create table <structname>t3</structname> with a single timestamp column,
+   and run queries using expressions on that column.  Without extended
+   statistics, the planner has no information about the data distribution for
+   the expressions, and uses default estimates.  The planner also does not
+   realize that the value of the date truncated to the month is fully
+   determined by the value of the date truncated to the day. Then expression
+   and ndistinct statistics are built on those two expressions:
+
+<programlisting>
+CREATE TABLE t3 (
+    a   timestamp
+);
+
+INSERT INTO t3 SELECT i FROM generate_series('2020-01-01'::timestamp,
+                                             '2020-12-31'::timestamp,
+                                             '1 minute'::interval) s(i);
+
+ANALYZE t3;
+
+-- the number of matching rows will be drastically underestimated:
+EXPLAIN ANALYZE SELECT * FROM t3
+  WHERE date_trunc('month', a) = '2020-01-01'::timestamp;
+
+EXPLAIN ANALYZE SELECT * FROM t3
+  WHERE date_trunc('day', a) BETWEEN '2020-01-01'::timestamp
+                                 AND '2020-06-30'::timestamp;
+
+EXPLAIN ANALYZE SELECT date_trunc('month', a), date_trunc('day', a)
+   FROM t3 GROUP BY 1, 2;
+
+-- build ndistinct statistics on the pair of expressions (per-expression
+-- statistics are built automatically)
+CREATE STATISTICS s3 (ndistinct) ON date_trunc('month', a), date_trunc('day', a) FROM t3;
+
+ANALYZE t3;
+
+-- now the row count estimates are more accurate:
+EXPLAIN ANALYZE SELECT * FROM t3
+  WHERE date_trunc('month', a) = '2020-01-01'::timestamp;
+
+EXPLAIN ANALYZE SELECT * FROM t3
+  WHERE date_trunc('day', a) BETWEEN '2020-01-01'::timestamp
+                                 AND '2020-06-30'::timestamp;
+
+EXPLAIN ANALYZE SELECT date_trunc('month', a), date_trunc('day', a)
+   FROM t3 GROUP BY 1, 2;
+</programlisting>
+
+   Without expression and ndistinct statistics, the planner has no information
+   about the number of distinct values for the expressions, and has to rely
+   on default estimates. The equality and range conditions are assumed to have
+   0.5% selectivity, and the number of distinct values in the expression is
+   assumed to be the same as for the column (i.e. unique). This results in a
+   significant underestimate of the row count in the first two queries. Moreover,
+   the planner has no information about the relationship between the expressions,
+   so it assumes the two <literal>WHERE</literal> and <literal>GROUP BY</literal>
+   conditions are independent, and multiplies their selectivities together to
+   arrive at a severe overestimate of the group count in the aggregate query.
+   This is further exacerbated by the lack of accurate statistics for the
+   expressions, forcing the planner to use a default ndistinct estimate for the
+   expression derived from ndistinct for the column. With such statistics, the
+   planner recognizes that the conditions are correlated, and arrives at much
+   more accurate estimates.
+  </para>
+
  </refsect1>
 
  <refsect1>
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 70bc2123df..e36a9602c1 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -49,15 +49,15 @@ include $(top_srcdir)/src/backend/common.mk
 
 # Note: the order of this list determines the order in which the catalog
 # header files are assembled into postgres.bki.  BKI_BOOTSTRAP catalogs
-# must appear first, and there are reputedly other, undocumented ordering
-# dependencies.
+# must appear first, and pg_statistic before pg_statistic_ext_data, and
+# there are reputedly other, undocumented ordering dependencies.
 CATALOG_HEADERS := \
 	pg_proc.h pg_type.h pg_attribute.h pg_class.h \
 	pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
 	pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
 	pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
-	pg_statistic_ext.h pg_statistic_ext_data.h \
-	pg_statistic.h pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \
+	pg_statistic.h pg_statistic_ext.h pg_statistic_ext_data.h \
+	pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \
 	pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
 	pg_database.h pg_db_role_setting.h pg_tablespace.h \
 	pg_authid.h pg_auth_members.h pg_shdepend.h pg_shdescription.h \
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 0dca65dc7b..6483563204 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -264,6 +264,7 @@ CREATE VIEW pg_stats_ext WITH (security_barrier) AS
                   JOIN pg_attribute a
                        ON (a.attrelid = s.stxrelid AND a.attnum = k)
            ) AS attnames,
+           pg_get_statisticsobjdef_expressions(s.oid) as exprs,
            s.stxkind AS kinds,
            sd.stxdndistinct AS n_distinct,
            sd.stxddependencies AS dependencies,
@@ -290,6 +291,74 @@ CREATE VIEW pg_stats_ext WITH (security_barrier) AS
                 WHERE NOT has_column_privilege(c.oid, a.attnum, 'select') )
     AND (c.relrowsecurity = false OR NOT row_security_active(c.oid));
 
+CREATE VIEW pg_stats_ext_exprs WITH (security_barrier) AS
+    SELECT cn.nspname AS schemaname,
+           c.relname AS tablename,
+           sn.nspname AS statistics_schemaname,
+           s.stxname AS statistics_name,
+           pg_get_userbyid(s.stxowner) AS statistics_owner,
+           stat.expr,
+           (stat.a).stanullfrac AS null_frac,
+           (stat.a).stawidth AS avg_width,
+           (stat.a).stadistinct AS n_distinct,
+           (CASE
+               WHEN (stat.a).stakind1 = 1 THEN (stat.a).stavalues1
+               WHEN (stat.a).stakind2 = 1 THEN (stat.a).stavalues2
+               WHEN (stat.a).stakind3 = 1 THEN (stat.a).stavalues3
+               WHEN (stat.a).stakind4 = 1 THEN (stat.a).stavalues4
+               WHEN (stat.a).stakind5 = 1 THEN (stat.a).stavalues5
+           END) AS most_common_vals,
+           (CASE
+               WHEN (stat.a).stakind1 = 1 THEN (stat.a).stanumbers1
+               WHEN (stat.a).stakind2 = 1 THEN (stat.a).stanumbers2
+               WHEN (stat.a).stakind3 = 1 THEN (stat.a).stanumbers3
+               WHEN (stat.a).stakind4 = 1 THEN (stat.a).stanumbers4
+               WHEN (stat.a).stakind5 = 1 THEN (stat.a).stanumbers5
+           END) AS most_common_freqs,
+           (CASE
+               WHEN (stat.a).stakind1 = 2 THEN (stat.a).stavalues1
+               WHEN (stat.a).stakind2 = 2 THEN (stat.a).stavalues2
+               WHEN (stat.a).stakind3 = 2 THEN (stat.a).stavalues3
+               WHEN (stat.a).stakind4 = 2 THEN (stat.a).stavalues4
+               WHEN (stat.a).stakind5 = 2 THEN (stat.a).stavalues5
+           END) AS histogram_bounds,
+           (CASE
+               WHEN (stat.a).stakind1 = 3 THEN (stat.a).stanumbers1[1]
+               WHEN (stat.a).stakind2 = 3 THEN (stat.a).stanumbers2[1]
+               WHEN (stat.a).stakind3 = 3 THEN (stat.a).stanumbers3[1]
+               WHEN (stat.a).stakind4 = 3 THEN (stat.a).stanumbers4[1]
+               WHEN (stat.a).stakind5 = 3 THEN (stat.a).stanumbers5[1]
+           END) correlation,
+           (CASE
+               WHEN (stat.a).stakind1 = 4 THEN (stat.a).stavalues1
+               WHEN (stat.a).stakind2 = 4 THEN (stat.a).stavalues2
+               WHEN (stat.a).stakind3 = 4 THEN (stat.a).stavalues3
+               WHEN (stat.a).stakind4 = 4 THEN (stat.a).stavalues4
+               WHEN (stat.a).stakind5 = 4 THEN (stat.a).stavalues5
+           END) AS most_common_elems,
+           (CASE
+               WHEN (stat.a).stakind1 = 4 THEN (stat.a).stanumbers1
+               WHEN (stat.a).stakind2 = 4 THEN (stat.a).stanumbers2
+               WHEN (stat.a).stakind3 = 4 THEN (stat.a).stanumbers3
+               WHEN (stat.a).stakind4 = 4 THEN (stat.a).stanumbers4
+               WHEN (stat.a).stakind5 = 4 THEN (stat.a).stanumbers5
+           END) AS most_common_elem_freqs,
+           (CASE
+               WHEN (stat.a).stakind1 = 5 THEN (stat.a).stanumbers1
+               WHEN (stat.a).stakind2 = 5 THEN (stat.a).stanumbers2
+               WHEN (stat.a).stakind3 = 5 THEN (stat.a).stanumbers3
+               WHEN (stat.a).stakind4 = 5 THEN (stat.a).stanumbers4
+               WHEN (stat.a).stakind5 = 5 THEN (stat.a).stanumbers5
+           END) AS elem_count_histogram
+    FROM pg_statistic_ext s JOIN pg_class c ON (c.oid = s.stxrelid)
+         LEFT JOIN pg_statistic_ext_data sd ON (s.oid = sd.stxoid)
+         LEFT JOIN pg_namespace cn ON (cn.oid = c.relnamespace)
+         LEFT JOIN pg_namespace sn ON (sn.oid = s.stxnamespace)
+         JOIN LATERAL (
+             SELECT unnest(pg_get_statisticsobjdef_expressions(s.oid)) AS expr,
+                    unnest(sd.stxdexpr)::pg_statistic AS a
+         ) stat ON (stat.expr IS NOT NULL);
+
 -- unprivileged users may read pg_statistic_ext but not pg_statistic_ext_data
 REVOKE ALL on pg_statistic_ext_data FROM public;
 
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index 2bae205845..df4768952d 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -29,6 +29,8 @@
 #include "commands/comment.h"
 #include "commands/defrem.h"
 #include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
 #include "statistics/statistics.h"
 #include "utils/builtins.h"
 #include "utils/fmgroids.h"
@@ -62,7 +64,8 @@ ObjectAddress
 CreateStatistics(CreateStatsStmt *stmt)
 {
 	int16		attnums[STATS_MAX_DIMENSIONS];
-	int			numcols = 0;
+	int			nattnums = 0;
+	int			numcols;
 	char	   *namestr;
 	NameData	stxname;
 	Oid			statoid;
@@ -74,21 +77,25 @@ CreateStatistics(CreateStatsStmt *stmt)
 	Datum		datavalues[Natts_pg_statistic_ext_data];
 	bool		datanulls[Natts_pg_statistic_ext_data];
 	int2vector *stxkeys;
+	List	   *stxexprs = NIL;
+	Datum		exprsDatum;
 	Relation	statrel;
 	Relation	datarel;
 	Relation	rel = NULL;
 	Oid			relid;
 	ObjectAddress parentobject,
 				myself;
-	Datum		types[3];		/* one for each possible type of statistic */
+	Datum		types[4];		/* one for each possible type of statistic */
 	int			ntypes;
 	ArrayType  *stxkind;
 	bool		build_ndistinct;
 	bool		build_dependencies;
 	bool		build_mcv;
+	bool		build_expressions;
 	bool		requested_type = false;
 	int			i;
 	ListCell   *cell;
+	ListCell   *cell2;
 
 	Assert(IsA(stmt, CreateStatsStmt));
 
@@ -190,101 +197,124 @@ CreateStatistics(CreateStatsStmt *stmt)
 	}
 
 	/*
-	 * Currently, we only allow simple column references in the expression
-	 * list.  That will change someday, and again the grammar already supports
-	 * it so we have to enforce restrictions here.  For now, we can convert
-	 * the expression list to a simple array of attnums.  While at it, enforce
-	 * some constraints.
+	 * Make sure no more than STATS_MAX_DIMENSIONS columns are used. There
+	 * might be duplicates and so on, but we'll deal with those later.
+	 */
+	numcols = list_length(stmt->exprs);
+	if (numcols > STATS_MAX_DIMENSIONS)
+		ereport(ERROR,
+				(errcode(ERRCODE_TOO_MANY_COLUMNS),
+				 errmsg("cannot have more than %d columns in statistics",
+						STATS_MAX_DIMENSIONS)));
+
+	/*
+	 * Convert the expression list to a simple array of attnums, but also keep
+	 * a list of more complex expressions.  While at it, enforce some
+	 * constraints.
+	 *
+	 * XXX We do only the bare minimum to separate simple attribute and
+	 * complex expressions - for example "(a)" will be treated as a complex
+	 * expression. No matter how elaborate the check is, there'll always be a
+	 * way around it, if the user is determined (consider e.g. "(a+0)"), so
+	 * it's not worth protecting against it.
 	 */
 	foreach(cell, stmt->exprs)
 	{
 		Node	   *expr = (Node *) lfirst(cell);
-		ColumnRef  *cref;
-		char	   *attname;
+		StatsElem  *selem;
 		HeapTuple	atttuple;
 		Form_pg_attribute attForm;
 		TypeCacheEntry *type;
 
-		if (!IsA(expr, ColumnRef))
+		/*
+		 * We should not get anything else than StatsElem, given the grammar.
+		 * But let's keep it as a safety.
+		 */
+		if (!IsA(expr, StatsElem))
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("only simple column references are allowed in CREATE STATISTICS")));
-		cref = (ColumnRef *) expr;
+					 errmsg("only simple column references and expressions are allowed in CREATE STATISTICS")));
 
-		if (list_length(cref->fields) != 1)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("only simple column references are allowed in CREATE STATISTICS")));
-		attname = strVal((Value *) linitial(cref->fields));
+		selem = (StatsElem *) expr;
 
-		atttuple = SearchSysCacheAttName(relid, attname);
-		if (!HeapTupleIsValid(atttuple))
-			ereport(ERROR,
-					(errcode(ERRCODE_UNDEFINED_COLUMN),
-					 errmsg("column \"%s\" does not exist",
-							attname)));
-		attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
+		if (selem->name)		/* column reference */
+		{
+			char	   *attname;
 
-		/* Disallow use of system attributes in extended stats */
-		if (attForm->attnum <= 0)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("statistics creation on system columns is not supported")));
+			attname = selem->name;
 
-		/* Disallow data types without a less-than operator */
-		type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
-		if (type->lt_opr == InvalidOid)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
-							attname, format_type_be(attForm->atttypid))));
+			atttuple = SearchSysCacheAttName(relid, attname);
+			if (!HeapTupleIsValid(atttuple))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_COLUMN),
+						 errmsg("column \"%s\" does not exist",
+								attname)));
+			attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
 
-		/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
-		if (numcols >= STATS_MAX_DIMENSIONS)
-			ereport(ERROR,
-					(errcode(ERRCODE_TOO_MANY_COLUMNS),
-					 errmsg("cannot have more than %d columns in statistics",
-							STATS_MAX_DIMENSIONS)));
+			/* Disallow use of system attributes in extended stats */
+			if (attForm->attnum <= 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("statistics creation on system columns is not supported")));
 
-		attnums[numcols] = attForm->attnum;
-		numcols++;
-		ReleaseSysCache(atttuple);
+			/* Disallow data types without a less-than operator */
+			type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
+			if (type->lt_opr == InvalidOid)
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
+								attname, format_type_be(attForm->atttypid))));
+
+			attnums[nattnums] = attForm->attnum;
+			nattnums++;
+			ReleaseSysCache(atttuple);
+		}
+		else					/* expression */
+		{
+			Node	   *expr = selem->expr;
+			Oid			atttype;
+
+			Assert(expr != NULL);
+
+			/*
+			 * Disallow data types without a less-than operator.
+			 *
+			 * We ignore this for statistics on a single expression, in which
+			 * case we'll build the regular statistics only (and that code can
+			 * deal with such data types).
+			 */
+			if (list_length(stmt->exprs) > 1)
+			{
+				atttype = exprType(expr);
+				type = lookup_type_cache(atttype, TYPECACHE_LT_OPR);
+				if (type->lt_opr == InvalidOid)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("expression cannot be used in multivariate statistics because its type %s has no default btree operator class",
+									format_type_be(atttype))));
+			}
+
+			stxexprs = lappend(stxexprs, expr);
+		}
 	}
 
-	/*
-	 * Check that at least two columns were specified in the statement. The
-	 * upper bound was already checked in the loop above.
-	 */
-	if (numcols < 2)
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
-				 errmsg("extended statistics require at least 2 columns")));
-
-	/*
-	 * Sort the attnums, which makes detecting duplicates somewhat easier, and
-	 * it does not hurt (it does not affect the efficiency, unlike for
-	 * indexes, for example).
-	 */
-	qsort(attnums, numcols, sizeof(int16), compare_int16);
-
-	/*
-	 * Check for duplicates in the list of columns. The attnums are sorted so
-	 * just check consecutive elements.
-	 */
-	for (i = 1; i < numcols; i++)
-	{
-		if (attnums[i] == attnums[i - 1])
-			ereport(ERROR,
-					(errcode(ERRCODE_DUPLICATE_COLUMN),
-					 errmsg("duplicate column name in statistics definition")));
-	}
-
-	/* Form an int2vector representation of the sorted column list */
-	stxkeys = buildint2vector(attnums, numcols);
-
 	/*
 	 * Parse the statistics kinds.
+	 *
+	 * First check that if this is the case with a single expression, there
+	 * are no statistics kinds specified (we don't allow that for the simple
+	 * CREATE STATISTICS form).
 	 */
+	if ((list_length(stmt->exprs) == 1) && (list_length(stxexprs) == 1))
+	{
+		/* statistics kinds not specified */
+		if (list_length(stmt->stat_types) > 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("when building statistics on a single expression, statistics kinds may not be specified")));
+	}
+
+	/* OK, let's check that we recognize the statistics kinds. */
 	build_ndistinct = false;
 	build_dependencies = false;
 	build_mcv = false;
@@ -313,14 +343,91 @@ CreateStatistics(CreateStatsStmt *stmt)
 					 errmsg("unrecognized statistics kind \"%s\"",
 							type)));
 	}
-	/* If no statistic type was specified, build them all. */
-	if (!requested_type)
+
+	/*
+	 * If no statistic type was specified, build them all (but only when the
+	 * statistics is defined on more than one column/expression).
+	 */
+	if ((!requested_type) && (numcols >= 2))
 	{
 		build_ndistinct = true;
 		build_dependencies = true;
 		build_mcv = true;
 	}
 
+	/*
+	 * When there are non-trivial expressions, build the expression stats
+	 * automatically. This allows calculating good estimates for stats that
+	 * consider per-clause estimates (e.g. functional dependencies).
+	 */
+	build_expressions = (list_length(stxexprs) > 0);
+
+	/*
+	 * Check that at least two columns were specified in the statement, or
+	 * that we're building statistics on a single expression.
+	 */
+	if ((numcols < 2) && (list_length(stxexprs) != 1))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("extended statistics require at least 2 columns")));
+
+	/*
+	 * Sort the attnums, which makes detecting duplicates somewhat easier, and
+	 * it does not hurt (it does not matter for the contents, unlike for
+	 * indexes, for example).
+	 */
+	qsort(attnums, nattnums, sizeof(int16), compare_int16);
+
+	/*
+	 * Check for duplicates in the list of columns. The attnums are sorted so
+	 * just check consecutive elements.
+	 */
+	for (i = 1; i < nattnums; i++)
+	{
+		if (attnums[i] == attnums[i - 1])
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_COLUMN),
+					 errmsg("duplicate column name in statistics definition")));
+	}
+
+	/*
+	 * Check for duplicate expressions. We do two loops, counting the
+	 * occurrences of each expression. This is O(N^2) but we only allow small
+	 * number of expressions and it's not executed often.
+	 *
+	 * XXX We don't cross-check attributes and expressions, because it does
+	 * not seem worth it. In principle we could check that expressions don't
+	 * contain trivial attribute references like "(a)", but the reasoning is
+	 * similar to why we don't bother with extracting columns from
+	 * expressions. It's either expensive or very easy to defeat for
+	 * determined user, and there's no risk if we allow such statistics (the
+	 * statistics is useless, but harmless).
+	 */
+	foreach(cell, stxexprs)
+	{
+		Node	   *expr1 = (Node *) lfirst(cell);
+		int			cnt = 0;
+
+		foreach(cell2, stxexprs)
+		{
+			Node	   *expr2 = (Node *) lfirst(cell2);
+
+			if (equal(expr1, expr2))
+				cnt += 1;
+		}
+
+		/* every expression should find at least itself */
+		Assert(cnt >= 1);
+
+		if (cnt > 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_COLUMN),
+					 errmsg("duplicate expression in statistics definition")));
+	}
+
+	/* Form an int2vector representation of the sorted column list */
+	stxkeys = buildint2vector(attnums, nattnums);
+
 	/* construct the char array of enabled statistic types */
 	ntypes = 0;
 	if (build_ndistinct)
@@ -329,9 +436,23 @@ CreateStatistics(CreateStatsStmt *stmt)
 		types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES);
 	if (build_mcv)
 		types[ntypes++] = CharGetDatum(STATS_EXT_MCV);
+	if (build_expressions)
+		types[ntypes++] = CharGetDatum(STATS_EXT_EXPRESSIONS);
 	Assert(ntypes > 0 && ntypes <= lengthof(types));
 	stxkind = construct_array(types, ntypes, CHAROID, 1, true, TYPALIGN_CHAR);
 
+	/* convert the expressions (if any) to a text datum */
+	if (stxexprs != NIL)
+	{
+		char	   *exprsString;
+
+		exprsString = nodeToString(stxexprs);
+		exprsDatum = CStringGetTextDatum(exprsString);
+		pfree(exprsString);
+	}
+	else
+		exprsDatum = (Datum) 0;
+
 	statrel = table_open(StatisticExtRelationId, RowExclusiveLock);
 
 	/*
@@ -351,6 +472,10 @@ CreateStatistics(CreateStatsStmt *stmt)
 	values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys);
 	values[Anum_pg_statistic_ext_stxkind - 1] = PointerGetDatum(stxkind);
 
+	values[Anum_pg_statistic_ext_stxexprs - 1] = exprsDatum;
+	if (exprsDatum == (Datum) 0)
+		nulls[Anum_pg_statistic_ext_stxexprs - 1] = true;
+
 	/* insert it into pg_statistic_ext */
 	htup = heap_form_tuple(statrel->rd_att, values, nulls);
 	CatalogTupleInsert(statrel, htup);
@@ -373,6 +498,7 @@ CreateStatistics(CreateStatsStmt *stmt)
 	datanulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
 	datanulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
 	datanulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
+	datanulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = true;
 
 	/* insert it into pg_statistic_ext_data */
 	htup = heap_form_tuple(datarel->rd_att, datavalues, datanulls);
@@ -396,12 +522,41 @@ CreateStatistics(CreateStatsStmt *stmt)
 	 */
 	ObjectAddressSet(myself, StatisticExtRelationId, statoid);
 
-	for (i = 0; i < numcols; i++)
+	/* add dependencies for plain column references */
+	for (i = 0; i < nattnums; i++)
 	{
 		ObjectAddressSubSet(parentobject, RelationRelationId, relid, attnums[i]);
 		recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
 	}
 
+	/*
+	 * If there are no dependencies on a column, give the statistics an auto
+	 * dependency on the whole table.  In most cases, this will be redundant,
+	 * but it might not be if the statistics expressions contain no Vars
+	 * (which might seem strange but possible). This is consistent with what
+	 * we do for indexes in index_create.
+	 *
+	 * XXX We intentionally don't consider the expressions before adding this
+	 * dependency, because recordDependencyOnSingleRelExpr may not create any
+	 * dependencies for whole-row Vars.
+	 */
+	if (!nattnums)
+	{
+		ObjectAddressSet(parentobject, RelationRelationId, relid);
+		recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
+	}
+
+	/*
+	 * Store dependencies on anything mentioned in statistics expressions,
+	 * just like we do for index expressions.
+	 */
+	if (stxexprs)
+		recordDependencyOnSingleRelExpr(&myself,
+										(Node *) stxexprs,
+										relid,
+										DEPENDENCY_NORMAL,
+										DEPENDENCY_AUTO, false, true);
+
 	/*
 	 * Also add dependencies on namespace and owner.  These are required
 	 * because the stats object might have a different namespace and/or owner
@@ -582,87 +737,6 @@ RemoveStatisticsById(Oid statsOid)
 	table_close(relation, RowExclusiveLock);
 }
 
-/*
- * Update a statistics object for ALTER COLUMN TYPE on a source column.
- *
- * This could throw an error if the type change can't be supported.
- * If it can be supported, but the stats must be recomputed, a likely choice
- * would be to set the relevant column(s) of the pg_statistic_ext_data tuple
- * to null until the next ANALYZE.  (Note that the type change hasn't actually
- * happened yet, so one option that's *not* on the table is to recompute
- * immediately.)
- *
- * For both ndistinct and functional-dependencies stats, the on-disk
- * representation is independent of the source column data types, and it is
- * plausible to assume that the old statistic values will still be good for
- * the new column contents.  (Obviously, if the ALTER COLUMN TYPE has a USING
- * expression that substantially alters the semantic meaning of the column
- * values, this assumption could fail.  But that seems like a corner case
- * that doesn't justify zapping the stats in common cases.)
- *
- * For MCV lists that's not the case, as those statistics store the datums
- * internally. In this case we simply reset the statistics value to NULL.
- *
- * Note that "type change" includes collation change, which means we can rely
- * on the MCV list being consistent with the collation info in pg_attribute
- * during estimation.
- */
-void
-UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum,
-							  Oid oldColumnType, Oid newColumnType)
-{
-	HeapTuple	stup,
-				oldtup;
-
-	Relation	rel;
-
-	Datum		values[Natts_pg_statistic_ext_data];
-	bool		nulls[Natts_pg_statistic_ext_data];
-	bool		replaces[Natts_pg_statistic_ext_data];
-
-	oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid));
-	if (!HeapTupleIsValid(oldtup))
-		elog(ERROR, "cache lookup failed for statistics object %u", statsOid);
-
-	/*
-	 * When none of the defined statistics types contain datum values from the
-	 * table's columns then there's no need to reset the stats. Functional
-	 * dependencies and ndistinct stats should still hold true.
-	 */
-	if (!statext_is_kind_built(oldtup, STATS_EXT_MCV))
-	{
-		ReleaseSysCache(oldtup);
-		return;
-	}
-
-	/*
-	 * OK, we need to reset some statistics. So let's build the new tuple,
-	 * replacing the affected statistics types with NULL.
-	 */
-	memset(nulls, 0, Natts_pg_statistic_ext_data * sizeof(bool));
-	memset(replaces, 0, Natts_pg_statistic_ext_data * sizeof(bool));
-	memset(values, 0, Natts_pg_statistic_ext_data * sizeof(Datum));
-
-	replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
-	nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
-
-	rel = table_open(StatisticExtDataRelationId, RowExclusiveLock);
-
-	/* replace the old tuple */
-	stup = heap_modify_tuple(oldtup,
-							 RelationGetDescr(rel),
-							 values,
-							 nulls,
-							 replaces);
-
-	ReleaseSysCache(oldtup);
-	CatalogTupleUpdate(rel, &stup->t_self, stup);
-
-	heap_freetuple(stup);
-
-	table_close(rel, RowExclusiveLock);
-}
-
 /*
  * Select a nonconflicting name for a new statistics.
  *
@@ -731,18 +805,27 @@ ChooseExtendedStatisticNameAddition(List *exprs)
 	buf[0] = '\0';
 	foreach(lc, exprs)
 	{
-		ColumnRef  *cref = (ColumnRef *) lfirst(lc);
+		StatsElem  *selem = (StatsElem *) lfirst(lc);
 		const char *name;
 
 		/* It should be one of these, but just skip if it happens not to be */
-		if (!IsA(cref, ColumnRef))
+		if (!IsA(selem, StatsElem))
 			continue;
 
-		name = strVal((Value *) linitial(cref->fields));
+		name = selem->name;
 
 		if (buflen > 0)
 			buf[buflen++] = '_';	/* insert _ between names */
 
+		/*
+		 * We use fixed 'expr' for expressions, which have empty column names.
+		 * For indexes this is handled in ChooseIndexColumnNames, but we have
+		 * no such function for stats and it does not seem worth adding. If a
+		 * better name is needed, the user can specify it explicitly.
+		 */
+		if (!name)
+			name = "expr";
+
 		/*
 		 * At this point we have buflen <= NAMEDATALEN.  name should be less
 		 * than NAMEDATALEN already, but use strlcpy for paranoia.
@@ -754,3 +837,29 @@ ChooseExtendedStatisticNameAddition(List *exprs)
 	}
 	return pstrdup(buf);
 }
+
+/*
+ * StatisticsGetRelation: given a statistics's relation OID, get the OID of
+ * the relation it is an statistics on.  Uses the system cache.
+ */
+Oid
+StatisticsGetRelation(Oid statId, bool missing_ok)
+{
+	HeapTuple	tuple;
+	Form_pg_statistic_ext stx;
+	Oid			result;
+
+	tuple = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statId));
+	if (!HeapTupleIsValid(tuple))
+	{
+		if (missing_ok)
+			return InvalidOid;
+		elog(ERROR, "cache lookup failed for statistics object %u", statId);
+	}
+	stx = (Form_pg_statistic_ext) GETSTRUCT(tuple);
+	Assert(stx->oid == statId);
+
+	result = stx->stxrelid;
+	ReleaseSysCache(tuple);
+	return result;
+}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index efac06f72c..88a68a4697 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -41,6 +41,7 @@
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_tablespace.h"
+#include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_trigger.h"
 #include "catalog/pg_type.h"
 #include "catalog/storage.h"
@@ -188,6 +189,8 @@ typedef struct AlteredTableInfo
 	List	   *changedIndexDefs;	/* string definitions of same */
 	char	   *replicaIdentityIndex;	/* index to reset as REPLICA IDENTITY */
 	char	   *clusterOnIndex; /* index to use for CLUSTER */
+	List	   *changedStatisticsOids;	/* OIDs of statistics to rebuild */
+	List	   *changedStatisticsDefs;	/* string definitions of same */
 } AlteredTableInfo;
 
 /* Struct describing one new constraint to check in Phase 3 scan */
@@ -440,6 +443,8 @@ static ObjectAddress ATExecDropColumn(List **wqueue, Relation rel, const char *c
 									  ObjectAddresses *addrs);
 static ObjectAddress ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
 									IndexStmt *stmt, bool is_rebuild, LOCKMODE lockmode);
+static ObjectAddress ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
+										 CreateStatsStmt *stmt, bool is_rebuild, LOCKMODE lockmode);
 static ObjectAddress ATExecAddConstraint(List **wqueue,
 										 AlteredTableInfo *tab, Relation rel,
 										 Constraint *newConstraint, bool recurse, bool is_readd,
@@ -496,6 +501,7 @@ static ObjectAddress ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
 										   AlterTableCmd *cmd, LOCKMODE lockmode);
 static void RememberConstraintForRebuilding(Oid conoid, AlteredTableInfo *tab);
 static void RememberIndexForRebuilding(Oid indoid, AlteredTableInfo *tab);
+static void RememberStatisticsForRebuilding(Oid indoid, AlteredTableInfo *tab);
 static void ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab,
 								   LOCKMODE lockmode);
 static void ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId,
@@ -4756,6 +4762,10 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab,
 			address = ATExecAddIndex(tab, rel, (IndexStmt *) cmd->def, true,
 									 lockmode);
 			break;
+		case AT_ReAddStatistics:	/* ADD STATISTICS */
+			address = ATExecAddStatistics(tab, rel, (CreateStatsStmt *) cmd->def,
+										  true, lockmode);
+			break;
 		case AT_AddConstraint:	/* ADD CONSTRAINT */
 			/* Transform the command only during initial examination */
 			if (cur_pass == AT_PASS_ADD_CONSTR)
@@ -8283,6 +8293,29 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
 	return address;
 }
 
+/*
+ * ALTER TABLE ADD STATISTICS
+ *
+ * This is no such command in the grammar, but we use this internally to add
+ * AT_ReAddStatistics subcommands to rebuild extended statistics after a table
+ * column type change.
+ */
+static ObjectAddress
+ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
+					CreateStatsStmt *stmt, bool is_rebuild, LOCKMODE lockmode)
+{
+	ObjectAddress address;
+
+	Assert(IsA(stmt, CreateStatsStmt));
+
+	/* The CreateStatsStmt has already been through transformStatsStmt */
+	Assert(stmt->transformed);
+
+	address = CreateStatistics(stmt);
+
+	return address;
+}
+
 /*
  * ALTER TABLE ADD CONSTRAINT USING INDEX
  *
@@ -11830,9 +11863,7 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
 				 * Give the extended-stats machinery a chance to fix anything
 				 * that this column type change would break.
 				 */
-				UpdateStatisticsForTypeChange(foundObject.objectId,
-											  RelationGetRelid(rel), attnum,
-											  attTup->atttypid, targettype);
+				RememberStatisticsForRebuilding(foundObject.objectId, tab);
 				break;
 
 			case OCLASS_PROC:
@@ -12202,6 +12233,32 @@ RememberIndexForRebuilding(Oid indoid, AlteredTableInfo *tab)
 	}
 }
 
+/*
+ * Subroutine for ATExecAlterColumnType: remember that a statistics object
+ * needs to be rebuilt (which we might already know).
+ */
+static void
+RememberStatisticsForRebuilding(Oid stxoid, AlteredTableInfo *tab)
+{
+	/*
+	 * This de-duplication check is critical for two independent reasons: we
+	 * mustn't try to recreate the same statistics object twice, and if the
+	 * statistics depends on more than one column whose type is to be altered,
+	 * we must capture its definition string before applying any of the type
+	 * changes. ruleutils.c will get confused if we ask again later.
+	 */
+	if (!list_member_oid(tab->changedStatisticsOids, stxoid))
+	{
+		/* OK, capture the index's existing definition string */
+		char	   *defstring = pg_get_statisticsobjdef_string(stxoid);
+
+		tab->changedStatisticsOids = lappend_oid(tab->changedStatisticsOids,
+												 stxoid);
+		tab->changedStatisticsDefs = lappend(tab->changedStatisticsDefs,
+											 defstring);
+	}
+}
+
 /*
  * Cleanup after we've finished all the ALTER TYPE operations for a
  * particular relation.  We have to drop and recreate all the indexes
@@ -12306,6 +12363,22 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
 		add_exact_object_address(&obj, objects);
 	}
 
+	/* add dependencies for new statistics */
+	forboth(oid_item, tab->changedStatisticsOids,
+			def_item, tab->changedStatisticsDefs)
+	{
+		Oid			oldId = lfirst_oid(oid_item);
+		Oid			relid;
+
+		relid = StatisticsGetRelation(oldId, false);
+		ATPostAlterTypeParse(oldId, relid, InvalidOid,
+							 (char *) lfirst(def_item),
+							 wqueue, lockmode, tab->rewrite);
+
+		ObjectAddressSet(obj, StatisticExtRelationId, oldId);
+		add_exact_object_address(&obj, objects);
+	}
+
 	/*
 	 * Queue up command to restore replica identity index marking
 	 */
@@ -12354,9 +12427,9 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
 }
 
 /*
- * Parse the previously-saved definition string for a constraint or index
- * against the newly-established column data type(s), and queue up the
- * resulting command parsetrees for execution.
+ * Parse the previously-saved definition string for a constraint, index or
+ * statistics object against the newly-established column data type(s), and
+ * queue up the resulting command parsetrees for execution.
  *
  * This might fail if, for example, you have a WHERE clause that uses an
  * operator that's not available for the new column type.
@@ -12402,6 +12475,11 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
 			querytree_list = lappend(querytree_list, stmt);
 			querytree_list = list_concat(querytree_list, afterStmts);
 		}
+		else if (IsA(stmt, CreateStatsStmt))
+			querytree_list = lappend(querytree_list,
+									 transformStatsStmt(oldRelId,
+														(CreateStatsStmt *) stmt,
+														cmd));
 		else
 			querytree_list = lappend(querytree_list, stmt);
 	}
@@ -12540,6 +12618,20 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
 				elog(ERROR, "unexpected statement subtype: %d",
 					 (int) stmt->subtype);
 		}
+		else if (IsA(stm, CreateStatsStmt))
+		{
+			CreateStatsStmt  *stmt = (CreateStatsStmt *) stm;
+			AlterTableCmd *newcmd;
+
+			/* keep the statistics object's comment */
+			stmt->stxcomment = GetComment(oldId, StatisticExtRelationId, 0);
+
+			newcmd = makeNode(AlterTableCmd);
+			newcmd->subtype = AT_ReAddStatistics;
+			newcmd->def = (Node *) stmt;
+			tab->subcmds[AT_PASS_MISC] =
+				lappend(tab->subcmds[AT_PASS_MISC], newcmd);
+		}
 		else
 			elog(ERROR, "unexpected statement type: %d",
 				 (int) nodeTag(stm));
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 38b56231b7..d5b1ad4567 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -2980,6 +2980,17 @@ _copyIndexElem(const IndexElem *from)
 	return newnode;
 }
 
+static StatsElem *
+_copyStatsElem(const StatsElem *from)
+{
+	StatsElem  *newnode = makeNode(StatsElem);
+
+	COPY_STRING_FIELD(name);
+	COPY_NODE_FIELD(expr);
+
+	return newnode;
+}
+
 static ColumnDef *
 _copyColumnDef(const ColumnDef *from)
 {
@@ -5699,6 +5710,9 @@ copyObjectImpl(const void *from)
 		case T_IndexElem:
 			retval = _copyIndexElem(from);
 			break;
+		case T_StatsElem:
+			retval = _copyStatsElem(from);
+			break;
 		case T_ColumnDef:
 			retval = _copyColumnDef(from);
 			break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 3292dda342..d46909bbc4 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2596,6 +2596,16 @@ _equalIndexElem(const IndexElem *a, const IndexElem *b)
 	return true;
 }
 
+
+static bool
+_equalStatsElem(const StatsElem *a, const StatsElem *b)
+{
+	COMPARE_STRING_FIELD(name);
+	COMPARE_NODE_FIELD(expr);
+
+	return true;
+}
+
 static bool
 _equalColumnDef(const ColumnDef *a, const ColumnDef *b)
 {
@@ -3724,6 +3734,9 @@ equal(const void *a, const void *b)
 		case T_IndexElem:
 			retval = _equalIndexElem(a, b);
 			break;
+		case T_StatsElem:
+			retval = _equalStatsElem(a, b);
+			break;
 		case T_ColumnDef:
 			retval = _equalColumnDef(a, b);
 			break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 9f7918c7e9..12561c4757 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2943,6 +2943,15 @@ _outIndexElem(StringInfo str, const IndexElem *node)
 	WRITE_ENUM_FIELD(nulls_ordering, SortByNulls);
 }
 
+static void
+_outStatsElem(StringInfo str, const StatsElem *node)
+{
+	WRITE_NODE_TYPE("STATSELEM");
+
+	WRITE_STRING_FIELD(name);
+	WRITE_NODE_FIELD(expr);
+}
+
 static void
 _outQuery(StringInfo str, const Query *node)
 {
@@ -4286,6 +4295,9 @@ outNode(StringInfo str, const void *obj)
 			case T_IndexElem:
 				_outIndexElem(str, obj);
 				break;
+			case T_StatsElem:
+				_outStatsElem(str, obj);
+				break;
 			case T_Query:
 				_outQuery(str, obj);
 				break;
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 6c39bf893f..0fa8875f09 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -34,6 +34,7 @@
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "nodes/supportnodes.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
@@ -1308,6 +1309,7 @@ get_relation_constraints(PlannerInfo *root,
 static List *
 get_relation_statistics(RelOptInfo *rel, Relation relation)
 {
+	Index		varno = rel->relid;
 	List	   *statoidlist;
 	List	   *stainfos = NIL;
 	ListCell   *l;
@@ -1321,6 +1323,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 		HeapTuple	htup;
 		HeapTuple	dtup;
 		Bitmapset  *keys = NULL;
+		List	   *exprs = NIL;
 		int			i;
 
 		htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
@@ -1340,6 +1343,49 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 		for (i = 0; i < staForm->stxkeys.dim1; i++)
 			keys = bms_add_member(keys, staForm->stxkeys.values[i]);
 
+		/*
+		 * Preprocess expressions (if any). We read the expressions, run them
+		 * through eval_const_expressions, and fix the varnos.
+		 */
+		{
+			bool		isnull;
+			Datum		datum;
+
+			/* decode expression (if any) */
+			datum = SysCacheGetAttr(STATEXTOID, htup,
+									Anum_pg_statistic_ext_stxexprs, &isnull);
+
+			if (!isnull)
+			{
+				char	   *exprsString;
+
+				exprsString = TextDatumGetCString(datum);
+				exprs = (List *) stringToNode(exprsString);
+				pfree(exprsString);
+
+				/*
+				 * Run the expressions through eval_const_expressions. This is
+				 * not just an optimization, but is necessary, because the
+				 * planner will be comparing them to similarly-processed qual
+				 * clauses, and may fail to detect valid matches without this.
+				 * We must not use canonicalize_qual, however, since these
+				 * aren't qual expressions.
+				 */
+				exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
+
+				/* May as well fix opfuncids too */
+				fix_opfuncids((Node *) exprs);
+
+				/*
+				 * Modify the copies we obtain from the relcache to have the
+				 * correct varno for the parent relation, so that they match
+				 * up correctly against qual clauses.
+				 */
+				if (varno != 1)
+					ChangeVarNodes((Node *) exprs, 1, varno, 0);
+			}
+		}
+
 		/* add one StatisticExtInfo for each kind built */
 		if (statext_is_kind_built(dtup, STATS_EXT_NDISTINCT))
 		{
@@ -1349,6 +1395,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 			info->rel = rel;
 			info->kind = STATS_EXT_NDISTINCT;
 			info->keys = bms_copy(keys);
+			info->exprs = exprs;
 
 			stainfos = lappend(stainfos, info);
 		}
@@ -1361,6 +1408,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 			info->rel = rel;
 			info->kind = STATS_EXT_DEPENDENCIES;
 			info->keys = bms_copy(keys);
+			info->exprs = exprs;
 
 			stainfos = lappend(stainfos, info);
 		}
@@ -1373,6 +1421,20 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 			info->rel = rel;
 			info->kind = STATS_EXT_MCV;
 			info->keys = bms_copy(keys);
+			info->exprs = exprs;
+
+			stainfos = lappend(stainfos, info);
+		}
+
+		if (statext_is_kind_built(dtup, STATS_EXT_EXPRESSIONS))
+		{
+			StatisticExtInfo *info = makeNode(StatisticExtInfo);
+
+			info->statOid = statOid;
+			info->rel = rel;
+			info->kind = STATS_EXT_EXPRESSIONS;
+			info->keys = bms_copy(keys);
+			info->exprs = exprs;
 
 			stainfos = lappend(stainfos, info);
 		}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 2132cf4d82..7ff36bc842 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -239,6 +239,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	WindowDef			*windef;
 	JoinExpr			*jexpr;
 	IndexElem			*ielem;
+	StatsElem			*selem;
 	Alias				*alias;
 	RangeVar			*range;
 	IntoClause			*into;
@@ -405,7 +406,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 				old_aggr_definition old_aggr_list
 				oper_argtypes RuleActionList RuleActionMulti
 				opt_column_list columnList opt_name_list
-				sort_clause opt_sort_clause sortby_list index_params
+				sort_clause opt_sort_clause sortby_list index_params stats_params
 				opt_include opt_c_include index_including_params
 				name_list role_list from_clause from_list opt_array_bounds
 				qualified_name_list any_name any_name_list type_name_list
@@ -512,6 +513,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <list>	func_alias_clause
 %type <sortby>	sortby
 %type <ielem>	index_elem index_elem_options
+%type <selem>	stats_param
 %type <node>	table_ref
 %type <jexpr>	joined_table
 %type <range>	relation_expr
@@ -4097,7 +4099,7 @@ ExistingIndex:   USING INDEX name					{ $$ = $3; }
 
 CreateStatsStmt:
 			CREATE STATISTICS any_name
-			opt_name_list ON expr_list FROM from_list
+			opt_name_list ON stats_params FROM from_list
 				{
 					CreateStatsStmt *n = makeNode(CreateStatsStmt);
 					n->defnames = $3;
@@ -4109,7 +4111,7 @@ CreateStatsStmt:
 					$$ = (Node *)n;
 				}
 			| CREATE STATISTICS IF_P NOT EXISTS any_name
-			opt_name_list ON expr_list FROM from_list
+			opt_name_list ON stats_params FROM from_list
 				{
 					CreateStatsStmt *n = makeNode(CreateStatsStmt);
 					n->defnames = $6;
@@ -4122,6 +4124,36 @@ CreateStatsStmt:
 				}
 			;
 
+/*
+ * Statistics attributes can be either simple column references, or arbitrary
+ * expressions in parens.  For compatibility with index attributes permitted
+ * in CREATE INDEX, we allow an expression that's just a function call to be
+ * written without parens.
+ */
+
+stats_params:	stats_param							{ $$ = list_make1($1); }
+			| stats_params ',' stats_param			{ $$ = lappend($1, $3); }
+		;
+
+stats_param:	ColId
+				{
+					$$ = makeNode(StatsElem);
+					$$->name = $1;
+					$$->expr = NULL;
+				}
+			| func_expr_windowless
+				{
+					$$ = makeNode(StatsElem);
+					$$->name = NULL;
+					$$->expr = $1;
+				}
+			| '(' a_expr ')'
+				{
+					$$ = makeNode(StatsElem);
+					$$->name = NULL;
+					$$->expr = $2;
+				}
+		;
 
 /*****************************************************************************
  *
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 7c3e01aa22..ceb0bf597d 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -484,6 +484,13 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
 			else
 				err = _("grouping operations are not allowed in index predicates");
 
+			break;
+		case EXPR_KIND_STATS_EXPRESSION:
+			if (isAgg)
+				err = _("aggregate functions are not allowed in statistics expressions");
+			else
+				err = _("grouping operations are not allowed in statistics expressions");
+
 			break;
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 			if (isAgg)
@@ -910,6 +917,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
 		case EXPR_KIND_INDEX_EXPRESSION:
 			err = _("window functions are not allowed in index expressions");
 			break;
+		case EXPR_KIND_STATS_EXPRESSION:
+			err = _("window functions are not allowed in statistics expressions");
+			break;
 		case EXPR_KIND_INDEX_PREDICATE:
 			err = _("window functions are not allowed in index predicates");
 			break;
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index f869e159d6..03373d551f 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -500,6 +500,7 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 		case EXPR_KIND_FUNCTION_DEFAULT:
 		case EXPR_KIND_INDEX_EXPRESSION:
 		case EXPR_KIND_INDEX_PREDICATE:
+		case EXPR_KIND_STATS_EXPRESSION:
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 		case EXPR_KIND_EXECUTE_PARAMETER:
 		case EXPR_KIND_TRIGGER_WHEN:
@@ -1741,6 +1742,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
 		case EXPR_KIND_INDEX_PREDICATE:
 			err = _("cannot use subquery in index predicate");
 			break;
+		case EXPR_KIND_STATS_EXPRESSION:
+			err = _("cannot use subquery in statistics expression");
+			break;
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 			err = _("cannot use subquery in transform expression");
 			break;
@@ -3030,6 +3034,8 @@ ParseExprKindName(ParseExprKind exprKind)
 			return "index expression";
 		case EXPR_KIND_INDEX_PREDICATE:
 			return "index predicate";
+		case EXPR_KIND_STATS_EXPRESSION:
+			return "statistics expression";
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 			return "USING";
 		case EXPR_KIND_EXECUTE_PARAMETER:
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 37cebc7d82..debef1d14f 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -2503,6 +2503,9 @@ check_srf_call_placement(ParseState *pstate, Node *last_srf, int location)
 		case EXPR_KIND_INDEX_PREDICATE:
 			err = _("set-returning functions are not allowed in index predicates");
 			break;
+		case EXPR_KIND_STATS_EXPRESSION:
+			err = _("set-returning functions are not allowed in statistics expressions");
+			break;
 		case EXPR_KIND_ALTER_COL_TRANSFORM:
 			err = _("set-returning functions are not allowed in transform expressions");
 			break;
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index aa6c19adad..b968c25dd6 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -1917,6 +1917,9 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid,
 			stat_types = lappend(stat_types, makeString("dependencies"));
 		else if (enabled[i] == STATS_EXT_MCV)
 			stat_types = lappend(stat_types, makeString("mcv"));
+		else if (enabled[i] == STATS_EXT_EXPRESSIONS)
+			/* expression stats are not exposed to users */
+			continue;
 		else
 			elog(ERROR, "unrecognized statistics kind %c", enabled[i]);
 	}
@@ -1924,14 +1927,47 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid,
 	/* Determine which columns the statistics are on */
 	for (i = 0; i < statsrec->stxkeys.dim1; i++)
 	{
-		ColumnRef  *cref = makeNode(ColumnRef);
+		StatsElem  *selem = makeNode(StatsElem);
 		AttrNumber	attnum = statsrec->stxkeys.values[i];
 
-		cref->fields = list_make1(makeString(get_attname(heapRelid,
-														 attnum, false)));
-		cref->location = -1;
+		selem->name = get_attname(heapRelid, attnum, false);
+		selem->expr = NULL;
 
-		def_names = lappend(def_names, cref);
+		def_names = lappend(def_names, selem);
+	}
+
+	/*
+	 * Now handle expressions, if there are any. The order (with respect to
+	 * regular attributes) does not really matter for extended stats, so we
+	 * simply append them after simple column references.
+	 *
+	 * XXX Some places during build/estimation treat expressions as if they
+	 * are before atttibutes, but for the CREATE command that's entirely
+	 * irrelevant.
+	 */
+	datum = SysCacheGetAttr(STATEXTOID, ht_stats,
+							Anum_pg_statistic_ext_stxexprs, &isnull);
+
+	if (!isnull)
+	{
+		ListCell   *lc;
+		List	   *exprs = NIL;
+		char	   *exprsString;
+
+		exprsString = TextDatumGetCString(datum);
+		exprs = (List *) stringToNode(exprsString);
+
+		foreach(lc, exprs)
+		{
+			StatsElem  *selem = makeNode(StatsElem);
+
+			selem->name = NULL;
+			selem->expr = (Node *) lfirst(lc);
+
+			def_names = lappend(def_names, selem);
+		}
+
+		pfree(exprsString);
 	}
 
 	/* finally, build the output node */
@@ -1942,6 +1978,7 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid,
 	stats->relations = list_make1(heapRel);
 	stats->stxcomment = NULL;
 	stats->if_not_exists = false;
+	stats->transformed = true;	/* don't need transformStatsStmt again */
 
 	/* Clean up */
 	ReleaseSysCache(ht_stats);
@@ -2866,6 +2903,84 @@ transformIndexStmt(Oid relid, IndexStmt *stmt, const char *queryString)
 	return stmt;
 }
 
+/*
+ * transformStatsStmt - parse analysis for CREATE STATISTICS
+ *
+ * To avoid race conditions, it's important that this function rely only on
+ * the passed-in relid (and not on stmt->relation) to determine the target
+ * relation.
+ */
+CreateStatsStmt *
+transformStatsStmt(Oid relid, CreateStatsStmt *stmt, const char *queryString)
+{
+	ParseState *pstate;
+	ParseNamespaceItem *nsitem;
+	ListCell   *l;
+	Relation	rel;
+
+	/* Nothing to do if statement already transformed. */
+	if (stmt->transformed)
+		return stmt;
+
+	/*
+	 * We must not scribble on the passed-in CreateStatsStmt, so copy it.
+	 * (This is overkill, but easy.)
+	 */
+	stmt = copyObject(stmt);
+
+	/* Set up pstate */
+	pstate = make_parsestate(NULL);
+	pstate->p_sourcetext = queryString;
+
+	/*
+	 * Put the parent table into the rtable so that the expressions can refer
+	 * to its fields without qualification.  Caller is responsible for locking
+	 * relation, but we still need to open it.
+	 */
+	rel = relation_open(relid, NoLock);
+	nsitem = addRangeTableEntryForRelation(pstate, rel,
+										   AccessShareLock,
+										   NULL, false, true);
+
+	/* no to join list, yes to namespaces */
+	addNSItemToQuery(pstate, nsitem, false, true, true);
+
+	/* take care of any expressions */
+	foreach(l, stmt->exprs)
+	{
+		StatsElem  *selem = (StatsElem *) lfirst(l);
+
+		if (selem->expr)
+		{
+			/* Now do parse transformation of the expression */
+			selem->expr = transformExpr(pstate, selem->expr,
+										EXPR_KIND_STATS_EXPRESSION);
+
+			/* We have to fix its collations too */
+			assign_expr_collations(pstate, selem->expr);
+		}
+	}
+
+	/*
+	 * Check that only the base rel is mentioned.  (This should be dead code
+	 * now that add_missing_from is history.)
+	 */
+	if (list_length(pstate->p_rtable) != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+				 errmsg("statistics expressions can refer only to the table being indexed")));
+
+	free_parsestate(pstate);
+
+	/* Close relation */
+	table_close(rel, NoLock);
+
+	/* Mark statement as successfully transformed */
+	stmt->transformed = true;
+
+	return stmt;
+}
+
 
 /*
  * transformRuleStmt -
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index eac9285165..cf8a6d5f68 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -70,15 +70,15 @@ static void generate_dependencies(DependencyGenerator state);
 static DependencyGenerator DependencyGenerator_init(int n, int k);
 static void DependencyGenerator_free(DependencyGenerator state);
 static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
-static double dependency_degree(int numrows, HeapTuple *rows, int k,
-								AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs);
+static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
 static bool dependency_is_fully_matched(MVDependency *dependency,
 										Bitmapset *attnums);
 static bool dependency_is_compatible_clause(Node *clause, Index relid,
 											AttrNumber *attnum);
+static bool dependency_is_compatible_expression(Node *clause, Index relid,
+												List *statlist, Node **expr);
 static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
-											   int ndependencies,
-											   Bitmapset *attnums);
+											   int ndependencies, Bitmapset *attnums);
 static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
 												 int varRelid, JoinType jointype,
 												 SpecialJoinInfo *sjinfo,
@@ -219,16 +219,13 @@ DependencyGenerator_next(DependencyGenerator state)
  * the last one.
  */
 static double
-dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
-				  VacAttrStats **stats, Bitmapset *attrs)
+dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
 {
 	int			i,
 				nitems;
 	MultiSortSupport mss;
 	SortItem   *items;
-	AttrNumber *attnums;
 	AttrNumber *attnums_dep;
-	int			numattrs;
 
 	/* counters valid within a group */
 	int			group_size = 0;
@@ -244,15 +241,12 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
 	mss = multi_sort_init(k);
 
 	/*
-	 * Transform the attrs from bitmap to an array to make accessing the i-th
-	 * member easier, and then construct a filtered version with only attnums
-	 * referenced by the dependency we validate.
+	 * Translate the array of indexes to regular attnums for the dependency (we
+	 * will need this to identify the columns in StatsBuildData).
 	 */
-	attnums = build_attnums_array(attrs, &numattrs);
-
 	attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber));
 	for (i = 0; i < k; i++)
-		attnums_dep[i] = attnums[dependency[i]];
+		attnums_dep[i] = data->attnums[dependency[i]];
 
 	/*
 	 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
@@ -270,7 +264,7 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
 	/* prepare the sort function for the dimensions */
 	for (i = 0; i < k; i++)
 	{
-		VacAttrStats *colstat = stats[dependency[i]];
+		VacAttrStats *colstat = data->stats[dependency[i]];
 		TypeCacheEntry *type;
 
 		type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
@@ -289,8 +283,7 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
 	 * descriptor.  For now that assumption holds, but it might change in the
 	 * future for example if we support statistics on multiple tables.
 	 */
-	items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
-							   mss, k, attnums_dep);
+	items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
 
 	/*
 	 * Walk through the sorted array, split it into rows according to the
@@ -336,11 +329,10 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
 		pfree(items);
 
 	pfree(mss);
-	pfree(attnums);
 	pfree(attnums_dep);
 
 	/* Compute the 'degree of validity' as (supporting/total). */
-	return (n_supporting_rows * 1.0 / numrows);
+	return (n_supporting_rows * 1.0 / data->numrows);
 }
 
 /*
@@ -360,23 +352,15 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
  *	   (c) -> b
  */
 MVDependencies *
-statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
-						   VacAttrStats **stats)
+statext_dependencies_build(StatsBuildData *data)
 {
 	int			i,
 				k;
-	int			numattrs;
-	AttrNumber *attnums;
 
 	/* result */
 	MVDependencies *dependencies = NULL;
 
-	/*
-	 * Transform the bms into an array, to make accessing i-th member easier.
-	 */
-	attnums = build_attnums_array(attrs, &numattrs);
-
-	Assert(numattrs >= 2);
+	Assert(data->nattnums >= 2);
 
 	/*
 	 * We'll try build functional dependencies starting from the smallest ones
@@ -384,12 +368,12 @@ statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 	 * included in the statistics object.  We start from the smallest ones
 	 * because we want to be able to skip already implied ones.
 	 */
-	for (k = 2; k <= numattrs; k++)
+	for (k = 2; k <= data->nattnums; k++)
 	{
 		AttrNumber *dependency; /* array with k elements */
 
 		/* prepare a DependencyGenerator of variation */
-		DependencyGenerator DependencyGenerator = DependencyGenerator_init(numattrs, k);
+		DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
 
 		/* generate all possible variations of k values (out of n) */
 		while ((dependency = DependencyGenerator_next(DependencyGenerator)))
@@ -398,7 +382,7 @@ statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 			MVDependency *d;
 
 			/* compute how valid the dependency seems */
-			degree = dependency_degree(numrows, rows, k, dependency, stats, attrs);
+			degree = dependency_degree(data, k, dependency);
 
 			/*
 			 * if the dependency seems entirely invalid, don't store it
@@ -413,7 +397,7 @@ statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 			d->degree = degree;
 			d->nattributes = k;
 			for (i = 0; i < k; i++)
-				d->attributes[i] = attnums[dependency[i]];
+				d->attributes[i] = data->attnums[dependency[i]];
 
 			/* initialize the list of dependencies */
 			if (dependencies == NULL)
@@ -747,6 +731,7 @@ static bool
 dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
 {
 	Var		   *var;
+	Node	   *clause_expr;
 
 	if (IsA(clause, RestrictInfo))
 	{
@@ -774,9 +759,9 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
 
 		/* Make sure non-selected argument is a pseudoconstant. */
 		if (is_pseudo_constant_clause(lsecond(expr->args)))
-			var = linitial(expr->args);
+			clause_expr = linitial(expr->args);
 		else if (is_pseudo_constant_clause(linitial(expr->args)))
-			var = lsecond(expr->args);
+			clause_expr = lsecond(expr->args);
 		else
 			return false;
 
@@ -805,8 +790,8 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
 		/*
 		 * Reject ALL() variant, we only care about ANY/IN.
 		 *
-		 * FIXME Maybe we should check if all the values are the same, and
-		 * allow ALL in that case? Doesn't seem very practical, though.
+		 * XXX Maybe we should check if all the values are the same, and allow
+		 * ALL in that case? Doesn't seem very practical, though.
 		 */
 		if (!expr->useOr)
 			return false;
@@ -822,7 +807,7 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
 		if (!is_pseudo_constant_clause(lsecond(expr->args)))
 			return false;
 
-		var = linitial(expr->args);
+		clause_expr = linitial(expr->args);
 
 		/*
 		 * If it's not an "=" operator, just ignore the clause, as it's not
@@ -838,13 +823,13 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
 	}
 	else if (is_orclause(clause))
 	{
-		BoolExpr   *expr = (BoolExpr *) clause;
+		BoolExpr   *bool_expr = (BoolExpr *) clause;
 		ListCell   *lc;
 
 		/* start with no attribute number */
 		*attnum = InvalidAttrNumber;
 
-		foreach(lc, expr->args)
+		foreach(lc, bool_expr->args)
 		{
 			AttrNumber	clause_attnum;
 
@@ -859,6 +844,7 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
 			if (*attnum == InvalidAttrNumber)
 				*attnum = clause_attnum;
 
+			/* ensure all the variables are the same (same attnum) */
 			if (*attnum != clause_attnum)
 				return false;
 		}
@@ -872,7 +858,7 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
 		 * "NOT x" can be interpreted as "x = false", so get the argument and
 		 * proceed with seeing if it's a suitable Var.
 		 */
-		var = (Var *) get_notclausearg(clause);
+		clause_expr = (Node *) get_notclausearg(clause);
 	}
 	else
 	{
@@ -880,20 +866,23 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
 		 * A boolean expression "x" can be interpreted as "x = true", so
 		 * proceed with seeing if it's a suitable Var.
 		 */
-		var = (Var *) clause;
+		clause_expr = (Node *) clause;
 	}
 
 	/*
 	 * We may ignore any RelabelType node above the operand.  (There won't be
 	 * more than one, since eval_const_expressions has been applied already.)
 	 */
-	if (IsA(var, RelabelType))
-		var = (Var *) ((RelabelType *) var)->arg;
+	if (IsA(clause_expr, RelabelType))
+		clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
 
 	/* We only support plain Vars for now */
-	if (!IsA(var, Var))
+	if (!IsA(clause_expr, Var))
 		return false;
 
+	/* OK, we know we have a Var */
+	var = (Var *) clause_expr;
+
 	/* Ensure Var is from the correct relation */
 	if (var->varno != relid)
 		return false;
@@ -1157,6 +1146,212 @@ clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
 	return s1;
 }
 
+/*
+ * dependency_is_compatible_expression
+ *		Determines if the expression is compatible with functional dependencies
+ *
+ * Similar to dependency_is_compatible_clause, but doesn't enforce that the
+ * expression is a simple Var. OTOH we check that there's at least one
+ * statistics object matching the expression.
+ */
+static bool
+dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
+{
+	List	   *vars;
+	ListCell   *lc,
+			   *lc2;
+	Node	   *clause_expr;
+
+	if (IsA(clause, RestrictInfo))
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) clause;
+
+		/* Pseudoconstants are not interesting (they couldn't contain a Var) */
+		if (rinfo->pseudoconstant)
+			return false;
+
+		/* Clauses referencing multiple, or no, varnos are incompatible */
+		if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
+			return false;
+
+		clause = (Node *) rinfo->clause;
+	}
+
+	if (is_opclause(clause))
+	{
+		/* If it's an opclause, check for Var = Const or Const = Var. */
+		OpExpr	   *expr = (OpExpr *) clause;
+
+		/* Only expressions with two arguments are candidates. */
+		if (list_length(expr->args) != 2)
+			return false;
+
+		/* Make sure non-selected argument is a pseudoconstant. */
+		if (is_pseudo_constant_clause(lsecond(expr->args)))
+			clause_expr = linitial(expr->args);
+		else if (is_pseudo_constant_clause(linitial(expr->args)))
+			clause_expr = lsecond(expr->args);
+		else
+			return false;
+
+		/*
+		 * If it's not an "=" operator, just ignore the clause, as it's not
+		 * compatible with functional dependencies.
+		 *
+		 * This uses the function for estimating selectivity, not the operator
+		 * directly (a bit awkward, but well ...).
+		 *
+		 * XXX this is pretty dubious; probably it'd be better to check btree
+		 * or hash opclass membership, so as not to be fooled by custom
+		 * selectivity functions, and to be more consistent with decisions
+		 * elsewhere in the planner.
+		 */
+		if (get_oprrest(expr->opno) != F_EQSEL)
+			return false;
+
+		/* OK to proceed with checking "var" */
+	}
+	else if (IsA(clause, ScalarArrayOpExpr))
+	{
+		/* If it's an scalar array operator, check for Var IN Const. */
+		ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
+
+		/*
+		 * Reject ALL() variant, we only care about ANY/IN.
+		 *
+		 * FIXME Maybe we should check if all the values are the same, and
+		 * allow ALL in that case? Doesn't seem very practical, though.
+		 */
+		if (!expr->useOr)
+			return false;
+
+		/* Only expressions with two arguments are candidates. */
+		if (list_length(expr->args) != 2)
+			return false;
+
+		/*
+		 * We know it's always (Var IN Const), so we assume the var is the
+		 * first argument, and pseudoconstant is the second one.
+		 */
+		if (!is_pseudo_constant_clause(lsecond(expr->args)))
+			return false;
+
+		clause_expr = linitial(expr->args);
+
+		/*
+		 * If it's not an "=" operator, just ignore the clause, as it's not
+		 * compatible with functional dependencies. The operator is identified
+		 * simply by looking at which function it uses to estimate
+		 * selectivity. That's a bit strange, but it's what other similar
+		 * places do.
+		 */
+		if (get_oprrest(expr->opno) != F_EQSEL)
+			return false;
+
+		/* OK to proceed with checking "var" */
+	}
+	else if (is_orclause(clause))
+	{
+		BoolExpr   *bool_expr = (BoolExpr *) clause;
+		ListCell   *lc;
+
+		/* start with no expression (we'll use the first match) */
+		*expr = NULL;
+
+		foreach(lc, bool_expr->args)
+		{
+			Node	   *or_expr = NULL;
+
+			/*
+			 * Had we found incompatible expression in the arguments, treat
+			 * the whole expression as incompatible.
+			 */
+			if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
+													 statlist, &or_expr))
+				return false;
+
+			if (*expr == NULL)
+				*expr = or_expr;
+
+			/* ensure all the expressions are the same */
+			if (!equal(or_expr, *expr))
+				return false;
+		}
+
+		/* the expression is already checked by the recursive call */
+		return true;
+	}
+	else if (is_notclause(clause))
+	{
+		/*
+		 * "NOT x" can be interpreted as "x = false", so get the argument and
+		 * proceed with seeing if it's a suitable Var.
+		 */
+		clause_expr = (Node *) get_notclausearg(clause);
+	}
+	else
+	{
+		/*
+		 * A boolean expression "x" can be interpreted as "x = true", so
+		 * proceed with seeing if it's a suitable Var.
+		 */
+		clause_expr = (Node *) clause;
+	}
+
+	/*
+	 * We may ignore any RelabelType node above the operand.  (There won't be
+	 * more than one, since eval_const_expressions has been applied already.)
+	 */
+	if (IsA(clause_expr, RelabelType))
+		clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
+
+	vars = pull_var_clause(clause_expr, 0);
+
+	foreach(lc, vars)
+	{
+		Var		   *var = (Var *) lfirst(lc);
+
+		/* Ensure Var is from the correct relation */
+		if (var->varno != relid)
+			return false;
+
+		/* We also better ensure the Var is from the current level */
+		if (var->varlevelsup != 0)
+			return false;
+
+		/* Also ignore system attributes (we don't allow stats on those) */
+		if (!AttrNumberIsForUserDefinedAttr(var->varattno))
+			return false;
+	}
+
+	/*
+	 * Check if we actually have a matching statistics for the expression.
+	 *
+	 * XXX Maybe this is an overkill. We'll eliminate the expressions later.
+	 */
+	foreach(lc, statlist)
+	{
+		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+
+		/* ignore stats without dependencies */
+		if (info->kind != STATS_EXT_DEPENDENCIES)
+			continue;
+
+		foreach(lc2, info->exprs)
+		{
+			Node	   *stat_expr = (Node *) lfirst(lc2);
+
+			if (equal(clause_expr, stat_expr))
+			{
+				*expr = stat_expr;
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
 /*
  * dependencies_clauselist_selectivity
  *		Return the estimated selectivity of (a subset of) the given clauses
@@ -1204,6 +1399,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	MVDependency **dependencies;
 	int			ndependencies;
 	int			i;
+	AttrNumber	attnum_offset;
+
+	/* unique expressions */
+	Node	  **unique_exprs;
+	int			unique_exprs_cnt;
 
 	/* check if there's any stats that might be useful for us. */
 	if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
@@ -1212,6 +1412,15 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
 										 list_length(clauses));
 
+	/*
+	 * We allocate space as if every clause was a unique expression, although
+	 * that's probably overkill. Some will be simple column references that
+	 * we'll translate to attnums, and there might be duplicates. But it's
+	 * easier and cheaper to just do one allocation than repalloc later.
+	 */
+	unique_exprs = (Node **) palloc(sizeof(Node *) * list_length(clauses));
+	unique_exprs_cnt = 0;
+
 	/*
 	 * Pre-process the clauses list to extract the attnums seen in each item.
 	 * We need to determine if there's any clauses which will be useful for
@@ -1222,29 +1431,127 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	 *
 	 * We also skip clauses that we already estimated using different types of
 	 * statistics (we treat them as incompatible).
+	 *
+	 * To handle expressions, we assign them negative attnums, as if it was a
+	 * system attribute (this is fine, as we only allow extended stats on user
+	 * attributes). And then we offset everything by the number of
+	 * expressions, so that we can store the values in a bitmapset.
 	 */
 	listidx = 0;
 	foreach(l, clauses)
 	{
 		Node	   *clause = (Node *) lfirst(l);
 		AttrNumber	attnum;
+		Node	   *expr = NULL;
 
-		if (!bms_is_member(listidx, *estimatedclauses) &&
-			dependency_is_compatible_clause(clause, rel->relid, &attnum))
+		/* ignore clause by default */
+		list_attnums[listidx] = InvalidAttrNumber;
+
+		if (!bms_is_member(listidx, *estimatedclauses))
 		{
-			list_attnums[listidx] = attnum;
-			clauses_attnums = bms_add_member(clauses_attnums, attnum);
+			/*
+			 * If it's a simple column refrence, just extract the attnum. If
+			 * it's an expression, assign a negative attnum as if it was a
+			 * system attribute.
+			 */
+			if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
+			{
+				list_attnums[listidx] = attnum;
+			}
+			else if (dependency_is_compatible_expression(clause, rel->relid,
+														 rel->statlist,
+														 &expr))
+			{
+				/* special attnum assigned to this expression */
+				attnum = InvalidAttrNumber;
+
+				Assert(expr != NULL);
+
+				/* If the expression is duplicate, use the same attnum. */
+				for (i = 0; i < unique_exprs_cnt; i++)
+				{
+					if (equal(unique_exprs[i], expr))
+					{
+						/* negative attribute number to expression */
+						attnum = -(i + 1);
+						break;
+					}
+				}
+
+				/* not found in the list, so add it */
+				if (attnum == InvalidAttrNumber)
+				{
+					unique_exprs[unique_exprs_cnt++] = expr;
+
+					/* after incrementing the value, to get -1, -2, ... */
+					attnum = (-unique_exprs_cnt);
+				}
+
+				/* remember which attnum was assigned to this clause */
+				list_attnums[listidx] = attnum;
+			}
 		}
-		else
-			list_attnums[listidx] = InvalidAttrNumber;
 
 		listidx++;
 	}
 
+	Assert(listidx == list_length(clauses));
+
 	/*
-	 * If there's not at least two distinct attnums then reject the whole list
-	 * of clauses. We must return 1.0 so the calling function's selectivity is
-	 * unaffected.
+	 * How much we need to offset the attnums? If there are no expressions,
+	 * then no offset is needed. Otherwise we need to offset enough for the
+	 * lowest value (-unique_exprs_cnt) to become 1.
+	 */
+	if (unique_exprs_cnt > 0)
+		attnum_offset = (unique_exprs_cnt + 1);
+	else
+		attnum_offset = 0;
+
+	/*
+	 * Now that we know how many expressions there are, we can offset the
+	 * values just enough to build the bitmapset.
+	 */
+	for (i = 0; i < list_length(clauses); i++)
+	{
+		AttrNumber	attnum;
+
+		/* ignore incompatible or already estimated clauses */
+		if (list_attnums[i] == InvalidAttrNumber)
+			continue;
+
+		/* make sure the attnum is in the expected range */
+		Assert(list_attnums[i] >= (-unique_exprs_cnt));
+		Assert(list_attnums[i] <= MaxHeapAttributeNumber);
+
+		/* make sure the attnum is positive (valid AttrNumber) */
+		attnum = list_attnums[i] + attnum_offset;
+
+		/*
+		 * Either it's a regular attribute, or it's an expression, in which
+		 * case we must not have seen it before (expressions are unique).
+		 *
+		 * XXX Check whether it's a regular attribute has to be done using the
+		 * original attnum, while the second check has to use the value with
+		 * an offset.
+		 */
+		Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
+			   !bms_is_member(attnum, clauses_attnums));
+
+		/*
+		 * Remember the offset attnum, both for attributes and expressions.
+		 * We'll pass list_attnums to clauselist_apply_dependencies, which
+		 * uses it to identify clauses in a bitmap. We could also pass the
+		 * offset, but this is more convenient.
+		 */
+		list_attnums[i] = attnum;
+
+		clauses_attnums = bms_add_member(clauses_attnums, attnum);
+	}
+
+	/*
+	 * If there's not at least two distinct attnums and expressions, then
+	 * reject the whole list of clauses. We must return 1.0 so the calling
+	 * function's selectivity is unaffected.
 	 */
 	if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
 	{
@@ -1272,26 +1579,203 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	foreach(l, rel->statlist)
 	{
 		StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
-		Bitmapset  *matched;
-		BMS_Membership membership;
+		int			nmatched;
+		int			nexprs;
+		int			k;
+		MVDependencies *deps;
 
 		/* skip statistics that are not of the correct type */
 		if (stat->kind != STATS_EXT_DEPENDENCIES)
 			continue;
 
-		matched = bms_intersect(clauses_attnums, stat->keys);
-		membership = bms_membership(matched);
-		bms_free(matched);
+		/*
+		 * Count matching attributes - we have to undo the attnum offsets. The
+		 * input attribute numbers are not offset (expressions are not
+		 * included in stat->keys, so it's not necessary). But we need to
+		 * offset it before checking against clauses_attnums.
+		 */
+		nmatched = 0;
+		k = -1;
+		while ((k = bms_next_member(stat->keys, k)) >= 0)
+		{
+			AttrNumber	attnum = (AttrNumber) k;
 
-		/* skip objects matching fewer than two attributes from clauses */
-		if (membership != BMS_MULTIPLE)
+			/* skip expressions */
+			if (!AttrNumberIsForUserDefinedAttr(attnum))
+				continue;
+
+			/* apply the same offset as above */
+			attnum += attnum_offset;
+
+			if (bms_is_member(attnum, clauses_attnums))
+				nmatched++;
+		}
+
+		/* count matching expressions */
+		nexprs = 0;
+		for (i = 0; i < unique_exprs_cnt; i++)
+		{
+			ListCell   *lc;
+
+			foreach(lc, stat->exprs)
+			{
+				Node	   *stat_expr = (Node *) lfirst(lc);
+
+				/* try to match it */
+				if (equal(stat_expr, unique_exprs[i]))
+					nexprs++;
+			}
+		}
+
+		/*
+		 * Skip objects matching fewer than two attributes/expressions from
+		 * clauses.
+		 */
+		if (nmatched + nexprs < 2)
 			continue;
 
-		func_dependencies[nfunc_dependencies]
-			= statext_dependencies_load(stat->statOid);
+		deps = statext_dependencies_load(stat->statOid);
 
-		total_ndeps += func_dependencies[nfunc_dependencies]->ndeps;
-		nfunc_dependencies++;
+		/*
+		 * The expressions may be represented by different attnums in the
+		 * stats, we need to remap them to be consistent with the clauses.
+		 * That will make the later steps (e.g. picking the strongest item and
+		 * so on) much simpler and cheaper, because it won't need to care
+		 * about the offset at all.
+		 *
+		 * When we're at it, we can ignore dependencies that are not fully
+		 * matched by clauses (i.e. referencing attributes or expressions that
+		 * are not in the clauses).
+		 *
+		 * We have to do this for all statistics, as long as there are any
+		 * expressions - we need to shift the attnums in all dependencies.
+		 *
+		 * XXX Maybe we should do this always, because it also eliminates some
+		 * of the dependencies early. It might be cheaper than having to walk
+		 * the longer list in find_strongest_dependency later, especially as
+		 * we need to do that repeatedly?
+		 *
+		 * XXX We have to do this even when there are no expressions in
+		 * clauses, otherwise find_strongest_dependency may fail for stats
+		 * with expressions (due to lookup of negative value in bitmap). So we
+		 * need to at least filter out those dependencies. Maybe we could do
+		 * it in a cheaper way (if there are no expr clauses, we can just
+		 * discard all negative attnums without any lookups).
+		 */
+		if (unique_exprs_cnt > 0 || stat->exprs != NIL)
+		{
+			int			ndeps = 0;
+
+			for (i = 0; i < deps->ndeps; i++)
+			{
+				bool		skip = false;
+				MVDependency *dep = deps->deps[i];
+				int			j;
+
+				for (j = 0; j < dep->nattributes; j++)
+				{
+					int			idx;
+					Node	   *expr;
+					int			k;
+					AttrNumber	unique_attnum = InvalidAttrNumber;
+					AttrNumber	attnum;
+
+					/* undo the per-statistics offset */
+					attnum = dep->attributes[j];
+
+					/*
+					 * For regular attributes we can simply check if it
+					 * matches any clause. If there's no matching clause, we
+					 * can just ignore it. We need to offset the attnum
+					 * though.
+					 */
+					if (AttrNumberIsForUserDefinedAttr(attnum))
+					{
+						dep->attributes[j] = attnum + attnum_offset;
+
+						if (!bms_is_member(dep->attributes[j], clauses_attnums))
+						{
+							skip = true;
+							break;
+						}
+
+						continue;
+					}
+
+					/*
+					 * the attnum should be a valid system attnum (-1, -2,
+					 * ...)
+					 */
+					Assert(AttributeNumberIsValid(attnum));
+
+					/*
+					 * For expressions, we need to do two translations. First
+					 * we have to translate the negative attnum to index in
+					 * the list of expressions (in the statistics object).
+					 * Then we need to see if there's a matching clause. The
+					 * index of the unique expression determines the attnum
+					 * (and we offset it).
+					 */
+					idx = -(1 + attnum);
+
+					/* Is the expression index is valid? */
+					Assert((idx >= 0) && (idx < list_length(stat->exprs)));
+
+					expr = (Node *) list_nth(stat->exprs, idx);
+
+					/* try to find the expression in the unique list */
+					for (k = 0; k < unique_exprs_cnt; k++)
+					{
+						/*
+						 * found a matching unique expression, use the attnum
+						 * (derived from index of the unique expression)
+						 */
+						if (equal(unique_exprs[k], expr))
+						{
+							unique_attnum = -(k + 1) + attnum_offset;
+							break;
+						}
+					}
+
+					/*
+					 * Found no matching expression, so we can simply skip
+					 * this dependency, because there's no chance it will be
+					 * fully covered.
+					 */
+					if (unique_attnum == InvalidAttrNumber)
+					{
+						skip = true;
+						break;
+					}
+
+					/* otherwise remap it to the new attnum */
+					dep->attributes[j] = unique_attnum;
+				}
+
+				/* if found a matching dependency, keep it */
+				if (!skip)
+				{
+					/* maybe we've skipped something earlier, so move it */
+					if (ndeps != i)
+						deps->deps[ndeps] = deps->deps[i];
+
+					ndeps++;
+				}
+			}
+
+			deps->ndeps = ndeps;
+		}
+
+		/*
+		 * It's possible we've removed all dependencies, in which case we
+		 * don't bother adding it to the list.
+		 */
+		if (deps->ndeps > 0)
+		{
+			func_dependencies[nfunc_dependencies] = deps;
+			total_ndeps += deps->ndeps;
+			nfunc_dependencies++;
+		}
 	}
 
 	/* if no matching stats could be found then we've nothing to do */
@@ -1300,6 +1784,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 		pfree(func_dependencies);
 		bms_free(clauses_attnums);
 		pfree(list_attnums);
+		pfree(unique_exprs);
 		return 1.0;
 	}
 
@@ -1347,6 +1832,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	pfree(func_dependencies);
 	bms_free(clauses_attnums);
 	pfree(list_attnums);
+	pfree(unique_exprs);
 
 	return s1;
 }
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 7808c6a09c..8c75690fce 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -24,6 +24,7 @@
 #include "catalog/pg_collation.h"
 #include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_statistic_ext_data.h"
+#include "executor/executor.h"
 #include "commands/progress.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
@@ -35,13 +36,16 @@
 #include "statistics/statistics.h"
 #include "utils/acl.h"
 #include "utils/array.h"
+#include "utils/attoptcache.h"
 #include "utils/builtins.h"
+#include "utils/datum.h"
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
 #include "utils/selfuncs.h"
 #include "utils/syscache.h"
+#include "utils/typcache.h"
 
 /*
  * To avoid consuming too much memory during analysis and/or too much space
@@ -66,18 +70,38 @@ typedef struct StatExtEntry
 	Bitmapset  *columns;		/* attribute numbers covered by the object */
 	List	   *types;			/* 'char' list of enabled statistics kinds */
 	int			stattarget;		/* statistics target (-1 for default) */
+	List	   *exprs;			/* expressions */
 } StatExtEntry;
 
 
 static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
-static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
+static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
 											int nvacatts, VacAttrStats **vacatts);
-static void statext_store(Oid relid,
+static void statext_store(Oid statOid,
 						  MVNDistinct *ndistinct, MVDependencies *dependencies,
-						  MCVList *mcv, VacAttrStats **stats);
+						  MCVList *mcv, Datum exprs, VacAttrStats **stats);
 static int	statext_compute_stattarget(int stattarget,
 									   int natts, VacAttrStats **stats);
 
+/* Information needed to analyze a single simple expression. */
+typedef struct AnlExprData
+{
+	Node	   *expr;			/* expression to analyze */
+	VacAttrStats *vacattrstat;	/* statistics attrs to analyze */
+} AnlExprData;
+
+static void compute_expr_stats(Relation onerel, double totalrows,
+							   AnlExprData * exprdata, int nexprs,
+							   HeapTuple *rows, int numrows);
+static Datum serialize_expr_stats(AnlExprData * exprdata, int nexprs);
+static Datum expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
+static AnlExprData *build_expr_data(List *exprs, int stattarget);
+
+static StatsBuildData *make_build_data(Relation onerel, StatExtEntry *stat,
+									   int numrows, HeapTuple *rows,
+									   VacAttrStats **stats, int stattarget);
+
+
 /*
  * Compute requested extended stats, using the rows sampled for the plain
  * (single-column) stats.
@@ -92,21 +116,25 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 {
 	Relation	pg_stext;
 	ListCell   *lc;
-	List	   *stats;
+	List	   *statslist;
 	MemoryContext cxt;
 	MemoryContext oldcxt;
 	int64		ext_cnt;
 
+	/* Do nothing if there are no columns to analyze. */
+	if (!natts)
+		return;
+
 	cxt = AllocSetContextCreate(CurrentMemoryContext,
 								"BuildRelationExtStatistics",
 								ALLOCSET_DEFAULT_SIZES);
 	oldcxt = MemoryContextSwitchTo(cxt);
 
 	pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
-	stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
+	statslist = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
 
 	/* report this phase */
-	if (stats != NIL)
+	if (statslist != NIL)
 	{
 		const int	index[] = {
 			PROGRESS_ANALYZE_PHASE,
@@ -114,28 +142,30 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 		};
 		const int64 val[] = {
 			PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS,
-			list_length(stats)
+			list_length(statslist)
 		};
 
 		pgstat_progress_update_multi_param(2, index, val);
 	}
 
 	ext_cnt = 0;
-	foreach(lc, stats)
+	foreach(lc, statslist)
 	{
 		StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
 		MVNDistinct *ndistinct = NULL;
 		MVDependencies *dependencies = NULL;
 		MCVList    *mcv = NULL;
+		Datum		exprstats = (Datum) 0;
 		VacAttrStats **stats;
 		ListCell   *lc2;
 		int			stattarget;
+		StatsBuildData *data;
 
 		/*
 		 * Check if we can build these stats based on the column analyzed. If
 		 * not, report this fact (except in autovacuum) and move on.
 		 */
-		stats = lookup_var_attr_stats(onerel, stat->columns,
+		stats = lookup_var_attr_stats(onerel, stat->columns, stat->exprs,
 									  natts, vacattrstats);
 		if (!stats)
 		{
@@ -150,10 +180,6 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 			continue;
 		}
 
-		/* check allowed number of dimensions */
-		Assert(bms_num_members(stat->columns) >= 2 &&
-			   bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
-
 		/* compute statistics target for this statistics */
 		stattarget = statext_compute_stattarget(stat->stattarget,
 												bms_num_members(stat->columns),
@@ -167,28 +193,49 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 		if (stattarget == 0)
 			continue;
 
+		/* evaluate expressions (if the statistics has any) */
+		data = make_build_data(onerel, stat, numrows, rows, stats, stattarget);
+
 		/* compute statistic of each requested type */
 		foreach(lc2, stat->types)
 		{
 			char		t = (char) lfirst_int(lc2);
 
 			if (t == STATS_EXT_NDISTINCT)
-				ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
-													stat->columns, stats);
+				ndistinct = statext_ndistinct_build(totalrows, data);
 			else if (t == STATS_EXT_DEPENDENCIES)
-				dependencies = statext_dependencies_build(numrows, rows,
-														  stat->columns, stats);
+				dependencies = statext_dependencies_build(data);
 			else if (t == STATS_EXT_MCV)
-				mcv = statext_mcv_build(numrows, rows, stat->columns, stats,
-										totalrows, stattarget);
+				mcv = statext_mcv_build(data, totalrows, stattarget);
+			else if (t == STATS_EXT_EXPRESSIONS)
+			{
+				AnlExprData *exprdata;
+				int			nexprs;
+
+				/* should not happen, thanks to checks when defining stats */
+				if (!stat->exprs)
+					elog(ERROR, "requested expression stats, but there are no expressions");
+
+				exprdata = build_expr_data(stat->exprs, stattarget);
+				nexprs = list_length(stat->exprs);
+
+				compute_expr_stats(onerel, totalrows,
+								   exprdata, nexprs,
+								   rows, numrows);
+
+				exprstats = serialize_expr_stats(exprdata, nexprs);
+			}
 		}
 
 		/* store the statistics in the catalog */
-		statext_store(stat->statOid, ndistinct, dependencies, mcv, stats);
+		statext_store(stat->statOid, ndistinct, dependencies, mcv, exprstats, stats);
 
 		/* for reporting progress */
 		pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED,
 									 ++ext_cnt);
+
+		/* free the build data (allocated as a single chunk) */
+		pfree(data);
 	}
 
 	table_close(pg_stext, RowExclusiveLock);
@@ -221,6 +268,10 @@ ComputeExtStatisticsRows(Relation onerel,
 	MemoryContext oldcxt;
 	int			result = 0;
 
+	/* If there are no columns to analyze, just return 0. */
+	if (!natts)
+		return 0;
+
 	cxt = AllocSetContextCreate(CurrentMemoryContext,
 								"ComputeExtStatisticsRows",
 								ALLOCSET_DEFAULT_SIZES);
@@ -241,7 +292,7 @@ ComputeExtStatisticsRows(Relation onerel,
 		 * analyzed. If not, ignore it (don't report anything, we'll do that
 		 * during the actual build BuildRelationExtStatistics).
 		 */
-		stats = lookup_var_attr_stats(onerel, stat->columns,
+		stats = lookup_var_attr_stats(onerel, stat->columns, stat->exprs,
 									  natts, vacattrstats);
 
 		if (!stats)
@@ -349,6 +400,10 @@ statext_is_kind_built(HeapTuple htup, char type)
 			attnum = Anum_pg_statistic_ext_data_stxdmcv;
 			break;
 
+		case STATS_EXT_EXPRESSIONS:
+			attnum = Anum_pg_statistic_ext_data_stxdexpr;
+			break;
+
 		default:
 			elog(ERROR, "unexpected statistics type requested: %d", type);
 	}
@@ -388,6 +443,7 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
 		ArrayType  *arr;
 		char	   *enabled;
 		Form_pg_statistic_ext staForm;
+		List	   *exprs = NIL;
 
 		entry = palloc0(sizeof(StatExtEntry));
 		staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
@@ -415,10 +471,40 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
 		{
 			Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
 				   (enabled[i] == STATS_EXT_DEPENDENCIES) ||
-				   (enabled[i] == STATS_EXT_MCV));
+				   (enabled[i] == STATS_EXT_MCV) ||
+				   (enabled[i] == STATS_EXT_EXPRESSIONS));
 			entry->types = lappend_int(entry->types, (int) enabled[i]);
 		}
 
+		/* decode expression (if any) */
+		datum = SysCacheGetAttr(STATEXTOID, htup,
+								Anum_pg_statistic_ext_stxexprs, &isnull);
+
+		if (!isnull)
+		{
+			char	   *exprsString;
+
+			exprsString = TextDatumGetCString(datum);
+			exprs = (List *) stringToNode(exprsString);
+
+			pfree(exprsString);
+
+			/*
+			 * Run the expressions through eval_const_expressions. This is not
+			 * just an optimization, but is necessary, because the planner
+			 * will be comparing them to similarly-processed qual clauses, and
+			 * may fail to detect valid matches without this.  We must not use
+			 * canonicalize_qual, however, since these aren't qual
+			 * expressions.
+			 */
+			exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
+
+			/* May as well fix opfuncids too */
+			fix_opfuncids((Node *) exprs);
+		}
+
+		entry->exprs = exprs;
+
 		result = lappend(result, entry);
 	}
 
@@ -427,6 +513,187 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
 	return result;
 }
 
+/*
+ * examine_attribute -- pre-analysis of a single column
+ *
+ * Determine whether the column is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it.  If not, return NULL.
+ */
+static VacAttrStats *
+examine_attribute(Node *expr)
+{
+	HeapTuple	typtuple;
+	VacAttrStats *stats;
+	int			i;
+	bool		ok;
+
+	/*
+	 * Create the VacAttrStats struct.  Note that we only have a copy of the
+	 * fixed fields of the pg_attribute tuple.
+	 */
+	stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
+
+	/* fake the attribute */
+	stats->attr = (Form_pg_attribute) palloc0(ATTRIBUTE_FIXED_PART_SIZE);
+	stats->attr->attstattarget = -1;
+
+	/*
+	 * When analyzing an expression, believe the expression tree's type not
+	 * the column datatype --- the latter might be the opckeytype storage
+	 * type of the opclass, which is not interesting for our purposes.  (Note:
+	 * if we did anything with non-expression statistics columns, we'd need to
+	 * figure out where to get the correct type info from, but for now that's
+	 * not a problem.)	It's not clear whether anyone will care about the
+	 * typmod, but we store that too just in case.
+	 */
+	stats->attrtypid = exprType(expr);
+	stats->attrtypmod = exprTypmod(expr);
+	stats->attrcollid = exprCollation(expr);
+
+	typtuple = SearchSysCacheCopy1(TYPEOID,
+								   ObjectIdGetDatum(stats->attrtypid));
+	if (!HeapTupleIsValid(typtuple))
+		elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
+	stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
+
+	/*
+	 * We don't actually analyze individual attributes, so no need to set the
+	 * memory context.
+	 */
+	stats->anl_context = NULL;
+	stats->tupattnum = InvalidAttrNumber;
+
+	/*
+	 * The fields describing the stats->stavalues[n] element types default to
+	 * the type of the data being analyzed, but the type-specific typanalyze
+	 * function can change them if it wants to store something else.
+	 */
+	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+	{
+		stats->statypid[i] = stats->attrtypid;
+		stats->statyplen[i] = stats->attrtype->typlen;
+		stats->statypbyval[i] = stats->attrtype->typbyval;
+		stats->statypalign[i] = stats->attrtype->typalign;
+	}
+
+	/*
+	 * Call the type-specific typanalyze function.  If none is specified, use
+	 * std_typanalyze().
+	 */
+	if (OidIsValid(stats->attrtype->typanalyze))
+		ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
+										   PointerGetDatum(stats)));
+	else
+		ok = std_typanalyze(stats);
+
+	if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
+	{
+		heap_freetuple(typtuple);
+		pfree(stats->attr);
+		pfree(stats);
+		return NULL;
+	}
+
+	return stats;
+}
+
+/*
+ * examine_expression -- pre-analysis of a single expression
+ *
+ * Determine whether the expression is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it.  If not, return NULL.
+ */
+static VacAttrStats *
+examine_expression(Node *expr, int stattarget)
+{
+	HeapTuple	typtuple;
+	VacAttrStats *stats;
+	int			i;
+	bool		ok;
+
+	Assert(expr != NULL);
+
+	/*
+	 * Create the VacAttrStats struct.
+	 */
+	stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
+
+	/*
+	 * When analyzing an expression, believe the expression tree's type.
+	 */
+	stats->attrtypid = exprType(expr);
+	stats->attrtypmod = exprTypmod(expr);
+
+	/*
+	 * We don't allow collation to be specified in CREATE STATISTICS, so we
+	 * have to use the collation specified for the expression. It's possible
+	 * to specify the collation in the expression "(col COLLATE "en_US")" in
+	 * which case exprCollation() does the right thing.
+	 */
+	stats->attrcollid = exprCollation(expr);
+
+	/*
+	 * We don't have any pg_attribute for expressions, so let's fake something
+	 * reasonable into attstattarget, which is the only thing std_typanalyze
+	 * needs.
+	 */
+	stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
+
+	/*
+	 * We can't have statistics target specified for the expression, so we
+	 * could use either the default_statistics_target, or the target computed
+	 * for the extended statistics. The second option seems more reasonable.
+	 */
+	stats->attr->attstattarget = stattarget;
+
+	/* initialize some basic fields */
+	stats->attr->attrelid = InvalidOid;
+	stats->attr->attnum = InvalidAttrNumber;
+	stats->attr->atttypid = stats->attrtypid;
+
+	typtuple = SearchSysCacheCopy1(TYPEOID,
+								   ObjectIdGetDatum(stats->attrtypid));
+	if (!HeapTupleIsValid(typtuple))
+		elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
+
+	stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
+	stats->anl_context = CurrentMemoryContext;	/* XXX should be using
+												 * something else? */
+	stats->tupattnum = InvalidAttrNumber;
+
+	/*
+	 * The fields describing the stats->stavalues[n] element types default to
+	 * the type of the data being analyzed, but the type-specific typanalyze
+	 * function can change them if it wants to store something else.
+	 */
+	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+	{
+		stats->statypid[i] = stats->attrtypid;
+		stats->statyplen[i] = stats->attrtype->typlen;
+		stats->statypbyval[i] = stats->attrtype->typbyval;
+		stats->statypalign[i] = stats->attrtype->typalign;
+	}
+
+	/*
+	 * Call the type-specific typanalyze function.  If none is specified, use
+	 * std_typanalyze().
+	 */
+	if (OidIsValid(stats->attrtype->typanalyze))
+		ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
+										   PointerGetDatum(stats)));
+	else
+		ok = std_typanalyze(stats);
+
+	if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
+	{
+		heap_freetuple(typtuple);
+		pfree(stats);
+		return NULL;
+	}
+
+	return stats;
+}
+
 /*
  * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
  * VacAttrStats array which includes only the items corresponding to
@@ -435,15 +702,18 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
  * to the caller that the stats should not be built.
  */
 static VacAttrStats **
-lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
+lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
 					  int nvacatts, VacAttrStats **vacatts)
 {
 	int			i = 0;
 	int			x = -1;
+	int			natts;
 	VacAttrStats **stats;
+	ListCell   *lc;
 
-	stats = (VacAttrStats **)
-		palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
+	natts = bms_num_members(attrs) + list_length(exprs);
+
+	stats = (VacAttrStats **) palloc(natts * sizeof(VacAttrStats *));
 
 	/* lookup VacAttrStats info for the requested columns (same attnum) */
 	while ((x = bms_next_member(attrs, x)) >= 0)
@@ -480,6 +750,24 @@ lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
 		i++;
 	}
 
+	/* also add info for expressions */
+	foreach(lc, exprs)
+	{
+		Node	   *expr = (Node *) lfirst(lc);
+
+		stats[i] = examine_attribute(expr);
+
+		/*
+		 * XXX We need tuple descriptor later, and we just grab it from
+		 * stats[0]->tupDesc (see e.g. statext_mcv_build). But as coded
+		 * examine_attribute does not set that, so just grab it from the first
+		 * vacatts element.
+		 */
+		stats[i]->tupDesc = vacatts[0]->tupDesc;
+
+		i++;
+	}
+
 	return stats;
 }
 
@@ -491,7 +779,7 @@ lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
 static void
 statext_store(Oid statOid,
 			  MVNDistinct *ndistinct, MVDependencies *dependencies,
-			  MCVList *mcv, VacAttrStats **stats)
+			  MCVList *mcv, Datum exprs, VacAttrStats **stats)
 {
 	Relation	pg_stextdata;
 	HeapTuple	stup,
@@ -532,11 +820,17 @@ statext_store(Oid statOid,
 		nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = (data == NULL);
 		values[Anum_pg_statistic_ext_data_stxdmcv - 1] = PointerGetDatum(data);
 	}
+	if (exprs != (Datum) 0)
+	{
+		nulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = false;
+		values[Anum_pg_statistic_ext_data_stxdexpr - 1] = exprs;
+	}
 
 	/* always replace the value (either by bytea or NULL) */
 	replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
 	replaces[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
 	replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
+	replaces[Anum_pg_statistic_ext_data_stxdexpr - 1] = true;
 
 	/* there should already be a pg_statistic_ext_data tuple */
 	oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statOid));
@@ -668,7 +962,7 @@ compare_datums_simple(Datum a, Datum b, SortSupport ssup)
  * is not necessary here (and when querying the bitmap).
  */
 AttrNumber *
-build_attnums_array(Bitmapset *attrs, int *numattrs)
+build_attnums_array(Bitmapset *attrs, int nexprs, int *numattrs)
 {
 	int			i,
 				j;
@@ -684,16 +978,19 @@ build_attnums_array(Bitmapset *attrs, int *numattrs)
 	j = -1;
 	while ((j = bms_next_member(attrs, j)) >= 0)
 	{
+		AttrNumber	attnum = (j - nexprs);
+
 		/*
 		 * Make sure the bitmap contains only user-defined attributes. As
 		 * bitmaps can't contain negative values, this can be violated in two
 		 * ways. Firstly, the bitmap might contain 0 as a member, and secondly
 		 * the integer value might be larger than MaxAttrNumber.
 		 */
-		Assert(AttrNumberIsForUserDefinedAttr(j));
-		Assert(j <= MaxAttrNumber);
+		Assert(AttributeNumberIsValid(attnum));
+		Assert(attnum <= MaxAttrNumber);
+		Assert(attnum >= (-nexprs));
 
-		attnums[i++] = (AttrNumber) j;
+		attnums[i++] = (AttrNumber) attnum;
 
 		/* protect against overflows */
 		Assert(i <= num);
@@ -710,29 +1007,31 @@ build_attnums_array(Bitmapset *attrs, int *numattrs)
  * can simply pfree the return value to release all of it.
  */
 SortItem *
-build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
-				   MultiSortSupport mss, int numattrs, AttrNumber *attnums)
+build_sorted_items(StatsBuildData *data, int *nitems,
+				   MultiSortSupport mss,
+				   int numattrs, AttrNumber *attnums)
 {
 	int			i,
 				j,
 				len,
-				idx;
-	int			nvalues = numrows * numattrs;
+				nrows;
+	int			nvalues = data->numrows * numattrs;
 
 	SortItem   *items;
 	Datum	   *values;
 	bool	   *isnull;
 	char	   *ptr;
+	int		   *typlen;
 
 	/* Compute the total amount of memory we need (both items and values). */
-	len = numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
+	len = data->numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
 
 	/* Allocate the memory and split it into the pieces. */
 	ptr = palloc0(len);
 
 	/* items to sort */
 	items = (SortItem *) ptr;
-	ptr += numrows * sizeof(SortItem);
+	ptr += data->numrows * sizeof(SortItem);
 
 	/* values and null flags */
 	values = (Datum *) ptr;
@@ -745,21 +1044,47 @@ build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
 	Assert((ptr - (char *) items) == len);
 
 	/* fix the pointers to Datum and bool arrays */
-	idx = 0;
-	for (i = 0; i < numrows; i++)
+	nrows = 0;
+	for (i = 0; i < data->numrows; i++)
+	{
+		items[nrows].values = &values[nrows * numattrs];
+		items[nrows].isnull = &isnull[nrows * numattrs];
+
+		nrows++;
+	}
+
+	/* build a local cache of typlen for all attributes */
+	typlen = (int *) palloc(sizeof(int) * data->nattnums);
+	for (i = 0; i < data->nattnums; i++)
+		typlen[i] = get_typlen(data->stats[i]->attrtypid);
+
+	nrows = 0;
+	for (i = 0; i < data->numrows; i++)
 	{
 		bool		toowide = false;
 
-		items[idx].values = &values[idx * numattrs];
-		items[idx].isnull = &isnull[idx * numattrs];
-
 		/* load the values/null flags from sample rows */
 		for (j = 0; j < numattrs; j++)
 		{
 			Datum		value;
 			bool		isnull;
+			int			attlen;
+			AttrNumber	attnum = attnums[j];
 
-			value = heap_getattr(rows[i], attnums[j], tdesc, &isnull);
+			int			idx;
+
+			/* match attnum to the pre-calculated data */
+			for (idx = 0; idx < data->nattnums; idx++)
+			{
+				if (attnum == data->attnums[idx])
+					break;
+			}
+
+			Assert(idx < data->nattnums);
+
+			value = data->values[idx][i];
+			isnull = data->nulls[idx][i];
+			attlen = typlen[idx];
 
 			/*
 			 * If this is a varlena value, check if it's too wide and if yes
@@ -770,8 +1095,7 @@ build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
 			 * on the assumption that those are small (below WIDTH_THRESHOLD)
 			 * and will be discarded at the end of analyze.
 			 */
-			if ((!isnull) &&
-				(TupleDescAttr(tdesc, attnums[j] - 1)->attlen == -1))
+			if ((!isnull) && (attlen == -1))
 			{
 				if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
 				{
@@ -782,21 +1106,21 @@ build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
 				value = PointerGetDatum(PG_DETOAST_DATUM(value));
 			}
 
-			items[idx].values[j] = value;
-			items[idx].isnull[j] = isnull;
+			items[nrows].values[j] = value;
+			items[nrows].isnull[j] = isnull;
 		}
 
 		if (toowide)
 			continue;
 
-		idx++;
+		nrows++;
 	}
 
 	/* store the actual number of items (ignoring the too-wide ones) */
-	*nitems = idx;
+	*nitems = nrows;
 
 	/* all items were too wide */
-	if (idx == 0)
+	if (nrows == 0)
 	{
 		/* everything is allocated as a single chunk */
 		pfree(items);
@@ -804,7 +1128,7 @@ build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
 	}
 
 	/* do the sort, using the multi-sort */
-	qsort_arg((void *) items, idx, sizeof(SortItem),
+	qsort_arg((void *) items, nrows, sizeof(SortItem),
 			  multi_sort_compare, mss);
 
 	return items;
@@ -830,6 +1154,63 @@ has_stats_of_kind(List *stats, char requiredkind)
 	return false;
 }
 
+/*
+ * stat_find_expression
+ *		Search for an expression in statistics object's list of expressions.
+ *
+ * Returns the index of the expression in the statistics object's list of
+ * expressions, or -1 if not found.
+ */
+static int
+stat_find_expression(StatisticExtInfo *stat, Node *expr)
+{
+	ListCell   *lc;
+	int			idx;
+
+	idx = 0;
+	foreach(lc, stat->exprs)
+	{
+		Node	   *stat_expr = (Node *) lfirst(lc);
+
+		if (equal(stat_expr, expr))
+			return idx;
+		idx++;
+	}
+
+	/* Expression not found */
+	return -1;
+}
+
+/*
+ * stat_covers_expressions
+ * 		Test whether a statistics object covers all expressions in a list.
+ *
+ * Returns true if all expressions are covered.  If expr_idxs is non-NULL, it
+ * is populated with the indexes of the expressions found.
+ */
+static bool
+stat_covers_expressions(StatisticExtInfo *stat, List *exprs,
+						Bitmapset **expr_idxs)
+{
+	ListCell   *lc;
+
+	foreach(lc, exprs)
+	{
+		Node	   *expr = (Node *) lfirst(lc);
+		int			expr_idx;
+
+		expr_idx = stat_find_expression(stat, expr);
+		if (expr_idx == -1)
+			return false;
+
+		if (expr_idxs != NULL)
+			*expr_idxs = bms_add_member(*expr_idxs, expr_idx);
+	}
+
+	/* If we reach here, all expressions are covered */
+	return true;
+}
+
 /*
  * choose_best_statistics
  *		Look for and return statistics with the specified 'requiredkind' which
@@ -850,7 +1231,8 @@ has_stats_of_kind(List *stats, char requiredkind)
  */
 StatisticExtInfo *
 choose_best_statistics(List *stats, char requiredkind,
-					   Bitmapset **clause_attnums, int nclauses)
+					   Bitmapset **clause_attnums, List **clause_exprs,
+					   int nclauses)
 {
 	ListCell   *lc;
 	StatisticExtInfo *best_match = NULL;
@@ -861,7 +1243,8 @@ choose_best_statistics(List *stats, char requiredkind,
 	{
 		int			i;
 		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
-		Bitmapset  *matched = NULL;
+		Bitmapset  *matched_attnums = NULL;
+		Bitmapset  *matched_exprs = NULL;
 		int			num_matched;
 		int			numkeys;
 
@@ -870,35 +1253,43 @@ choose_best_statistics(List *stats, char requiredkind,
 			continue;
 
 		/*
-		 * Collect attributes in remaining (unestimated) clauses fully covered
-		 * by this statistic object.
+		 * Collect attributes and expressions in remaining (unestimated)
+		 * clauses fully covered by this statistic object.
 		 */
 		for (i = 0; i < nclauses; i++)
 		{
+			Bitmapset  *expr_idxs = NULL;
+
 			/* ignore incompatible/estimated clauses */
-			if (!clause_attnums[i])
+			if (!clause_attnums[i] && !clause_exprs[i])
 				continue;
 
 			/* ignore clauses that are not covered by this object */
-			if (!bms_is_subset(clause_attnums[i], info->keys))
+			if (!bms_is_subset(clause_attnums[i], info->keys) ||
+				!stat_covers_expressions(info, clause_exprs[i], &expr_idxs))
 				continue;
 
-			matched = bms_add_members(matched, clause_attnums[i]);
+			/* record attnums and indexes of expressions covered */
+			matched_attnums = bms_add_members(matched_attnums, clause_attnums[i]);
+			matched_exprs = bms_add_members(matched_exprs, expr_idxs);
 		}
 
-		num_matched = bms_num_members(matched);
-		bms_free(matched);
+		num_matched = bms_num_members(matched_attnums) + bms_num_members(matched_exprs);
+
+		bms_free(matched_attnums);
+		bms_free(matched_exprs);
 
 		/*
 		 * save the actual number of keys in the stats so that we can choose
 		 * the narrowest stats with the most matching keys.
 		 */
-		numkeys = bms_num_members(info->keys);
+		numkeys = bms_num_members(info->keys) + list_length(info->exprs);
 
 		/*
-		 * Use this object when it increases the number of matched clauses or
-		 * when it matches the same number of attributes but these stats have
-		 * fewer keys than any previous match.
+		 * Use this object when it increases the number of matched attributes
+		 * and expressions or when it matches the same number of attributes
+		 * and expressions but these stats have fewer keys than any previous
+		 * match.
 		 */
 		if (num_matched > best_num_matched ||
 			(num_matched == best_num_matched && numkeys < best_match_keys))
@@ -923,7 +1314,8 @@ choose_best_statistics(List *stats, char requiredkind,
  */
 static bool
 statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
-									  Index relid, Bitmapset **attnums)
+									  Index relid, Bitmapset **attnums,
+									  List **exprs)
 {
 	/* Look inside any binary-compatible relabeling (as in examine_variable) */
 	if (IsA(clause, RelabelType))
@@ -951,19 +1343,19 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 		return true;
 	}
 
-	/* (Var op Const) or (Const op Var) */
+	/* (Var/Expr op Const) or (Const op Var/Expr) */
 	if (is_opclause(clause))
 	{
 		RangeTblEntry *rte = root->simple_rte_array[relid];
 		OpExpr	   *expr = (OpExpr *) clause;
-		Var		   *var;
+		Node	   *clause_expr;
 
 		/* Only expressions with two arguments are considered compatible. */
 		if (list_length(expr->args) != 2)
 			return false;
 
-		/* Check if the expression has the right shape (one Var, one Const) */
-		if (!examine_clause_args(expr->args, &var, NULL, NULL))
+		/* Check if the expression has the right shape */
+		if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL))
 			return false;
 
 		/*
@@ -981,7 +1373,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			case F_SCALARLESEL:
 			case F_SCALARGTSEL:
 			case F_SCALARGESEL:
-				/* supported, will continue with inspection of the Var */
+				/* supported, will continue with inspection of the Var/Expr */
 				break;
 
 			default:
@@ -1003,23 +1395,29 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			!get_func_leakproof(get_opcode(expr->opno)))
 			return false;
 
-		return statext_is_compatible_clause_internal(root, (Node *) var,
-													 relid, attnums);
+		/* Check (Var op Const) or (Const op Var) clauses by recursing. */
+		if (IsA(clause_expr, Var))
+			return statext_is_compatible_clause_internal(root, clause_expr,
+														 relid, attnums, exprs);
+
+		/* Otherwise we have (Expr op Const) or (Const op Expr). */
+		*exprs = lappend(*exprs, clause_expr);
+		return true;
 	}
 
-	/* Var IN Array */
+	/* Var/Expr IN Array */
 	if (IsA(clause, ScalarArrayOpExpr))
 	{
 		RangeTblEntry *rte = root->simple_rte_array[relid];
 		ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
-		Var		   *var;
+		Node	   *clause_expr;
 
 		/* Only expressions with two arguments are considered compatible. */
 		if (list_length(expr->args) != 2)
 			return false;
 
 		/* Check if the expression has the right shape (one Var, one Const) */
-		if (!examine_clause_args(expr->args, &var, NULL, NULL))
+		if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL))
 			return false;
 
 		/*
@@ -1037,7 +1435,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			case F_SCALARLESEL:
 			case F_SCALARGTSEL:
 			case F_SCALARGESEL:
-				/* supported, will continue with inspection of the Var */
+				/* supported, will continue with inspection of the Var/Expr */
 				break;
 
 			default:
@@ -1059,8 +1457,14 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			!get_func_leakproof(get_opcode(expr->opno)))
 			return false;
 
-		return statext_is_compatible_clause_internal(root, (Node *) var,
-													 relid, attnums);
+		/* Check Var IN Array clauses by recursing. */
+		if (IsA(clause_expr, Var))
+			return statext_is_compatible_clause_internal(root, clause_expr,
+														 relid, attnums, exprs);
+
+		/* Otherwise we have Expr IN Array. */
+		*exprs = lappend(*exprs, clause_expr);
+		return true;
 	}
 
 	/* AND/OR/NOT clause */
@@ -1093,54 +1497,62 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
 			 */
 			if (!statext_is_compatible_clause_internal(root,
 													   (Node *) lfirst(lc),
-													   relid, attnums))
+													   relid, attnums, exprs))
 				return false;
 		}
 
 		return true;
 	}
 
-	/* Var IS NULL */
+	/* Var/Expr IS NULL */
 	if (IsA(clause, NullTest))
 	{
 		NullTest   *nt = (NullTest *) clause;
 
-		/*
-		 * Only simple (Var IS NULL) expressions supported for now. Maybe we
-		 * could use examine_variable to fix this?
-		 */
-		if (!IsA(nt->arg, Var))
-			return false;
+		/* Check Var IS NULL clauses by recursing. */
+		if (IsA(nt->arg, Var))
+			return statext_is_compatible_clause_internal(root, (Node *) (nt->arg),
+														 relid, attnums, exprs);
 
-		return statext_is_compatible_clause_internal(root, (Node *) (nt->arg),
-													 relid, attnums);
+		/* Otherwise we have Expr IS NULL. */
+		*exprs = lappend(*exprs, nt->arg);
+		return true;
 	}
 
-	return false;
+	/*
+	 * Treat any other expressions as bare expressions to be matched against
+	 * expressions in statistics objects.
+	 */
+	*exprs = lappend(*exprs, clause);
+	return true;
 }
 
 /*
  * statext_is_compatible_clause
  *		Determines if the clause is compatible with MCV lists.
  *
- * Currently, we only support three types of clauses:
+ * Currently, we only support the following types of clauses:
  *
- * (a) OpExprs of the form (Var op Const), or (Const op Var), where the op
- * is one of ("=", "<", ">", ">=", "<=")
+ * (a) OpExprs of the form (Var/Expr op Const), or (Const op Var/Expr), where
+ * the op is one of ("=", "<", ">", ">=", "<=")
  *
- * (b) (Var IS [NOT] NULL)
+ * (b) (Var/Expr IS [NOT] NULL)
  *
  * (c) combinations using AND/OR/NOT
  *
+ * (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (array)) or (Var/Expr
+ * op ALL (array))
+ *
  * In the future, the range of supported clauses may be expanded to more
  * complex cases, for example (Var op Var).
  */
 static bool
 statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
-							 Bitmapset **attnums)
+							 Bitmapset **attnums, List **exprs)
 {
 	RangeTblEntry *rte = root->simple_rte_array[relid];
 	RestrictInfo *rinfo = (RestrictInfo *) clause;
+	int			clause_relid;
 	Oid			userid;
 
 	/*
@@ -1160,7 +1572,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
 		foreach(lc, expr->args)
 		{
 			if (!statext_is_compatible_clause(root, (Node *) lfirst(lc),
-											  relid, attnums))
+											  relid, attnums, exprs))
 				return false;
 		}
 
@@ -1175,25 +1587,36 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
 	if (rinfo->pseudoconstant)
 		return false;
 
-	/* clauses referencing multiple varnos are incompatible */
-	if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
+	/* Clauses referencing other varnos are incompatible. */
+	if (!bms_get_singleton_member(rinfo->clause_relids, &clause_relid) ||
+		clause_relid != relid)
 		return false;
 
 	/* Check the clause and determine what attributes it references. */
 	if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause,
-											   relid, attnums))
+											   relid, attnums, exprs))
 		return false;
 
 	/*
-	 * Check that the user has permission to read all these attributes.  Use
+	 * Check that the user has permission to read all required attributes. Use
 	 * checkAsUser if it's set, in case we're accessing the table via a view.
 	 */
 	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
 
 	if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) != ACLCHECK_OK)
 	{
+		Bitmapset  *clause_attnums;
+
 		/* Don't have table privilege, must check individual columns */
-		if (bms_is_member(InvalidAttrNumber, *attnums))
+		if (*exprs != NIL)
+		{
+			pull_varattnos((Node *) exprs, relid, &clause_attnums);
+			clause_attnums = bms_add_members(clause_attnums, *attnums);
+		}
+		else
+			clause_attnums = *attnums;
+
+		if (bms_is_member(InvalidAttrNumber, clause_attnums))
 		{
 			/* Have a whole-row reference, must have access to all columns */
 			if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT,
@@ -1205,7 +1628,7 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
 			/* Check the columns referenced by the clause */
 			int			attnum = -1;
 
-			while ((attnum = bms_next_member(*attnums, attnum)) >= 0)
+			while ((attnum = bms_next_member(clause_attnums, attnum)) >= 0)
 			{
 				if (pg_attribute_aclcheck(rte->relid, attnum, userid,
 										  ACL_SELECT) != ACLCHECK_OK)
@@ -1259,7 +1682,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 								   bool is_or)
 {
 	ListCell   *l;
-	Bitmapset **list_attnums;
+	Bitmapset **list_attnums;	/* attnums extracted from the clause */
+	List	  **list_exprs;		/* expressions matched to any statistic */
 	int			listidx;
 	Selectivity sel = (is_or) ? 0.0 : 1.0;
 
@@ -1270,13 +1694,16 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
 										 list_length(clauses));
 
+	/* expressions extracted from complex expressions */
+	list_exprs = (List **) palloc(sizeof(Node *) * list_length(clauses));
+
 	/*
-	 * Pre-process the clauses list to extract the attnums seen in each item.
-	 * We need to determine if there's any clauses which will be useful for
-	 * selectivity estimations with extended stats. Along the way we'll record
-	 * all of the attnums for each clause in a list which we'll reference
-	 * later so we don't need to repeat the same work again. We'll also keep
-	 * track of all attnums seen.
+	 * Pre-process the clauses list to extract the attnums and expressions
+	 * seen in each item.  We need to determine if there are any clauses which
+	 * will be useful for selectivity estimations with extended stats.  Along
+	 * the way we'll record all of the attnums and expressions for each clause
+	 * in lists which we'll reference later so we don't need to repeat the
+	 * same work again.
 	 *
 	 * We also skip clauses that we already estimated using different types of
 	 * statistics (we treat them as incompatible).
@@ -1286,12 +1713,19 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	{
 		Node	   *clause = (Node *) lfirst(l);
 		Bitmapset  *attnums = NULL;
+		List	   *exprs = NIL;
 
 		if (!bms_is_member(listidx, *estimatedclauses) &&
-			statext_is_compatible_clause(root, clause, rel->relid, &attnums))
+			statext_is_compatible_clause(root, clause, rel->relid, &attnums, &exprs))
+		{
 			list_attnums[listidx] = attnums;
+			list_exprs[listidx] = exprs;
+		}
 		else
+		{
 			list_attnums[listidx] = NULL;
+			list_exprs[listidx] = NIL;
+		}
 
 		listidx++;
 	}
@@ -1305,7 +1739,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 
 		/* find the best suited statistics object for these attnums */
 		stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV,
-									  list_attnums, list_length(clauses));
+									  list_attnums, list_exprs,
+									  list_length(clauses));
 
 		/*
 		 * if no (additional) matching stats could be found then we've nothing
@@ -1320,28 +1755,39 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 		/* now filter the clauses to be estimated using the selected MCV */
 		stat_clauses = NIL;
 
-		/* record which clauses are simple (single column) */
+		/* record which clauses are simple (single column or expression) */
 		simple_clauses = NULL;
 
 		listidx = 0;
 		foreach(l, clauses)
 		{
 			/*
-			 * If the clause is compatible with the selected statistics, mark
-			 * it as estimated and add it to the list to estimate.
+			 * If the clause is not already estimated and is compatible with
+			 * the selected statistics object (all attributes and expressions
+			 * covered), mark it as estimated and add it to the list to
+			 * estimate.
 			 */
-			if (list_attnums[listidx] != NULL &&
-				bms_is_subset(list_attnums[listidx], stat->keys))
+			if (!bms_is_member(listidx, *estimatedclauses) &&
+				bms_is_subset(list_attnums[listidx], stat->keys) &&
+				stat_covers_expressions(stat, list_exprs[listidx], NULL))
 			{
-				if (bms_membership(list_attnums[listidx]) == BMS_SINGLETON)
+				/* record simple clauses (single column or expression) */
+				if ((list_attnums[listidx] == NULL &&
+					 list_length(list_exprs[listidx]) == 1) ||
+					(list_exprs[listidx] == NIL &&
+					 bms_membership(list_attnums[listidx]) == BMS_SINGLETON))
 					simple_clauses = bms_add_member(simple_clauses,
 													list_length(stat_clauses));
 
+				/* add clause to list and mark as estimated */
 				stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
 				*estimatedclauses = bms_add_member(*estimatedclauses, listidx);
 
 				bms_free(list_attnums[listidx]);
 				list_attnums[listidx] = NULL;
+
+				list_free(list_exprs[listidx]);
+				list_exprs[listidx] = NULL;
 			}
 
 			listidx++;
@@ -1530,23 +1976,24 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
 }
 
 /*
- * examine_opclause_expression
- *		Split expression into Var and Const parts.
+ * examine_opclause_args
+ *		Split an operator expression's arguments into Expr and Const parts.
  *
- * Attempts to match the arguments to either (Var op Const) or (Const op Var),
- * possibly with a RelabelType on top. When the expression matches this form,
- * returns true, otherwise returns false.
+ * Attempts to match the arguments to either (Expr op Const) or (Const op
+ * Expr), possibly with a RelabelType on top. When the expression matches this
+ * form, returns true, otherwise returns false.
  *
- * Optionally returns pointers to the extracted Var/Const nodes, when passed
- * non-null pointers (varp, cstp and varonleftp). The varonleftp flag specifies
- * on which side of the operator we found the Var node.
+ * Optionally returns pointers to the extracted Expr/Const nodes, when passed
+ * non-null pointers (exprp, cstp and expronleftp). The expronleftp flag
+ * specifies on which side of the operator we found the expression node.
  */
 bool
-examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp)
+examine_opclause_args(List *args, Node **exprp, Const **cstp,
+					  bool *expronleftp)
 {
-	Var		   *var;
+	Node	   *expr;
 	Const	   *cst;
-	bool		varonleft;
+	bool		expronleft;
 	Node	   *leftop,
 			   *rightop;
 
@@ -1563,30 +2010,564 @@ examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp)
 	if (IsA(rightop, RelabelType))
 		rightop = (Node *) ((RelabelType *) rightop)->arg;
 
-	if (IsA(leftop, Var) && IsA(rightop, Const))
+	if (IsA(rightop, Const))
 	{
-		var = (Var *) leftop;
+		expr = (Node *) leftop;
 		cst = (Const *) rightop;
-		varonleft = true;
+		expronleft = true;
 	}
-	else if (IsA(leftop, Const) && IsA(rightop, Var))
+	else if (IsA(leftop, Const))
 	{
-		var = (Var *) rightop;
+		expr = (Node *) rightop;
 		cst = (Const *) leftop;
-		varonleft = false;
+		expronleft = false;
 	}
 	else
 		return false;
 
 	/* return pointers to the extracted parts if requested */
-	if (varp)
-		*varp = var;
+	if (exprp)
+		*exprp = expr;
 
 	if (cstp)
 		*cstp = cst;
 
-	if (varonleftp)
-		*varonleftp = varonleft;
+	if (expronleftp)
+		*expronleftp = expronleft;
 
 	return true;
 }
+
+
+/*
+ * Compute statistics about expressions of a relation.
+ */
+static void
+compute_expr_stats(Relation onerel, double totalrows,
+				   AnlExprData *exprdata, int nexprs,
+				   HeapTuple *rows, int numrows)
+{
+	MemoryContext expr_context,
+				old_context;
+	int			ind,
+				i;
+
+	expr_context = AllocSetContextCreate(CurrentMemoryContext,
+										 "Analyze Expression",
+										 ALLOCSET_DEFAULT_SIZES);
+	old_context = MemoryContextSwitchTo(expr_context);
+
+	for (ind = 0; ind < nexprs; ind++)
+	{
+		AnlExprData *thisdata = &exprdata[ind];
+		VacAttrStats *stats = thisdata->vacattrstat;
+		Node	   *expr = thisdata->expr;
+		TupleTableSlot *slot;
+		EState	   *estate;
+		ExprContext *econtext;
+		Datum	   *exprvals;
+		bool	   *exprnulls;
+		ExprState  *exprstate;
+		int			tcnt;
+
+		/* Are we still in the main context? */
+		Assert(CurrentMemoryContext == expr_context);
+
+		/*
+		 * Need an EState for evaluation of expressions.  Create it in the
+		 * per-expression context to be sure it gets cleaned up at the bottom
+		 * of the loop.
+		 */
+		estate = CreateExecutorState();
+		econtext = GetPerTupleExprContext(estate);
+
+		/* Set up expression evaluation state */
+		exprstate = ExecPrepareExpr((Expr *) expr, estate);
+
+		/* Need a slot to hold the current heap tuple, too */
+		slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
+										&TTSOpsHeapTuple);
+
+		/* Arrange for econtext's scan tuple to be the tuple under test */
+		econtext->ecxt_scantuple = slot;
+
+		/* Compute and save expression values */
+		exprvals = (Datum *) palloc(numrows * sizeof(Datum));
+		exprnulls = (bool *) palloc(numrows * sizeof(bool));
+
+		tcnt = 0;
+		for (i = 0; i < numrows; i++)
+		{
+			Datum		datum;
+			bool		isnull;
+
+			/*
+			 * Reset the per-tuple context each time, to reclaim any cruft
+			 * left behind by evaluating the statistics expressions.
+			 */
+			ResetExprContext(econtext);
+
+			/* Set up for expression evaluation */
+			ExecStoreHeapTuple(rows[i], slot, false);
+
+			/*
+			 * Evaluate the expression. We do this in the per-tuple context so
+			 * as not to leak memory, and then copy the result into the
+			 * context created at the beginning of this function.
+			 */
+			datum = ExecEvalExprSwitchContext(exprstate,
+											  GetPerTupleExprContext(estate),
+											  &isnull);
+			if (isnull)
+			{
+				exprvals[tcnt] = (Datum) 0;
+				exprnulls[tcnt] = true;
+			}
+			else
+			{
+				/* Make sure we copy the data into the context. */
+				Assert(CurrentMemoryContext == expr_context);
+
+				exprvals[tcnt] = datumCopy(datum,
+										   stats->attrtype->typbyval,
+										   stats->attrtype->typlen);
+				exprnulls[tcnt] = false;
+			}
+
+			tcnt++;
+		}
+
+		/*
+		 * Now we can compute the statistics for the expression columns.
+		 *
+		 * XXX Unlike compute_index_stats we don't need to switch and reset
+		 * memory contexts here, because we're only computing stats for a
+		 * single expression (and not iterating over many indexes), so we just
+		 * do it in expr_context. Note that compute_stats copies the result
+		 * into stats->anl_context, so it does not disappear.
+		 */
+		if (tcnt > 0)
+		{
+			AttributeOpts *aopt =
+			get_attribute_options(stats->attr->attrelid,
+								  stats->attr->attnum);
+
+			stats->exprvals = exprvals;
+			stats->exprnulls = exprnulls;
+			stats->rowstride = 1;
+			stats->compute_stats(stats,
+								 expr_fetch_func,
+								 tcnt,
+								 tcnt);
+
+			/*
+			 * If the n_distinct option is specified, it overrides the above
+			 * computation.
+			 */
+			if (aopt != NULL && aopt->n_distinct != 0.0)
+				stats->stadistinct = aopt->n_distinct;
+		}
+
+		/* And clean up */
+		MemoryContextSwitchTo(expr_context);
+
+		ExecDropSingleTupleTableSlot(slot);
+		FreeExecutorState(estate);
+		MemoryContextResetAndDeleteChildren(expr_context);
+	}
+
+	MemoryContextSwitchTo(old_context);
+	MemoryContextDelete(expr_context);
+}
+
+
+/*
+ * Fetch function for analyzing statistics object expressions.
+ *
+ * We have not bothered to construct tuples from the data, instead the data
+ * is just in Datum arrays.
+ */
+static Datum
+expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
+{
+	int			i;
+
+	/* exprvals and exprnulls are already offset for proper column */
+	i = rownum * stats->rowstride;
+	*isNull = stats->exprnulls[i];
+	return stats->exprvals[i];
+}
+
+/*
+ * Build analyze data for a list of expressions. As this is not tied
+ * directly to a relation (table or index), we have to fake some of
+ * the fields in examine_expression().
+ */
+static AnlExprData *
+build_expr_data(List *exprs, int stattarget)
+{
+	int			idx;
+	int			nexprs = list_length(exprs);
+	AnlExprData *exprdata;
+	ListCell   *lc;
+
+	exprdata = (AnlExprData *) palloc0(nexprs * sizeof(AnlExprData));
+
+	idx = 0;
+	foreach(lc, exprs)
+	{
+		Node	   *expr = (Node *) lfirst(lc);
+		AnlExprData *thisdata = &exprdata[idx];
+
+		thisdata->expr = expr;
+		thisdata->vacattrstat = examine_expression(expr, stattarget);
+		idx++;
+	}
+
+	return exprdata;
+}
+
+/* form an array of pg_statistic rows (per update_attstats) */
+static Datum
+serialize_expr_stats(AnlExprData *exprdata, int nexprs)
+{
+	int			exprno;
+	Oid			typOid;
+	Relation	sd;
+
+	ArrayBuildState *astate = NULL;
+
+	sd = table_open(StatisticRelationId, RowExclusiveLock);
+
+	/* lookup OID of composite type for pg_statistic */
+	typOid = get_rel_type_id(StatisticRelationId);
+	if (!OidIsValid(typOid))
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("relation \"pg_statistic\" does not have a composite type")));
+
+	for (exprno = 0; exprno < nexprs; exprno++)
+	{
+		int			i,
+					k;
+		VacAttrStats *stats = exprdata[exprno].vacattrstat;
+
+		Datum		values[Natts_pg_statistic];
+		bool		nulls[Natts_pg_statistic];
+		HeapTuple	stup;
+
+		if (!stats->stats_valid)
+		{
+			astate = accumArrayResult(astate,
+									  (Datum) 0,
+									  true,
+									  typOid,
+									  CurrentMemoryContext);
+			continue;
+		}
+
+		/*
+		 * Construct a new pg_statistic tuple
+		 */
+		for (i = 0; i < Natts_pg_statistic; ++i)
+		{
+			nulls[i] = false;
+		}
+
+		values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(InvalidOid);
+		values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(InvalidAttrNumber);
+		values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(false);
+		values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
+		values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
+		values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
+		i = Anum_pg_statistic_stakind1 - 1;
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+		}
+		i = Anum_pg_statistic_staop1 - 1;
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			values[i++] = ObjectIdGetDatum(stats->staop[k]);	/* staopN */
+		}
+		i = Anum_pg_statistic_stacoll1 - 1;
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			values[i++] = ObjectIdGetDatum(stats->stacoll[k]);	/* stacollN */
+		}
+		i = Anum_pg_statistic_stanumbers1 - 1;
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			int			nnum = stats->numnumbers[k];
+
+			if (nnum > 0)
+			{
+				int			n;
+				Datum	   *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
+				ArrayType  *arry;
+
+				for (n = 0; n < nnum; n++)
+					numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+				/* XXX knows more than it should about type float4: */
+				arry = construct_array(numdatums, nnum,
+									   FLOAT4OID,
+									   sizeof(float4), true, TYPALIGN_INT);
+				values[i++] = PointerGetDatum(arry);	/* stanumbersN */
+			}
+			else
+			{
+				nulls[i] = true;
+				values[i++] = (Datum) 0;
+			}
+		}
+		i = Anum_pg_statistic_stavalues1 - 1;
+		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+		{
+			if (stats->numvalues[k] > 0)
+			{
+				ArrayType  *arry;
+
+				arry = construct_array(stats->stavalues[k],
+									   stats->numvalues[k],
+									   stats->statypid[k],
+									   stats->statyplen[k],
+									   stats->statypbyval[k],
+									   stats->statypalign[k]);
+				values[i++] = PointerGetDatum(arry);	/* stavaluesN */
+			}
+			else
+			{
+				nulls[i] = true;
+				values[i++] = (Datum) 0;
+			}
+		}
+
+		stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
+
+		astate = accumArrayResult(astate,
+								  heap_copy_tuple_as_datum(stup, RelationGetDescr(sd)),
+								  false,
+								  typOid,
+								  CurrentMemoryContext);
+	}
+
+	table_close(sd, RowExclusiveLock);
+
+	return makeArrayResult(astate, CurrentMemoryContext);
+}
+
+/*
+ * Loads pg_statistic record from expression statistics for expression
+ * identified by the supplied index.
+ */
+HeapTuple
+statext_expressions_load(Oid stxoid, int idx)
+{
+	bool		isnull;
+	Datum		value;
+	HeapTuple	htup;
+	ExpandedArrayHeader *eah;
+	HeapTupleHeader td;
+	HeapTupleData tmptup;
+	HeapTuple	tup;
+
+	htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(stxoid));
+	if (!HeapTupleIsValid(htup))
+		elog(ERROR, "cache lookup failed for statistics object %u", stxoid);
+
+	value = SysCacheGetAttr(STATEXTDATASTXOID, htup,
+							Anum_pg_statistic_ext_data_stxdexpr, &isnull);
+	if (isnull)
+		elog(ERROR,
+			 "requested statistics kind \"%c\" is not yet built for statistics object %u",
+			 STATS_EXT_DEPENDENCIES, stxoid);
+
+	eah = DatumGetExpandedArray(value);
+
+	deconstruct_expanded_array(eah);
+
+	td = DatumGetHeapTupleHeader(eah->dvalues[idx]);
+
+	/* Build a temporary HeapTuple control structure */
+	tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
+	tmptup.t_data = td;
+
+	tup = heap_copytuple(&tmptup);
+
+	ReleaseSysCache(htup);
+
+	return tup;
+}
+
+/*
+ * Evaluate the expressions, so that we can use the results to build
+ * all the requested statistics types. This matters especially for
+ * expensive expressions, of course.
+ */
+static StatsBuildData *
+make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows,
+				VacAttrStats **stats, int stattarget)
+{
+	/* evaluated expressions */
+	StatsBuildData *result;
+	char	   *ptr;
+	Size		len;
+
+	int			i;
+	int			k;
+	int			idx;
+	TupleTableSlot *slot;
+	EState	   *estate;
+	ExprContext *econtext;
+	List	   *exprstates = NIL;
+	int			nkeys = bms_num_members(stat->columns) + list_length(stat->exprs);
+	ListCell   *lc;
+
+	/* allocate everything as a single chunk, so we can free it easily */
+	len = MAXALIGN(sizeof(StatsBuildData));
+	len += MAXALIGN(sizeof(AttrNumber) * nkeys);	/* attnums */
+	len += MAXALIGN(sizeof(VacAttrStats *) * nkeys);	/* stats */
+
+	/* values */
+	len += MAXALIGN(sizeof(Datum *) * nkeys);
+	len += nkeys * MAXALIGN(sizeof(Datum) * numrows);
+
+	/* nulls */
+	len += MAXALIGN(sizeof(bool *) * nkeys);
+	len += nkeys * MAXALIGN(sizeof(bool) * numrows);
+
+	ptr = palloc(len);
+
+	/* set the pointers */
+	result = (StatsBuildData *) ptr;
+	ptr += MAXALIGN(sizeof(StatsBuildData));
+
+	/* attnums */
+	result->attnums = (AttrNumber *) ptr;
+	ptr += MAXALIGN(sizeof(AttrNumber) * nkeys);
+
+	/* stats */
+	result->stats = (VacAttrStats **) ptr;
+	ptr += MAXALIGN(sizeof(VacAttrStats *) * nkeys);
+
+	/* values */
+	result->values = (Datum **) ptr;
+	ptr += MAXALIGN(sizeof(Datum *) * nkeys);
+
+	/* nulls */
+	result->nulls = (bool **) ptr;
+	ptr += MAXALIGN(sizeof(bool *) * nkeys);
+
+	for (i = 0; i < nkeys; i++)
+	{
+		result->values[i] = (Datum *) ptr;
+		ptr += MAXALIGN(sizeof(Datum) * numrows);
+
+		result->nulls[i] = (bool *) ptr;
+		ptr += MAXALIGN(sizeof(bool) * numrows);
+	}
+
+	Assert((ptr - (char *) result) == len);
+
+	/* we have it allocated, so let's fill the values */
+	result->nattnums = nkeys;
+	result->numrows = numrows;
+
+	/* fill the attribute info - first attributes, then expressions */
+	idx = 0;
+	k = -1;
+	while ((k = bms_next_member(stat->columns, k)) >= 0)
+	{
+		result->attnums[idx] = k;
+		result->stats[idx] = stats[idx];
+
+		idx++;
+	}
+
+	k = -1;
+	foreach(lc, stat->exprs)
+	{
+		Node	   *expr = (Node *) lfirst(lc);
+
+		result->attnums[idx] = k;
+		result->stats[idx] = examine_expression(expr, stattarget);
+
+		idx++;
+		k--;
+	}
+
+	/* first extract values for all the regular attributes */
+	for (i = 0; i < numrows; i++)
+	{
+		idx = 0;
+		k = -1;
+		while ((k = bms_next_member(stat->columns, k)) >= 0)
+		{
+			result->values[idx][i] = heap_getattr(rows[i], k,
+												  result->stats[idx]->tupDesc,
+												  &result->nulls[idx][i]);
+
+			idx++;
+		}
+	}
+
+	/* Need an EState for evaluation expressions. */
+	estate = CreateExecutorState();
+	econtext = GetPerTupleExprContext(estate);
+
+	/* Need a slot to hold the current heap tuple, too */
+	slot = MakeSingleTupleTableSlot(RelationGetDescr(rel),
+									&TTSOpsHeapTuple);
+
+	/* Arrange for econtext's scan tuple to be the tuple under test */
+	econtext->ecxt_scantuple = slot;
+
+	/* Set up expression evaluation state */
+	exprstates = ExecPrepareExprList(stat->exprs, estate);
+
+	for (i = 0; i < numrows; i++)
+	{
+		/*
+		 * Reset the per-tuple context each time, to reclaim any cruft left
+		 * behind by evaluating the statistics object expressions.
+		 */
+		ResetExprContext(econtext);
+
+		/* Set up for expression evaluation */
+		ExecStoreHeapTuple(rows[i], slot, false);
+
+		idx = bms_num_members(stat->columns);
+		foreach(lc, exprstates)
+		{
+			Datum		datum;
+			bool		isnull;
+			ExprState  *exprstate = (ExprState *) lfirst(lc);
+
+			/*
+			 * XXX This probably leaks memory. Maybe we should use
+			 * ExecEvalExprSwitchContext but then we need to copy the result
+			 * somewhere else.
+			 */
+			datum = ExecEvalExpr(exprstate,
+								 GetPerTupleExprContext(estate),
+								 &isnull);
+			if (isnull)
+			{
+				result->values[idx][i] = (Datum) 0;
+				result->nulls[idx][i] = true;
+			}
+			else
+			{
+				result->values[idx][i] = (Datum) datum;
+				result->nulls[idx][i] = false;
+			}
+
+			idx++;
+		}
+	}
+
+	ExecDropSingleTupleTableSlot(slot);
+	FreeExecutorState(estate);
+
+	return result;
+}
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 8335dff241..2a00fb4848 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -74,7 +74,7 @@
 	 ((ndims) * sizeof(DimensionInfo)) + \
 	 ((nitems) * ITEM_SIZE(ndims)))
 
-static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs);
+static MultiSortSupport build_mss(StatsBuildData *data);
 
 static SortItem *build_distinct_groups(int numrows, SortItem *items,
 									   MultiSortSupport mss, int *ndistinct);
@@ -181,32 +181,33 @@ get_mincount_for_mcv_list(int samplerows, double totalrows)
  *
  */
 MCVList *
-statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
-				  VacAttrStats **stats, double totalrows, int stattarget)
+statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
 {
 	int			i,
 				numattrs,
+				numrows,
 				ngroups,
 				nitems;
-	AttrNumber *attnums;
 	double		mincount;
 	SortItem   *items;
 	SortItem   *groups;
 	MCVList    *mcvlist = NULL;
 	MultiSortSupport mss;
 
-	attnums = build_attnums_array(attrs, &numattrs);
-
 	/* comparator for all the columns */
-	mss = build_mss(stats, numattrs);
+	mss = build_mss(data);
 
 	/* sort the rows */
-	items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
-							   mss, numattrs, attnums);
+	items = build_sorted_items(data, &nitems, mss,
+							   data->nattnums, data->attnums);
 
 	if (!items)
 		return NULL;
 
+	/* for convenience */
+	numattrs = data->nattnums;
+	numrows = data->numrows;
+
 	/* transform the sorted rows into groups (sorted by frequency) */
 	groups = build_distinct_groups(nitems, items, mss, &ngroups);
 
@@ -289,7 +290,7 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 
 		/* store info about data type OIDs */
 		for (i = 0; i < numattrs; i++)
-			mcvlist->types[i] = stats[i]->attrtypid;
+			mcvlist->types[i] = data->stats[i]->attrtypid;
 
 		/* Copy the first chunk of groups into the result. */
 		for (i = 0; i < nitems; i++)
@@ -347,9 +348,10 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
  *	build MultiSortSupport for the attributes passed in attrs
  */
 static MultiSortSupport
-build_mss(VacAttrStats **stats, int numattrs)
+build_mss(StatsBuildData *data)
 {
 	int			i;
+	int			numattrs = data->nattnums;
 
 	/* Sort by multiple columns (using array of SortSupport) */
 	MultiSortSupport mss = multi_sort_init(numattrs);
@@ -357,7 +359,7 @@ build_mss(VacAttrStats **stats, int numattrs)
 	/* prepare the sort functions for all the attributes */
 	for (i = 0; i < numattrs; i++)
 	{
-		VacAttrStats *colstat = stats[i];
+		VacAttrStats *colstat = data->stats[i];
 		TypeCacheEntry *type;
 
 		type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
@@ -1523,6 +1525,59 @@ pg_mcv_list_send(PG_FUNCTION_ARGS)
 	return byteasend(fcinfo);
 }
 
+/*
+ * match the attribute/expression to a dimension of the statistic
+ *
+ * Match the attribute/expression to statistics dimension. Optionally
+ * determine the collation.
+ */
+static int
+mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
+{
+	int			idx = -1;
+
+	if (IsA(expr, Var))
+	{
+		/* simple Var, so just lookup using varattno */
+		Var		   *var = (Var *) expr;
+
+		if (collid)
+			*collid = var->varcollid;
+
+		idx = bms_member_index(keys, var->varattno);
+
+		/* make sure the index is valid */
+		Assert((idx >= 0) && (idx <= bms_num_members(keys)));
+	}
+	else
+	{
+		ListCell   *lc;
+
+		/* expressions are stored after the simple columns */
+		idx = bms_num_members(keys);
+
+		if (collid)
+			*collid = exprCollation(expr);
+
+		/* expression - lookup in stats expressions */
+		foreach(lc, exprs)
+		{
+			Node	   *stat_expr = (Node *) lfirst(lc);
+
+			if (equal(expr, stat_expr))
+				break;
+
+			idx++;
+		}
+
+		/* make sure the index is valid */
+		Assert((idx >= bms_num_members(keys)) &&
+			   (idx <= bms_num_members(keys) + list_length(exprs)));
+	}
+
+	return idx;
+}
+
 /*
  * mcv_get_match_bitmap
  *	Evaluate clauses using the MCV list, and update the match bitmap.
@@ -1544,7 +1599,8 @@ pg_mcv_list_send(PG_FUNCTION_ARGS)
  */
 static bool *
 mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
-					 Bitmapset *keys, MCVList *mcvlist, bool is_or)
+					 Bitmapset *keys, List *exprs,
+					 MCVList *mcvlist, bool is_or)
 {
 	int			i;
 	ListCell   *l;
@@ -1582,77 +1638,78 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			OpExpr	   *expr = (OpExpr *) clause;
 			FmgrInfo	opproc;
 
-			/* valid only after examine_clause_args returns true */
-			Var		   *var;
+			/* valid only after examine_opclause_args returns true */
+			Node	   *clause_expr;
 			Const	   *cst;
-			bool		varonleft;
+			bool		expronleft;
+			int			idx;
+			Oid			collid;
 
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
-			/* extract the var and const from the expression */
-			if (examine_clause_args(expr->args, &var, &cst, &varonleft))
-			{
-				int			idx;
+			/* extract the var/expr and const from the expression */
+			if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
+				elog(ERROR, "incompatible clause");
 
-				/* match the attribute to a dimension of the statistic */
-				idx = bms_member_index(keys, var->varattno);
+			/* match the attribute/expression to a dimension of the statistic */
+			idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
+
+			/*
+			 * Walk through the MCV items and evaluate the current clause. We
+			 * can skip items that were already ruled out, and terminate if
+			 * there are no remaining MCV items that might possibly match.
+			 */
+			for (i = 0; i < mcvlist->nitems; i++)
+			{
+				bool		match = true;
+				MCVItem    *item = &mcvlist->items[i];
+
+				Assert(idx >= 0);
 
 				/*
-				 * Walk through the MCV items and evaluate the current clause.
-				 * We can skip items that were already ruled out, and
-				 * terminate if there are no remaining MCV items that might
-				 * possibly match.
+				 * When the MCV item or the Const value is NULL we can treat
+				 * this as a mismatch. We must not call the operator because
+				 * of strictness.
 				 */
-				for (i = 0; i < mcvlist->nitems; i++)
+				if (item->isnull[idx] || cst->constisnull)
 				{
-					bool		match = true;
-					MCVItem    *item = &mcvlist->items[i];
-
-					/*
-					 * When the MCV item or the Const value is NULL we can
-					 * treat this as a mismatch. We must not call the operator
-					 * because of strictness.
-					 */
-					if (item->isnull[idx] || cst->constisnull)
-					{
-						matches[i] = RESULT_MERGE(matches[i], is_or, false);
-						continue;
-					}
-
-					/*
-					 * Skip MCV items that can't change result in the bitmap.
-					 * Once the value gets false for AND-lists, or true for
-					 * OR-lists, we don't need to look at more clauses.
-					 */
-					if (RESULT_IS_FINAL(matches[i], is_or))
-						continue;
-
-					/*
-					 * First check whether the constant is below the lower
-					 * boundary (in that case we can skip the bucket, because
-					 * there's no overlap).
-					 *
-					 * We don't store collations used to build the statistics,
-					 * but we can use the collation for the attribute itself,
-					 * as stored in varcollid. We do reset the statistics
-					 * after a type change (including collation change), so
-					 * this is OK. We may need to relax this after allowing
-					 * extended statistics on expressions.
-					 */
-					if (varonleft)
-						match = DatumGetBool(FunctionCall2Coll(&opproc,
-															   var->varcollid,
-															   item->values[idx],
-															   cst->constvalue));
-					else
-						match = DatumGetBool(FunctionCall2Coll(&opproc,
-															   var->varcollid,
-															   cst->constvalue,
-															   item->values[idx]));
-
-					/* update the match bitmap with the result */
-					matches[i] = RESULT_MERGE(matches[i], is_or, match);
+					matches[i] = RESULT_MERGE(matches[i], is_or, false);
+					continue;
 				}
+
+				/*
+				 * Skip MCV items that can't change result in the bitmap. Once
+				 * the value gets false for AND-lists, or true for OR-lists,
+				 * we don't need to look at more clauses.
+				 */
+				if (RESULT_IS_FINAL(matches[i], is_or))
+					continue;
+
+				/*
+				 * First check whether the constant is below the lower
+				 * boundary (in that case we can skip the bucket, because
+				 * there's no overlap).
+				 *
+				 * We don't store collations used to build the statistics, but
+				 * we can use the collation for the attribute itself, as
+				 * stored in varcollid. We do reset the statistics after a
+				 * type change (including collation change), so this is OK.
+				 * For expressions we use the collation extracted from the
+				 * expression itself.
+				 */
+				if (expronleft)
+					match = DatumGetBool(FunctionCall2Coll(&opproc,
+														   collid,
+														   item->values[idx],
+														   cst->constvalue));
+				else
+					match = DatumGetBool(FunctionCall2Coll(&opproc,
+														   collid,
+														   cst->constvalue,
+														   item->values[idx]));
+
+				/* update the match bitmap with the result */
+				matches[i] = RESULT_MERGE(matches[i], is_or, match);
 			}
 		}
 		else if (IsA(clause, ScalarArrayOpExpr))
@@ -1660,115 +1717,116 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
 			FmgrInfo	opproc;
 
-			/* valid only after examine_clause_args returns true */
-			Var		   *var;
+			/* valid only after examine_opclause_args returns true */
+			Node	   *clause_expr;
 			Const	   *cst;
-			bool		varonleft;
+			bool		expronleft;
+			Oid			collid;
+			int			idx;
+
+			/* array evaluation */
+			ArrayType  *arrayval;
+			int16		elmlen;
+			bool		elmbyval;
+			char		elmalign;
+			int			num_elems;
+			Datum	   *elem_values;
+			bool	   *elem_nulls;
 
 			fmgr_info(get_opcode(expr->opno), &opproc);
 
-			/* extract the var and const from the expression */
-			if (examine_clause_args(expr->args, &var, &cst, &varonleft))
+			/* extract the var/expr and const from the expression */
+			if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
+				elog(ERROR, "incompatible clause");
+
+			/* ScalarArrayOpExpr has the Var always on the left */
+			Assert(expronleft);
+
+			/* XXX what if (cst->constisnull == NULL)? */
+			if (!cst->constisnull)
 			{
-				int			idx;
+				arrayval = DatumGetArrayTypeP(cst->constvalue);
+				get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
+									 &elmlen, &elmbyval, &elmalign);
+				deconstruct_array(arrayval,
+								  ARR_ELEMTYPE(arrayval),
+								  elmlen, elmbyval, elmalign,
+								  &elem_values, &elem_nulls, &num_elems);
+			}
 
-				ArrayType  *arrayval;
-				int16		elmlen;
-				bool		elmbyval;
-				char		elmalign;
-				int			num_elems;
-				Datum	   *elem_values;
-				bool	   *elem_nulls;
+			/* match the attribute/expression to a dimension of the statistic */
+			idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
 
-				/* ScalarArrayOpExpr has the Var always on the left */
-				Assert(varonleft);
-
-				if (!cst->constisnull)
-				{
-					arrayval = DatumGetArrayTypeP(cst->constvalue);
-					get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
-										 &elmlen, &elmbyval, &elmalign);
-					deconstruct_array(arrayval,
-									  ARR_ELEMTYPE(arrayval),
-									  elmlen, elmbyval, elmalign,
-									  &elem_values, &elem_nulls, &num_elems);
-				}
-
-				/* match the attribute to a dimension of the statistic */
-				idx = bms_member_index(keys, var->varattno);
+			/*
+			 * Walk through the MCV items and evaluate the current clause. We
+			 * can skip items that were already ruled out, and terminate if
+			 * there are no remaining MCV items that might possibly match.
+			 */
+			for (i = 0; i < mcvlist->nitems; i++)
+			{
+				int			j;
+				bool		match = (expr->useOr ? false : true);
+				MCVItem    *item = &mcvlist->items[i];
 
 				/*
-				 * Walk through the MCV items and evaluate the current clause.
-				 * We can skip items that were already ruled out, and
-				 * terminate if there are no remaining MCV items that might
-				 * possibly match.
+				 * When the MCV item or the Const value is NULL we can treat
+				 * this as a mismatch. We must not call the operator because
+				 * of strictness.
 				 */
-				for (i = 0; i < mcvlist->nitems; i++)
+				if (item->isnull[idx] || cst->constisnull)
 				{
-					int			j;
-					bool		match = (expr->useOr ? false : true);
-					MCVItem    *item = &mcvlist->items[i];
-
-					/*
-					 * When the MCV item or the Const value is NULL we can
-					 * treat this as a mismatch. We must not call the operator
-					 * because of strictness.
-					 */
-					if (item->isnull[idx] || cst->constisnull)
-					{
-						matches[i] = RESULT_MERGE(matches[i], is_or, false);
-						continue;
-					}
-
-					/*
-					 * Skip MCV items that can't change result in the bitmap.
-					 * Once the value gets false for AND-lists, or true for
-					 * OR-lists, we don't need to look at more clauses.
-					 */
-					if (RESULT_IS_FINAL(matches[i], is_or))
-						continue;
-
-					for (j = 0; j < num_elems; j++)
-					{
-						Datum		elem_value = elem_values[j];
-						bool		elem_isnull = elem_nulls[j];
-						bool		elem_match;
-
-						/* NULL values always evaluate as not matching. */
-						if (elem_isnull)
-						{
-							match = RESULT_MERGE(match, expr->useOr, false);
-							continue;
-						}
-
-						/*
-						 * Stop evaluating the array elements once we reach
-						 * match value that can't change - ALL() is the same
-						 * as AND-list, ANY() is the same as OR-list.
-						 */
-						if (RESULT_IS_FINAL(match, expr->useOr))
-							break;
-
-						elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
-																	var->varcollid,
-																	item->values[idx],
-																	elem_value));
-
-						match = RESULT_MERGE(match, expr->useOr, elem_match);
-					}
-
-					/* update the match bitmap with the result */
-					matches[i] = RESULT_MERGE(matches[i], is_or, match);
+					matches[i] = RESULT_MERGE(matches[i], is_or, false);
+					continue;
 				}
+
+				/*
+				 * Skip MCV items that can't change result in the bitmap. Once
+				 * the value gets false for AND-lists, or true for OR-lists,
+				 * we don't need to look at more clauses.
+				 */
+				if (RESULT_IS_FINAL(matches[i], is_or))
+					continue;
+
+				for (j = 0; j < num_elems; j++)
+				{
+					Datum		elem_value = elem_values[j];
+					bool		elem_isnull = elem_nulls[j];
+					bool		elem_match;
+
+					/* NULL values always evaluate as not matching. */
+					if (elem_isnull)
+					{
+						match = RESULT_MERGE(match, expr->useOr, false);
+						continue;
+					}
+
+					/*
+					 * Stop evaluating the array elements once we reach match
+					 * value that can't change - ALL() is the same as
+					 * AND-list, ANY() is the same as OR-list.
+					 */
+					if (RESULT_IS_FINAL(match, expr->useOr))
+						break;
+
+					elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
+																collid,
+																item->values[idx],
+																elem_value));
+
+					match = RESULT_MERGE(match, expr->useOr, elem_match);
+				}
+
+				/* update the match bitmap with the result */
+				matches[i] = RESULT_MERGE(matches[i], is_or, match);
 			}
 		}
 		else if (IsA(clause, NullTest))
 		{
 			NullTest   *expr = (NullTest *) clause;
-			Var		   *var = (Var *) (expr->arg);
+			Node	   *clause_expr = (Node *) (expr->arg);
 
-			/* match the attribute to a dimension of the statistic */
-			int			idx = bms_member_index(keys, var->varattno);
+			/* match the attribute/expression to a dimension of the statistic */
+			int			idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
 
 			/*
 			 * Walk through the MCV items and evaluate the current clause. We
@@ -1811,7 +1869,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			Assert(list_length(bool_clauses) >= 2);
 
 			/* build the match bitmap for the OR-clauses */
-			bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys,
+			bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
 												mcvlist, is_orclause(clause));
 
 			/*
@@ -1839,7 +1897,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 			Assert(list_length(not_args) == 1);
 
 			/* build the match bitmap for the NOT-clause */
-			not_matches = mcv_get_match_bitmap(root, not_args, keys,
+			not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
 											   mcvlist, false);
 
 			/*
@@ -1982,7 +2040,8 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
 	mcv = statext_mcv_load(stat->statOid);
 
 	/* build a match bitmap for the clauses */
-	matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, false);
+	matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
+								   mcv, false);
 
 	/* sum frequencies for all the matching MCV items */
 	*basesel = 0.0;
@@ -2056,7 +2115,7 @@ mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
 
 	/* build the match bitmap for the new clause */
 	new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
-									   mcv, false);
+									   stat->exprs, mcv, false);
 
 	/*
 	 * Sum the frequencies for all the MCV items matching this clause and also
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
index e08c001e3f..4481312d61 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -36,8 +36,7 @@
 #include "utils/syscache.h"
 #include "utils/typcache.h"
 
-static double ndistinct_for_combination(double totalrows, int numrows,
-										HeapTuple *rows, VacAttrStats **stats,
+static double ndistinct_for_combination(double totalrows, StatsBuildData *data,
 										int k, int *combination);
 static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
 static int	n_choose_k(int n, int k);
@@ -81,15 +80,18 @@ static void generate_combinations(CombinationGenerator *state);
  *
  * This computes the ndistinct estimate using the same estimator used
  * in analyze.c and then computes the coefficient.
+ *
+ * To handle expressions easily, we treat them as system attributes with
+ * negative attnums, and offset everything by number of expressions to
+ * allow using Bitmapsets.
  */
 MVNDistinct *
-statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
-						Bitmapset *attrs, VacAttrStats **stats)
+statext_ndistinct_build(double totalrows, StatsBuildData *data)
 {
 	MVNDistinct *result;
 	int			k;
 	int			itemcnt;
-	int			numattrs = bms_num_members(attrs);
+	int			numattrs = data->nattnums;
 	int			numcombs = num_combinations(numattrs);
 
 	result = palloc(offsetof(MVNDistinct, items) +
@@ -112,13 +114,19 @@ statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
 			MVNDistinctItem *item = &result->items[itemcnt];
 			int			j;
 
-			item->attrs = NULL;
+			item->attributes = palloc(sizeof(AttrNumber) * k);
+			item->nattributes = k;
+
+			/* translate the indexes to attnums */
 			for (j = 0; j < k; j++)
-				item->attrs = bms_add_member(item->attrs,
-											 stats[combination[j]]->attr->attnum);
+			{
+				item->attributes[j] = data->attnums[combination[j]];
+
+				Assert(AttributeNumberIsValid(item->attributes[j]));
+			}
+
 			item->ndistinct =
-				ndistinct_for_combination(totalrows, numrows, rows,
-										  stats, k, combination);
+				ndistinct_for_combination(totalrows, data, k, combination);
 
 			itemcnt++;
 			Assert(itemcnt <= result->nitems);
@@ -189,7 +197,7 @@ statext_ndistinct_serialize(MVNDistinct *ndistinct)
 	{
 		int			nmembers;
 
-		nmembers = bms_num_members(ndistinct->items[i].attrs);
+		nmembers = ndistinct->items[i].nattributes;
 		Assert(nmembers >= 2);
 
 		len += SizeOfItem(nmembers);
@@ -214,22 +222,15 @@ statext_ndistinct_serialize(MVNDistinct *ndistinct)
 	for (i = 0; i < ndistinct->nitems; i++)
 	{
 		MVNDistinctItem item = ndistinct->items[i];
-		int			nmembers = bms_num_members(item.attrs);
-		int			x;
+		int			nmembers = item.nattributes;
 
 		memcpy(tmp, &item.ndistinct, sizeof(double));
 		tmp += sizeof(double);
 		memcpy(tmp, &nmembers, sizeof(int));
 		tmp += sizeof(int);
 
-		x = -1;
-		while ((x = bms_next_member(item.attrs, x)) >= 0)
-		{
-			AttrNumber	value = (AttrNumber) x;
-
-			memcpy(tmp, &value, sizeof(AttrNumber));
-			tmp += sizeof(AttrNumber);
-		}
+		memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers);
+		tmp += nmembers * sizeof(AttrNumber);
 
 		/* protect against overflows */
 		Assert(tmp <= ((char *) output + len));
@@ -301,27 +302,21 @@ statext_ndistinct_deserialize(bytea *data)
 	for (i = 0; i < ndistinct->nitems; i++)
 	{
 		MVNDistinctItem *item = &ndistinct->items[i];
-		int			nelems;
-
-		item->attrs = NULL;
 
 		/* ndistinct value */
 		memcpy(&item->ndistinct, tmp, sizeof(double));
 		tmp += sizeof(double);
 
 		/* number of attributes */
-		memcpy(&nelems, tmp, sizeof(int));
+		memcpy(&item->nattributes, tmp, sizeof(int));
 		tmp += sizeof(int);
-		Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS));
+		Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS));
 
-		while (nelems-- > 0)
-		{
-			AttrNumber	attno;
+		item->attributes
+			= (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber));
 
-			memcpy(&attno, tmp, sizeof(AttrNumber));
-			tmp += sizeof(AttrNumber);
-			item->attrs = bms_add_member(item->attrs, attno);
-		}
+		memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes);
+		tmp += sizeof(AttrNumber) * item->nattributes;
 
 		/* still within the bytea */
 		Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
@@ -369,17 +364,17 @@ pg_ndistinct_out(PG_FUNCTION_ARGS)
 
 	for (i = 0; i < ndist->nitems; i++)
 	{
+		int			j;
 		MVNDistinctItem item = ndist->items[i];
-		int			x = -1;
-		bool		first = true;
 
 		if (i > 0)
 			appendStringInfoString(&str, ", ");
 
-		while ((x = bms_next_member(item.attrs, x)) >= 0)
+		for (j = 0; j < item.nattributes; j++)
 		{
-			appendStringInfo(&str, "%s%d", first ? "\"" : ", ", x);
-			first = false;
+			AttrNumber	attnum = item.attributes[j];
+
+			appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum);
 		}
 		appendStringInfo(&str, "\": %d", (int) item.ndistinct);
 	}
@@ -427,8 +422,8 @@ pg_ndistinct_send(PG_FUNCTION_ARGS)
  * combination of multiple columns.
  */
 static double
-ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
-						  VacAttrStats **stats, int k, int *combination)
+ndistinct_for_combination(double totalrows, StatsBuildData *data,
+						  int k, int *combination)
 {
 	int			i,
 				j;
@@ -439,6 +434,7 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
 	Datum	   *values;
 	SortItem   *items;
 	MultiSortSupport mss;
+	int			numrows = data->numrows;
 
 	mss = multi_sort_init(k);
 
@@ -467,25 +463,27 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
 	 */
 	for (i = 0; i < k; i++)
 	{
-		VacAttrStats *colstat = stats[combination[i]];
+		Oid			typid;
 		TypeCacheEntry *type;
+		Oid			collid = InvalidOid;
+		VacAttrStats *colstat = data->stats[combination[i]];
 
-		type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
+		typid = colstat->attrtypid;
+		collid = colstat->attrcollid;
+
+		type = lookup_type_cache(typid, TYPECACHE_LT_OPR);
 		if (type->lt_opr == InvalidOid) /* shouldn't happen */
 			elog(ERROR, "cache lookup failed for ordering operator for type %u",
-				 colstat->attrtypid);
+				 typid);
 
 		/* prepare the sort function for this dimension */
-		multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
+		multi_sort_add_dimension(mss, i, type->lt_opr, collid);
 
 		/* accumulate all the data for this dimension into the arrays */
 		for (j = 0; j < numrows; j++)
 		{
-			items[j].values[i] =
-				heap_getattr(rows[j],
-							 colstat->attr->attnum,
-							 colstat->tupDesc,
-							 &items[j].isnull[i]);
+			items[j].values[i] = data->values[combination[i]][j];
+			items[j].isnull[i] = data->nulls[combination[i]][j];
 		}
 	}
 
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 729274b330..16c6f17e23 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1816,7 +1816,34 @@ ProcessUtilitySlow(ParseState *pstate,
 				break;
 
 			case T_CreateStatsStmt:
-				address = CreateStatistics((CreateStatsStmt *) parsetree);
+				{
+					Oid			relid;
+					CreateStatsStmt *stmt = (CreateStatsStmt *) parsetree;
+					RangeVar   *rel = (RangeVar *) linitial(stmt->relations);
+
+					if (!IsA(rel, RangeVar))
+						ereport(ERROR,
+								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+								 errmsg("only a single relation is allowed in CREATE STATISTICS")));
+
+					/*
+					 * CREATE STATISTICS will influence future execution plans
+					 * but does not interfere with currently executing plans.
+					 * So it should be enough to take ShareUpdateExclusiveLock
+					 * on relation, conflicting with ANALYZE and other DDL
+					 * that sets statistical information, but not with normal
+					 * queries.
+					 *
+					 * XXX RangeVarCallbackOwnsRelation not needed here, to
+					 * keep the same behavior as before.
+					 */
+					relid = RangeVarGetRelid(rel, ShareUpdateExclusiveLock, false);
+
+					/* Run parse analysis ... */
+					stmt = transformStatsStmt(relid, stmt, queryString);
+
+					address = CreateStatistics(stmt);
+				}
 				break;
 
 			case T_AlterStatsStmt:
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index f0de2a25c9..3de98d2333 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -336,7 +336,8 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
 									bool attrsOnly, bool keysOnly,
 									bool showTblSpc, bool inherits,
 									int prettyFlags, bool missing_ok);
-static char *pg_get_statisticsobj_worker(Oid statextid, bool missing_ok);
+static char *pg_get_statisticsobj_worker(Oid statextid, bool columns_only,
+										 bool missing_ok);
 static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
 									  bool attrsOnly, bool missing_ok);
 static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
@@ -1507,7 +1508,36 @@ pg_get_statisticsobjdef(PG_FUNCTION_ARGS)
 	Oid			statextid = PG_GETARG_OID(0);
 	char	   *res;
 
-	res = pg_get_statisticsobj_worker(statextid, true);
+	res = pg_get_statisticsobj_worker(statextid, false, true);
+
+	if (res == NULL)
+		PG_RETURN_NULL();
+
+	PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+/*
+ * Internal version for use by ALTER TABLE.
+ * Includes a tablespace clause in the result.
+ * Returns a palloc'd C string; no pretty-printing.
+ */
+char *
+pg_get_statisticsobjdef_string(Oid statextid)
+{
+	return pg_get_statisticsobj_worker(statextid, false, false);
+}
+
+/*
+ * pg_get_statisticsobjdef_columns
+ *		Get columns and expressions for an extended statistics object
+ */
+Datum
+pg_get_statisticsobjdef_columns(PG_FUNCTION_ARGS)
+{
+	Oid			statextid = PG_GETARG_OID(0);
+	char	   *res;
+
+	res = pg_get_statisticsobj_worker(statextid, true, true);
 
 	if (res == NULL)
 		PG_RETURN_NULL();
@@ -1519,7 +1549,7 @@ pg_get_statisticsobjdef(PG_FUNCTION_ARGS)
  * Internal workhorse to decompile an extended statistics object.
  */
 static char *
-pg_get_statisticsobj_worker(Oid statextid, bool missing_ok)
+pg_get_statisticsobj_worker(Oid statextid, bool columns_only, bool missing_ok)
 {
 	Form_pg_statistic_ext statextrec;
 	HeapTuple	statexttup;
@@ -1534,6 +1564,11 @@ pg_get_statisticsobj_worker(Oid statextid, bool missing_ok)
 	bool		dependencies_enabled;
 	bool		mcv_enabled;
 	int			i;
+	List	   *context;
+	ListCell   *lc;
+	List	   *exprs = NIL;
+	bool		has_exprs;
+	int			ncolumns;
 
 	statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
 
@@ -1544,75 +1579,114 @@ pg_get_statisticsobj_worker(Oid statextid, bool missing_ok)
 		elog(ERROR, "cache lookup failed for statistics object %u", statextid);
 	}
 
+	/* has the statistics expressions? */
+	has_exprs = !heap_attisnull(statexttup, Anum_pg_statistic_ext_stxexprs, NULL);
+
 	statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup);
 
+	/*
+	 * Get the statistics expressions, if any.  (NOTE: we do not use the
+	 * relcache versions of the expressions, because we want to display
+	 * non-const-folded expressions.)
+	 */
+	if (has_exprs)
+	{
+		Datum		exprsDatum;
+		bool		isnull;
+		char	   *exprsString;
+
+		exprsDatum = SysCacheGetAttr(STATEXTOID, statexttup,
+									 Anum_pg_statistic_ext_stxexprs, &isnull);
+		Assert(!isnull);
+		exprsString = TextDatumGetCString(exprsDatum);
+		exprs = (List *) stringToNode(exprsString);
+		pfree(exprsString);
+	}
+	else
+		exprs = NIL;
+
+	/* count the number of columns (attributes and expressions) */
+	ncolumns = statextrec->stxkeys.dim1 + list_length(exprs);
+
 	initStringInfo(&buf);
 
-	nsp = get_namespace_name(statextrec->stxnamespace);
-	appendStringInfo(&buf, "CREATE STATISTICS %s",
-					 quote_qualified_identifier(nsp,
-												NameStr(statextrec->stxname)));
-
-	/*
-	 * Decode the stxkind column so that we know which stats types to print.
-	 */
-	datum = SysCacheGetAttr(STATEXTOID, statexttup,
-							Anum_pg_statistic_ext_stxkind, &isnull);
-	Assert(!isnull);
-	arr = DatumGetArrayTypeP(datum);
-	if (ARR_NDIM(arr) != 1 ||
-		ARR_HASNULL(arr) ||
-		ARR_ELEMTYPE(arr) != CHAROID)
-		elog(ERROR, "stxkind is not a 1-D char array");
-	enabled = (char *) ARR_DATA_PTR(arr);
-
-	ndistinct_enabled = false;
-	dependencies_enabled = false;
-	mcv_enabled = false;
-
-	for (i = 0; i < ARR_DIMS(arr)[0]; i++)
+	if (!columns_only)
 	{
-		if (enabled[i] == STATS_EXT_NDISTINCT)
-			ndistinct_enabled = true;
-		if (enabled[i] == STATS_EXT_DEPENDENCIES)
-			dependencies_enabled = true;
-		if (enabled[i] == STATS_EXT_MCV)
-			mcv_enabled = true;
-	}
+		nsp = get_namespace_name(statextrec->stxnamespace);
+		appendStringInfo(&buf, "CREATE STATISTICS %s",
+						 quote_qualified_identifier(nsp,
+													NameStr(statextrec->stxname)));
 
-	/*
-	 * If any option is disabled, then we'll need to append the types clause
-	 * to show which options are enabled.  We omit the types clause on purpose
-	 * when all options are enabled, so a pg_dump/pg_restore will create all
-	 * statistics types on a newer postgres version, if the statistics had all
-	 * options enabled on the original version.
-	 */
-	if (!ndistinct_enabled || !dependencies_enabled || !mcv_enabled)
-	{
-		bool		gotone = false;
+		/*
+		 * Decode the stxkind column so that we know which stats types to
+		 * print.
+		 */
+		datum = SysCacheGetAttr(STATEXTOID, statexttup,
+								Anum_pg_statistic_ext_stxkind, &isnull);
+		Assert(!isnull);
+		arr = DatumGetArrayTypeP(datum);
+		if (ARR_NDIM(arr) != 1 ||
+			ARR_HASNULL(arr) ||
+			ARR_ELEMTYPE(arr) != CHAROID)
+			elog(ERROR, "stxkind is not a 1-D char array");
+		enabled = (char *) ARR_DATA_PTR(arr);
 
-		appendStringInfoString(&buf, " (");
+		ndistinct_enabled = false;
+		dependencies_enabled = false;
+		mcv_enabled = false;
 
-		if (ndistinct_enabled)
+		for (i = 0; i < ARR_DIMS(arr)[0]; i++)
 		{
-			appendStringInfoString(&buf, "ndistinct");
-			gotone = true;
+			if (enabled[i] == STATS_EXT_NDISTINCT)
+				ndistinct_enabled = true;
+			else if (enabled[i] == STATS_EXT_DEPENDENCIES)
+				dependencies_enabled = true;
+			else if (enabled[i] == STATS_EXT_MCV)
+				mcv_enabled = true;
+
+			/* ignore STATS_EXT_EXPRESSIONS (it's built automatically) */
 		}
 
-		if (dependencies_enabled)
+		/*
+		 * If any option is disabled, then we'll need to append the types
+		 * clause to show which options are enabled.  We omit the types clause
+		 * on purpose when all options are enabled, so a pg_dump/pg_restore
+		 * will create all statistics types on a newer postgres version, if
+		 * the statistics had all options enabled on the original version.
+		 *
+		 * But if the statistics is defined on just a single column, it has to
+		 * be an expression statistics. In that case we don't need to specify
+		 * kinds.
+		 */
+		if ((!ndistinct_enabled || !dependencies_enabled || !mcv_enabled) &&
+			(ncolumns > 1))
 		{
-			appendStringInfo(&buf, "%sdependencies", gotone ? ", " : "");
-			gotone = true;
+			bool		gotone = false;
+
+			appendStringInfoString(&buf, " (");
+
+			if (ndistinct_enabled)
+			{
+				appendStringInfoString(&buf, "ndistinct");
+				gotone = true;
+			}
+
+			if (dependencies_enabled)
+			{
+				appendStringInfo(&buf, "%sdependencies", gotone ? ", " : "");
+				gotone = true;
+			}
+
+			if (mcv_enabled)
+				appendStringInfo(&buf, "%smcv", gotone ? ", " : "");
+
+			appendStringInfoChar(&buf, ')');
 		}
 
-		if (mcv_enabled)
-			appendStringInfo(&buf, "%smcv", gotone ? ", " : "");
-
-		appendStringInfoChar(&buf, ')');
+		appendStringInfoString(&buf, " ON ");
 	}
 
-	appendStringInfoString(&buf, " ON ");
-
+	/* decode simple column references */
 	for (colno = 0; colno < statextrec->stxkeys.dim1; colno++)
 	{
 		AttrNumber	attnum = statextrec->stxkeys.values[colno];
@@ -1626,14 +1700,109 @@ pg_get_statisticsobj_worker(Oid statextid, bool missing_ok)
 		appendStringInfoString(&buf, quote_identifier(attname));
 	}
 
-	appendStringInfo(&buf, " FROM %s",
-					 generate_relation_name(statextrec->stxrelid, NIL));
+	context = deparse_context_for(get_relation_name(statextrec->stxrelid),
+								  statextrec->stxrelid);
+
+	foreach(lc, exprs)
+	{
+		Node	   *expr = (Node *) lfirst(lc);
+		char	   *str;
+		int			prettyFlags = PRETTYFLAG_INDENT;
+
+		str = deparse_expression_pretty(expr, context, false, false,
+										prettyFlags, 0);
+
+		if (colno > 0)
+			appendStringInfoString(&buf, ", ");
+
+		/* Need parens if it's not a bare function call */
+		if (looks_like_function(expr))
+			appendStringInfoString(&buf, str);
+		else
+			appendStringInfo(&buf, "(%s)", str);
+
+		colno++;
+	}
+
+	if (!columns_only)
+		appendStringInfo(&buf, " FROM %s",
+						 generate_relation_name(statextrec->stxrelid, NIL));
 
 	ReleaseSysCache(statexttup);
 
 	return buf.data;
 }
 
+/*
+ * Generate text array of expressions for statistics object.
+ */
+Datum
+pg_get_statisticsobjdef_expressions(PG_FUNCTION_ARGS)
+{
+	Oid			statextid = PG_GETARG_OID(0);
+	Form_pg_statistic_ext statextrec;
+	HeapTuple	statexttup;
+	Datum		datum;
+	bool		isnull;
+	List	   *context;
+	ListCell   *lc;
+	List	   *exprs = NIL;
+	bool		has_exprs;
+	char	   *tmp;
+	ArrayBuildState *astate = NULL;
+
+	statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
+
+	if (!HeapTupleIsValid(statexttup))
+		elog(ERROR, "cache lookup failed for statistics object %u", statextid);
+
+	/* has the statistics expressions? */
+	has_exprs = !heap_attisnull(statexttup, Anum_pg_statistic_ext_stxexprs, NULL);
+
+	/* no expressions? we're done */
+	if (!has_exprs)
+	{
+		ReleaseSysCache(statexttup);
+		PG_RETURN_NULL();
+	}
+
+	statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup);
+
+	/*
+	 * Get the statistics expressions, and deparse them into text values.
+	 */
+	datum = SysCacheGetAttr(STATEXTOID, statexttup,
+							Anum_pg_statistic_ext_stxexprs, &isnull);
+
+	Assert(!isnull);
+	tmp = TextDatumGetCString(datum);
+	exprs = (List *) stringToNode(tmp);
+	pfree(tmp);
+
+	context = deparse_context_for(get_relation_name(statextrec->stxrelid),
+								  statextrec->stxrelid);
+
+	foreach(lc, exprs)
+	{
+		Node	   *expr = (Node *) lfirst(lc);
+		char	   *str;
+		int			prettyFlags = PRETTYFLAG_INDENT;
+
+		str = deparse_expression_pretty(expr, context, false, false,
+										prettyFlags, 0);
+
+		astate = accumArrayResult(astate,
+								  PointerGetDatum(cstring_to_text(str)),
+								  false,
+								  TEXTOID,
+								  CurrentMemoryContext);
+	}
+
+	ReleaseSysCache(statexttup);
+
+	PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
+
 /*
  * pg_get_partkeydef
  *
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 2348d4a772..7e41bc5641 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3430,6 +3430,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 		 * If examine_variable is able to deduce anything about the GROUP BY
 		 * expression, treat it as a single variable even if it's really more
 		 * complicated.
+		 *
+		 * XXX This has the consequence that if there's a statistics on the
+		 * expression, we don't split it into individual Vars. This affects
+		 * our selection of statistics in estimate_multivariate_ndistinct,
+		 * because it's probably better to use more accurate estimate for
+		 * each expression and treat them as independent, than to combine
+		 * estimates for the extracted variables when we don't know how that
+		 * relates to the expressions.
 		 */
 		examine_variable(root, groupexpr, 0, &vardata);
 		if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
@@ -3880,50 +3888,77 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 								List **varinfos, double *ndistinct)
 {
 	ListCell   *lc;
-	Bitmapset  *attnums = NULL;
-	int			nmatches;
+	int			nmatches_vars;
+	int			nmatches_exprs;
 	Oid			statOid = InvalidOid;
 	MVNDistinct *stats;
-	Bitmapset  *matched = NULL;
+	StatisticExtInfo *matched_info = NULL;
 
 	/* bail out immediately if the table has no extended statistics */
 	if (!rel->statlist)
 		return false;
 
-	/* Determine the attnums we're looking for */
-	foreach(lc, *varinfos)
-	{
-		GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
-		AttrNumber	attnum;
-
-		Assert(varinfo->rel == rel);
-
-		if (!IsA(varinfo->var, Var))
-			continue;
-
-		attnum = ((Var *) varinfo->var)->varattno;
-
-		if (!AttrNumberIsForUserDefinedAttr(attnum))
-			continue;
-
-		attnums = bms_add_member(attnums, attnum);
-	}
-
 	/* look for the ndistinct statistics matching the most vars */
-	nmatches = 1;				/* we require at least two matches */
+	nmatches_vars = 0;			/* we require at least two matches */
+	nmatches_exprs = 0;
 	foreach(lc, rel->statlist)
 	{
+		ListCell   *lc2;
 		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
-		Bitmapset  *shared;
-		int			nshared;
+		int			nshared_vars = 0;
+		int			nshared_exprs = 0;
 
 		/* skip statistics of other kinds */
 		if (info->kind != STATS_EXT_NDISTINCT)
 			continue;
 
-		/* compute attnums shared by the vars and the statistics object */
-		shared = bms_intersect(info->keys, attnums);
-		nshared = bms_num_members(shared);
+		/*
+		 * Determine how many expressions (and variables in non-matched
+		 * expressions) match. We'll then use these numbers to pick the
+		 * statistics object that best matches the clauses.
+		 */
+		foreach(lc2, *varinfos)
+		{
+			ListCell   *lc3;
+			GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
+			AttrNumber	attnum;
+
+			Assert(varinfo->rel == rel);
+
+			/* simple Var, search in statistics keys directly */
+			if (IsA(varinfo->var, Var))
+			{
+				attnum = ((Var *) varinfo->var)->varattno;
+
+				/*
+				 * Ignore system attributes - we don't support statistics on
+				 * them, so can't match them (and it'd fail as the values are
+				 * negative).
+				 */
+				if (!AttrNumberIsForUserDefinedAttr(attnum))
+					continue;
+
+				if (bms_is_member(attnum, info->keys))
+					nshared_vars++;
+
+				continue;
+			}
+
+			/* expression - see if it's in the statistics */
+			foreach(lc3, info->exprs)
+			{
+				Node	   *expr = (Node *) lfirst(lc3);
+
+				if (equal(varinfo->var, expr))
+				{
+					nshared_exprs++;
+					break;
+				}
+			}
+		}
+
+		if (nshared_vars + nshared_exprs < 2)
+			continue;
 
 		/*
 		 * Does this statistics object match more columns than the currently
@@ -3932,18 +3967,21 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 		 * XXX This should break ties using name of the object, or something
 		 * like that, to make the outcome stable.
 		 */
-		if (nshared > nmatches)
+		if ((nshared_exprs > nmatches_exprs) ||
+			(((nshared_exprs == nmatches_exprs)) && (nshared_vars > nmatches_vars)))
 		{
 			statOid = info->statOid;
-			nmatches = nshared;
-			matched = shared;
+			nmatches_vars = nshared_vars;
+			nmatches_exprs = nshared_exprs;
+			matched_info = info;
 		}
 	}
 
 	/* No match? */
 	if (statOid == InvalidOid)
 		return false;
-	Assert(nmatches > 1 && matched != NULL);
+
+	Assert(nmatches_vars + nmatches_exprs > 1);
 
 	stats = statext_ndistinct_load(statOid);
 
@@ -3956,20 +3994,135 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 		int			i;
 		List	   *newlist = NIL;
 		MVNDistinctItem *item = NULL;
+		ListCell   *lc2;
+		Bitmapset  *matched = NULL;
+		AttrNumber	attnum_offset;
+
+		/*
+		 * How much we need to offset the attnums? If there are no
+		 * expressions, no offset is needed. Otherwise offset enough to move
+		 * the lowest one (which is equal to number of expressions) to 1.
+		 */
+		if (matched_info->exprs)
+			attnum_offset = (list_length(matched_info->exprs) + 1);
+		else
+			attnum_offset = 0;
+
+		/* see what actually matched */
+		foreach(lc2, *varinfos)
+		{
+			ListCell   *lc3;
+			int			idx;
+			bool		found = false;
+
+			GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
+
+			/*
+			 * Process a simple Var expression, by matching it to keys
+			 * directly. If there's a matchine expression, we'll try
+			 * matching it later.
+			 */
+			if (IsA(varinfo->var, Var))
+			{
+				AttrNumber	attnum = ((Var *) varinfo->var)->varattno;
+
+				/*
+				 * Ignore expressions on system attributes. Can't rely on
+				 * the bms check for negative values.
+				 */
+				if (!AttrNumberIsForUserDefinedAttr(attnum))
+					continue;
+
+				/* Is the variable covered by the statistics? */
+				if (!bms_is_member(attnum, matched_info->keys))
+					continue;
+
+				attnum = attnum + attnum_offset;
+
+				/* ensure sufficient offset */
+				Assert(AttrNumberIsForUserDefinedAttr(attnum));
+
+				matched = bms_add_member(matched, attnum);
+
+				found = true;
+			}
+
+			/*
+			 * XXX Maybe we should allow searching the expressions even if we
+			 * found an attribute matching the expression? That would handle
+			 * trivial expressions like "(a)" but it seems fairly useless.
+			 */
+			if (found)
+				continue;
+
+			/* expression - see if it's in the statistics */
+			idx = 0;
+			foreach(lc3, matched_info->exprs)
+			{
+				Node	   *expr = (Node *) lfirst(lc3);
+
+				if (equal(varinfo->var, expr))
+				{
+					AttrNumber	attnum = -(idx + 1);
+
+					attnum = attnum + attnum_offset;
+
+					/* ensure sufficient offset */
+					Assert(AttrNumberIsForUserDefinedAttr(attnum));
+
+					matched = bms_add_member(matched, attnum);
+
+					/* there should be just one matching expression */
+					break;
+				}
+
+				idx++;
+			}
+		}
 
 		/* Find the specific item that exactly matches the combination */
 		for (i = 0; i < stats->nitems; i++)
 		{
+			int			j;
 			MVNDistinctItem *tmpitem = &stats->items[i];
 
-			if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
+			if (tmpitem->nattributes != bms_num_members(matched))
+				continue;
+
+			/* assume it's the right item */
+			item = tmpitem;
+
+			/* check that all item attributes/expressions fit the match */
+			for (j = 0; j < tmpitem->nattributes; j++)
 			{
-				item = tmpitem;
-				break;
+				AttrNumber	attnum = tmpitem->attributes[j];
+
+				/*
+				 * Thanks to how we constructed the matched bitmap above, we
+				 * can just offset all attnums the same way.
+				 */
+				attnum = attnum + attnum_offset;
+
+				if (!bms_is_member(attnum, matched))
+				{
+					/* nah, it's not this item */
+					item = NULL;
+					break;
+				}
 			}
+
+			/*
+			 * If the item has all the matched attributes, we know it's the
+			 * right one - there can't be a better one. matching more.
+			 */
+			if (item)
+				break;
 		}
 
-		/* make sure we found an item */
+		/*
+		 * Make sure we found an item. There has to be one, because ndistinct
+		 * statistics includes all combinations of attributes.
+		 */
 		if (!item)
 			elog(ERROR, "corrupt MVNDistinct entry");
 
@@ -3977,18 +4130,63 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 		foreach(lc, *varinfos)
 		{
 			GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
-			AttrNumber	attnum;
+			ListCell   *lc3;
+			bool		found = false;
 
-			if (!IsA(varinfo->var, Var))
+			/*
+			 * Let's look at plain variables first, because it's the most
+			 * common case and the check is quite cheap. We can simply get the
+			 * attnum and check (with an offset) matched bitmap.
+			 */
+			if (IsA(varinfo->var, Var))
 			{
-				newlist = lappend(newlist, varinfo);
+				AttrNumber	attnum = ((Var *) varinfo->var)->varattno;
+
+				/*
+				 * If it's a system attribute, we're done. We don't support
+				 * extended statistics on system attributes, so it's clearly
+				 * not matched. Just keep the expression and continue.
+				 */
+				if (!AttrNumberIsForUserDefinedAttr(attnum))
+				{
+					newlist = lappend(newlist, varinfo);
+					continue;
+				}
+
+				/* apply the same offset as above */
+				attnum += attnum_offset;
+
+				/* if it's not matched, keep the varinfo */
+				if (!bms_is_member(attnum, matched))
+					newlist = lappend(newlist, varinfo);
+
+				/* The rest of the loop deals with complex expressions. */
 				continue;
 			}
 
-			attnum = ((Var *) varinfo->var)->varattno;
+			/*
+			 * Process complex expressions, not just simple Vars.
+			 *
+			 * First, we search for an exact match of an expression. If we
+			 * find one, we can just discard the whole GroupExprInfo, with all
+			 * the variables we extracted from it.
+			 *
+			 * Otherwise we inspect the individual vars, and try matching it
+			 * to variables in the item.
+			 */
+			foreach(lc3, matched_info->exprs)
+			{
+				Node	   *expr = (Node *) lfirst(lc3);
 
-			if (AttrNumberIsForUserDefinedAttr(attnum) &&
-				bms_is_member(attnum, matched))
+				if (equal(varinfo->var, expr))
+				{
+					found = true;
+					break;
+				}
+			}
+
+			/* found exact match, skip */
+			if (found)
 				continue;
 
 			newlist = lappend(newlist, varinfo);
@@ -4690,6 +4888,13 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
 		*join_is_reversed = false;
 }
 
+/* statext_expressions_load copies the tuple, so just pfree it. */
+static void
+ReleaseDummy(HeapTuple tuple)
+{
+	pfree(tuple);
+}
+
 /*
  * examine_variable
  *		Try to look up statistical data about an expression.
@@ -4830,6 +5035,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 		 * operator we are estimating for.  FIXME later.
 		 */
 		ListCell   *ilist;
+		ListCell   *slist;
 
 		foreach(ilist, onerel->indexlist)
 		{
@@ -4986,6 +5192,129 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 			if (vardata->statsTuple)
 				break;
 		}
+
+		/*
+		 * Search extended statistics for one with a matching expression.
+		 * There might be multiple ones, so just grab the first one. In the
+		 * future, we might consider the statistics target (and pick the most
+		 * accurate statistics) and maybe some other parameters.
+		 */
+		foreach(slist, onerel->statlist)
+		{
+			StatisticExtInfo *info = (StatisticExtInfo *) lfirst(slist);
+			ListCell   *expr_item;
+			int			pos;
+
+			/*
+			 * Stop once we've found statistics for the expression (either
+			 * from extended stats, or for an index in the preceding loop).
+			 */
+			if (vardata->statsTuple)
+				break;
+
+			/* skip stats without per-expression stats */
+			if (info->kind != STATS_EXT_EXPRESSIONS)
+				continue;
+
+			pos = 0;
+			foreach(expr_item, info->exprs)
+			{
+				Node	   *expr = (Node *) lfirst(expr_item);
+
+				Assert(expr);
+
+				/* strip RelabelType before comparing it */
+				if (expr && IsA(expr, RelabelType))
+					expr = (Node *) ((RelabelType *) expr)->arg;
+
+				/* found a match, see if we can extract pg_statistic row */
+				if (equal(node, expr))
+				{
+					HeapTuple	t = statext_expressions_load(info->statOid, pos);
+
+					/* Get index's table for permission check */
+					RangeTblEntry *rte;
+					Oid			userid;
+
+					vardata->statsTuple = t;
+
+					/*
+					 * XXX Not sure if we should cache the tuple somewhere.
+					 * Now we just create a new copy every time.
+					 */
+					vardata->freefunc = ReleaseDummy;
+
+					rte = planner_rt_fetch(onerel->relid, root);
+					Assert(rte->rtekind == RTE_RELATION);
+
+					/*
+					 * Use checkAsUser if it's set, in case we're accessing
+					 * the table via a view.
+					 */
+					userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+
+					/*
+					 * For simplicity, we insist on the whole table being
+					 * selectable, rather than trying to identify which
+					 * column(s) the statistics depends on.  Also require all
+					 * rows to be selectable --- there must be no
+					 * securityQuals from security barrier views or RLS
+					 * policies.
+					 */
+					vardata->acl_ok =
+						rte->securityQuals == NIL &&
+						(pg_class_aclcheck(rte->relid, userid,
+										   ACL_SELECT) == ACLCHECK_OK);
+
+					/*
+					 * If the user doesn't have permissions to access an
+					 * inheritance child relation, check the permissions of
+					 * the table actually mentioned in the query, since most
+					 * likely the user does have that permission.  Note that
+					 * whole-table select privilege on the parent doesn't
+					 * quite guarantee that the user could read all columns of
+					 * the child. But in practice it's unlikely that any
+					 * interesting security violation could result from
+					 * allowing access to the expression stats, so we allow it
+					 * anyway.  See similar code in examine_simple_variable()
+					 * for additional comments.
+					 */
+					if (!vardata->acl_ok &&
+						root->append_rel_array != NULL)
+					{
+						AppendRelInfo *appinfo;
+						Index		varno = onerel->relid;
+
+						appinfo = root->append_rel_array[varno];
+						while (appinfo &&
+							   planner_rt_fetch(appinfo->parent_relid,
+												root)->rtekind == RTE_RELATION)
+						{
+							varno = appinfo->parent_relid;
+							appinfo = root->append_rel_array[varno];
+						}
+						if (varno != onerel->relid)
+						{
+							/* Repeat access check on this rel */
+							rte = planner_rt_fetch(varno, root);
+							Assert(rte->rtekind == RTE_RELATION);
+
+							userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+
+							vardata->acl_ok =
+								rte->securityQuals == NIL &&
+								(pg_class_aclcheck(rte->relid,
+												   userid,
+												   ACL_SELECT) == ACLCHECK_OK);
+						}
+					}
+
+					break;
+				}
+
+				pos++;
+			}
+		}
 	}
 }
 
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index 737e46464a..86113df29c 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -2637,6 +2637,18 @@ my %tests = (
 		unlike => { exclude_dump_test_schema => 1, },
 	},
 
+	'CREATE STATISTICS extended_stats_expression' => {
+		create_order => 99,
+		create_sql   => 'CREATE STATISTICS dump_test.test_ext_stats_expr
+							ON (2 * col1) FROM dump_test.test_fifth_table',
+		regexp => qr/^
+			\QCREATE STATISTICS dump_test.test_ext_stats_expr ON ((2 * col1)) FROM dump_test.test_fifth_table;\E
+		    /xms,
+		like =>
+		  { %full_runs, %dump_test_schema_runs, section_post_data => 1, },
+		unlike => { exclude_dump_test_schema => 1, },
+	},
+
 	'CREATE SEQUENCE test_table_col1_seq' => {
 		regexp => qr/^
 			\QCREATE SEQUENCE dump_test.test_table_col1_seq\E
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index e56cc43e11..440249ff69 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -2712,7 +2712,104 @@ describeOneTableDetails(const char *schemaname,
 		}
 
 		/* print any extended statistics */
-		if (pset.sversion >= 100000)
+		if (pset.sversion >= 140000)
+		{
+			printfPQExpBuffer(&buf,
+							  "SELECT oid, "
+							  "stxrelid::pg_catalog.regclass, "
+							  "stxnamespace::pg_catalog.regnamespace AS nsp, "
+							  "stxname,\n"
+							  "pg_get_statisticsobjdef_columns(oid) AS columns,\n"
+							  "  'd' = any(stxkind) AS ndist_enabled,\n"
+							  "  'f' = any(stxkind) AS deps_enabled,\n"
+							  "  'm' = any(stxkind) AS mcv_enabled,\n"
+							  "stxstattarget\n"
+							  "FROM pg_catalog.pg_statistic_ext stat\n"
+							  "WHERE stxrelid = '%s'\n"
+							  "ORDER BY 1;",
+							  oid);
+
+			result = PSQLexec(buf.data);
+			if (!result)
+				goto error_return;
+			else
+				tuples = PQntuples(result);
+
+			if (tuples > 0)
+			{
+				printTableAddFooter(&cont, _("Statistics objects:"));
+
+				for (i = 0; i < tuples; i++)
+				{
+					bool		gotone = false;
+					bool		has_ndistinct;
+					bool		has_dependencies;
+					bool		has_mcv;
+					bool		has_all;
+					bool		has_some;
+
+					has_ndistinct = (strcmp(PQgetvalue(result, i, 5), "t") == 0);
+					has_dependencies = (strcmp(PQgetvalue(result, i, 6), "t") == 0);
+					has_mcv = (strcmp(PQgetvalue(result, i, 7), "t") == 0);
+
+					printfPQExpBuffer(&buf, "    ");
+
+					/* statistics object name (qualified with namespace) */
+					appendPQExpBuffer(&buf, "\"%s\".\"%s\"",
+									  PQgetvalue(result, i, 2),
+									  PQgetvalue(result, i, 3));
+
+					/*
+					 * When printing kinds we ignore expression statistics,
+					 * which is used only internally and can't be specified by
+					 * user. We don't print the kinds when either none are
+					 * specified (in which case it has to be statistics on a
+					 * single expr) or when all are specified (in which case
+					 * we assume it's expanded by CREATE STATISTICS).
+					 */
+					has_all = (has_ndistinct && has_dependencies && has_mcv);
+					has_some = (has_ndistinct || has_dependencies || has_mcv);
+
+					if (has_some && !has_all)
+					{
+						appendPQExpBuffer(&buf, " (");
+
+						/* options */
+						if (has_ndistinct)
+						{
+							appendPQExpBufferStr(&buf, "ndistinct");
+							gotone = true;
+						}
+
+						if (has_dependencies)
+						{
+							appendPQExpBuffer(&buf, "%sdependencies", gotone ? ", " : "");
+							gotone = true;
+						}
+
+						if (has_mcv)
+						{
+							appendPQExpBuffer(&buf, "%smcv", gotone ? ", " : "");
+						}
+
+						appendPQExpBuffer(&buf, ")");
+					}
+
+					appendPQExpBuffer(&buf, " ON %s FROM %s",
+									  PQgetvalue(result, i, 4),
+									  PQgetvalue(result, i, 1));
+
+					/* Show the stats target if it's not default */
+					if (strcmp(PQgetvalue(result, i, 8), "-1") != 0)
+						appendPQExpBuffer(&buf, "; STATISTICS %s",
+										  PQgetvalue(result, i, 8));
+
+					printTableAddFooter(&cont, buf.data);
+				}
+			}
+			PQclear(result);
+		}
+		else if (pset.sversion >= 100000)
 		{
 			printfPQExpBuffer(&buf,
 							  "SELECT oid, "
@@ -4468,18 +4565,27 @@ listExtendedStats(const char *pattern)
 	printfPQExpBuffer(&buf,
 					  "SELECT \n"
 					  "es.stxnamespace::pg_catalog.regnamespace::text AS \"%s\", \n"
-					  "es.stxname AS \"%s\", \n"
-					  "pg_catalog.format('%%s FROM %%s', \n"
-					  "  (SELECT pg_catalog.string_agg(pg_catalog.quote_ident(a.attname),', ') \n"
-					  "   FROM pg_catalog.unnest(es.stxkeys) s(attnum) \n"
-					  "   JOIN pg_catalog.pg_attribute a \n"
-					  "   ON (es.stxrelid = a.attrelid \n"
-					  "   AND a.attnum = s.attnum \n"
-					  "   AND NOT a.attisdropped)), \n"
-					  "es.stxrelid::regclass) AS \"%s\"",
+					  "es.stxname AS \"%s\", \n",
 					  gettext_noop("Schema"),
-					  gettext_noop("Name"),
-					  gettext_noop("Definition"));
+					  gettext_noop("Name"));
+
+	if (pset.sversion >= 140000)
+		appendPQExpBuffer(&buf,
+						  "pg_catalog.format('%%s FROM %%s', \n"
+						  "  pg_get_statisticsobjdef_columns(es.oid), \n"
+						  "  es.stxrelid::regclass) AS \"%s\"",
+						  gettext_noop("Definition"));
+	else
+		appendPQExpBuffer(&buf,
+						  "pg_catalog.format('%%s FROM %%s', \n"
+						  "  (SELECT pg_catalog.string_agg(pg_catalog.quote_ident(a.attname),', ') \n"
+						  "   FROM pg_catalog.unnest(es.stxkeys) s(attnum) \n"
+						  "   JOIN pg_catalog.pg_attribute a \n"
+						  "   ON (es.stxrelid = a.attrelid \n"
+						  "   AND a.attnum = s.attnum \n"
+						  "   AND NOT a.attisdropped)), \n"
+						  "es.stxrelid::regclass) AS \"%s\"",
+						  gettext_noop("Definition"));
 
 	appendPQExpBuffer(&buf,
 					  ",\nCASE WHEN 'd' = any(es.stxkind) THEN 'defined' \n"
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 474ee2982b..4a39da3c9d 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202103265
+#define CATALOG_VERSION_NO	202103266
 
 #endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index ecf12f4639..cc7d90d2b0 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3658,6 +3658,14 @@
   proname => 'pg_get_statisticsobjdef', provolatile => 's',
   prorettype => 'text', proargtypes => 'oid',
   prosrc => 'pg_get_statisticsobjdef' },
+{ oid => '8887', descr => 'extended statistics columns',
+  proname => 'pg_get_statisticsobjdef_columns', provolatile => 's',
+  prorettype => 'text', proargtypes => 'oid',
+  prosrc => 'pg_get_statisticsobjdef_columns' },
+{ oid => '8886', descr => 'extended statistics expressions',
+  proname => 'pg_get_statisticsobjdef_expressions', provolatile => 's',
+  prorettype => '_text', proargtypes => 'oid',
+  prosrc => 'pg_get_statisticsobjdef_expressions' },
 { oid => '3352', descr => 'partition key description',
   proname => 'pg_get_partkeydef', provolatile => 's', prorettype => 'text',
   proargtypes => 'oid', prosrc => 'pg_get_partkeydef' },
diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h
index 29649f5814..36912ce528 100644
--- a/src/include/catalog/pg_statistic_ext.h
+++ b/src/include/catalog/pg_statistic_ext.h
@@ -54,6 +54,9 @@ CATALOG(pg_statistic_ext,3381,StatisticExtRelationId)
 #ifdef CATALOG_VARLEN
 	char		stxkind[1] BKI_FORCE_NOT_NULL;	/* statistics kinds requested
 												 * to build */
+	pg_node_tree stxexprs;		/* A list of expression trees for stats
+								 * attributes that are not simple column
+								 * references. */
 #endif
 
 } FormData_pg_statistic_ext;
@@ -81,6 +84,7 @@ DECLARE_ARRAY_FOREIGN_KEY((stxrelid, stxkeys), pg_attribute, (attrelid, attnum))
 #define STATS_EXT_NDISTINCT			'd'
 #define STATS_EXT_DEPENDENCIES		'f'
 #define STATS_EXT_MCV				'm'
+#define STATS_EXT_EXPRESSIONS		'e'
 
 #endif							/* EXPOSE_TO_CLIENT_CODE */
 
diff --git a/src/include/catalog/pg_statistic_ext_data.h b/src/include/catalog/pg_statistic_ext_data.h
index 2f2577c218..5729154383 100644
--- a/src/include/catalog/pg_statistic_ext_data.h
+++ b/src/include/catalog/pg_statistic_ext_data.h
@@ -38,6 +38,7 @@ CATALOG(pg_statistic_ext_data,3429,StatisticExtDataRelationId)
 	pg_ndistinct stxdndistinct; /* ndistinct coefficients (serialized) */
 	pg_dependencies stxddependencies;	/* dependencies (serialized) */
 	pg_mcv_list stxdmcv;		/* MCV (serialized) */
+	pg_statistic stxdexpr[1];	/* stats for expressions */
 
 #endif
 
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index 1a79540c94..339f29f4c8 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -81,9 +81,7 @@ extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt);
 extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt);
 extern ObjectAddress AlterStatistics(AlterStatsStmt *stmt);
 extern void RemoveStatisticsById(Oid statsOid);
-extern void UpdateStatisticsForTypeChange(Oid statsOid,
-										  Oid relationOid, int attnum,
-										  Oid oldColumnType, Oid newColumnType);
+extern Oid StatisticsGetRelation(Oid statId, bool missing_ok);
 
 /* commands/aggregatecmds.c */
 extern ObjectAddress DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle,
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index e22df890ef..299956f329 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -454,6 +454,7 @@ typedef enum NodeTag
 	T_TypeName,
 	T_ColumnDef,
 	T_IndexElem,
+	T_StatsElem,
 	T_Constraint,
 	T_DefElem,
 	T_RangeTblEntry,
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 0ce19d98ec..12e0e026dc 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1914,7 +1914,8 @@ typedef enum AlterTableType
 	AT_AddIdentity,				/* ADD IDENTITY */
 	AT_SetIdentity,				/* SET identity column options */
 	AT_DropIdentity,			/* DROP IDENTITY */
-	AT_AlterCollationRefreshVersion /* ALTER COLLATION ... REFRESH VERSION */
+	AT_AlterCollationRefreshVersion, /* ALTER COLLATION ... REFRESH VERSION */
+	AT_ReAddStatistics			/* internal to commands/tablecmds.c */
 } AlterTableType;
 
 typedef struct ReplicaIdentityStmt
@@ -2872,8 +2873,24 @@ typedef struct CreateStatsStmt
 	List	   *relations;		/* rels to build stats on (list of RangeVar) */
 	char	   *stxcomment;		/* comment to apply to stats, or NULL */
 	bool		if_not_exists;	/* do nothing if stats name already exists */
+	bool		transformed;	/* true when transformStatsStmt is finished */
 } CreateStatsStmt;
 
+/*
+ * StatsElem - statistics parameters (used in CREATE STATISTICS)
+ *
+ * For a plain attribute, 'name' is the name of the referenced table column
+ * and 'expr' is NULL.  For an expression, 'name' is NULL and 'expr' is the
+ * expression tree.
+ */
+typedef struct StatsElem
+{
+	NodeTag		type;
+	char	   *name;			/* name of attribute to index, or NULL */
+	Node	   *expr;			/* expression to index, or NULL */
+} StatsElem;
+
+
 /* ----------------------
  *		Alter Statistics Statement
  * ----------------------
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index c13642e35e..e4b554f811 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -923,6 +923,7 @@ typedef struct StatisticExtInfo
 	RelOptInfo *rel;			/* back-link to statistic's table */
 	char		kind;			/* statistics kind of this entry */
 	Bitmapset  *keys;			/* attnums of the columns covered */
+	List	   *exprs;			/* expressions */
 } StatisticExtInfo;
 
 /*
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index 176b9f37c1..a71d7e1f74 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -69,6 +69,7 @@ typedef enum ParseExprKind
 	EXPR_KIND_FUNCTION_DEFAULT, /* default parameter value for function */
 	EXPR_KIND_INDEX_EXPRESSION, /* index expression */
 	EXPR_KIND_INDEX_PREDICATE,	/* index predicate */
+	EXPR_KIND_STATS_EXPRESSION, /* extended statistics expression */
 	EXPR_KIND_ALTER_COL_TRANSFORM,	/* transform expr in ALTER COLUMN TYPE */
 	EXPR_KIND_EXECUTE_PARAMETER,	/* parameter value in EXECUTE */
 	EXPR_KIND_TRIGGER_WHEN,		/* WHEN condition in CREATE TRIGGER */
diff --git a/src/include/parser/parse_utilcmd.h b/src/include/parser/parse_utilcmd.h
index bfa4a6b0f2..1056bf081b 100644
--- a/src/include/parser/parse_utilcmd.h
+++ b/src/include/parser/parse_utilcmd.h
@@ -26,6 +26,8 @@ extern AlterTableStmt *transformAlterTableStmt(Oid relid, AlterTableStmt *stmt,
 											   List **afterStmts);
 extern IndexStmt *transformIndexStmt(Oid relid, IndexStmt *stmt,
 									 const char *queryString);
+extern CreateStatsStmt *transformStatsStmt(Oid relid, CreateStatsStmt *stmt,
+										   const char *queryString);
 extern void transformRuleStmt(RuleStmt *stmt, const char *queryString,
 							  List **actions, Node **whereClause);
 extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt);
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index a0a3cf5b0f..55cd9252a5 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -57,19 +57,27 @@ typedef struct SortItem
 	int			count;
 } SortItem;
 
-extern MVNDistinct *statext_ndistinct_build(double totalrows,
-											int numrows, HeapTuple *rows,
-											Bitmapset *attrs, VacAttrStats **stats);
+/* a unified representation of the data the statistics is built on */
+typedef struct StatsBuildData
+{
+	int			numrows;
+	int			nattnums;
+	AttrNumber *attnums;
+	VacAttrStats **stats;
+	Datum	  **values;
+	bool	  **nulls;
+} StatsBuildData;
+
+
+extern MVNDistinct *statext_ndistinct_build(double totalrows, StatsBuildData *data);
 extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
 extern MVNDistinct *statext_ndistinct_deserialize(bytea *data);
 
-extern MVDependencies *statext_dependencies_build(int numrows, HeapTuple *rows,
-												  Bitmapset *attrs, VacAttrStats **stats);
+extern MVDependencies *statext_dependencies_build(StatsBuildData *data);
 extern bytea *statext_dependencies_serialize(MVDependencies *dependencies);
 extern MVDependencies *statext_dependencies_deserialize(bytea *data);
 
-extern MCVList *statext_mcv_build(int numrows, HeapTuple *rows,
-								  Bitmapset *attrs, VacAttrStats **stats,
+extern MCVList *statext_mcv_build(StatsBuildData *data,
 								  double totalrows, int stattarget);
 extern bytea *statext_mcv_serialize(MCVList *mcv, VacAttrStats **stats);
 extern MCVList *statext_mcv_deserialize(bytea *data);
@@ -85,14 +93,14 @@ extern int	multi_sort_compare_dims(int start, int end, const SortItem *a,
 extern int	compare_scalars_simple(const void *a, const void *b, void *arg);
 extern int	compare_datums_simple(Datum a, Datum b, SortSupport ssup);
 
-extern AttrNumber *build_attnums_array(Bitmapset *attrs, int *numattrs);
+extern AttrNumber *build_attnums_array(Bitmapset *attrs, int nexprs, int *numattrs);
 
-extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
-									TupleDesc tdesc, MultiSortSupport mss,
+extern SortItem *build_sorted_items(StatsBuildData *data, int *nitems,
+									MultiSortSupport mss,
 									int numattrs, AttrNumber *attnums);
 
-extern bool examine_clause_args(List *args, Var **varp,
-								Const **cstp, bool *varonleftp);
+extern bool examine_opclause_args(List *args, Node **exprp,
+								  Const **cstp, bool *expronleftp);
 
 extern Selectivity mcv_combine_selectivities(Selectivity simple_sel,
 											 Selectivity mcv_sel,
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index fec50688ea..326cf26fea 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -26,7 +26,8 @@
 typedef struct MVNDistinctItem
 {
 	double		ndistinct;		/* ndistinct value for this combination */
-	Bitmapset  *attrs;			/* attr numbers of items */
+	int			nattributes;	/* number of attributes */
+	AttrNumber *attributes;		/* attribute numbers */
 } MVNDistinctItem;
 
 /* A MVNDistinct object, comprising all possible combinations of columns */
@@ -121,6 +122,8 @@ extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
 extern bool has_stats_of_kind(List *stats, char requiredkind);
 extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
 												Bitmapset **clause_attnums,
+												List **clause_exprs,
 												int nclauses);
+extern HeapTuple statext_expressions_load(Oid stxoid, int idx);
 
 #endif							/* STATISTICS_H */
diff --git a/src/include/utils/ruleutils.h b/src/include/utils/ruleutils.h
index ac3d0a6742..d333e5e8a5 100644
--- a/src/include/utils/ruleutils.h
+++ b/src/include/utils/ruleutils.h
@@ -41,4 +41,6 @@ extern char *generate_collation_name(Oid collid);
 extern char *generate_opclass_name(Oid opclass);
 extern char *get_range_partbound_string(List *bound_datums);
 
+extern char *pg_get_statisticsobjdef_string(Oid statextid);
+
 #endif							/* RULEUTILS_H */
diff --git a/src/test/regress/expected/create_table_like.out b/src/test/regress/expected/create_table_like.out
index 10d17be23c..4dc5e6aa5f 100644
--- a/src/test/regress/expected/create_table_like.out
+++ b/src/test/regress/expected/create_table_like.out
@@ -304,7 +304,9 @@ CREATE TABLE ctlt1 (a text CHECK (length(a) > 2) PRIMARY KEY, b text);
 CREATE INDEX ctlt1_b_key ON ctlt1 (b);
 CREATE INDEX ctlt1_fnidx ON ctlt1 ((a || b));
 CREATE STATISTICS ctlt1_a_b_stat ON a,b FROM ctlt1;
+CREATE STATISTICS ctlt1_expr_stat ON (a || b) FROM ctlt1;
 COMMENT ON STATISTICS ctlt1_a_b_stat IS 'ab stats';
+COMMENT ON STATISTICS ctlt1_expr_stat IS 'ab expr stats';
 COMMENT ON COLUMN ctlt1.a IS 'A';
 COMMENT ON COLUMN ctlt1.b IS 'B';
 COMMENT ON CONSTRAINT ctlt1_a_check ON ctlt1 IS 't1_a_check';
@@ -414,7 +416,8 @@ Indexes:
 Check constraints:
     "ctlt1_a_check" CHECK (length(a) > 2)
 Statistics objects:
-    "public"."ctlt_all_a_b_stat" (ndistinct, dependencies, mcv) ON a, b FROM ctlt_all
+    "public"."ctlt_all_a_b_stat" ON a, b FROM ctlt_all
+    "public"."ctlt_all_expr_stat" ON ((a || b)) FROM ctlt_all
 
 SELECT c.relname, objsubid, description FROM pg_description, pg_index i, pg_class c WHERE classoid = 'pg_class'::regclass AND objoid = i.indexrelid AND c.oid = i.indexrelid AND i.indrelid = 'ctlt_all'::regclass ORDER BY c.relname, objsubid;
     relname     | objsubid | description 
@@ -424,10 +427,11 @@ SELECT c.relname, objsubid, description FROM pg_description, pg_index i, pg_clas
 (2 rows)
 
 SELECT s.stxname, objsubid, description FROM pg_description, pg_statistic_ext s WHERE classoid = 'pg_statistic_ext'::regclass AND objoid = s.oid AND s.stxrelid = 'ctlt_all'::regclass ORDER BY s.stxname, objsubid;
-      stxname      | objsubid | description 
--------------------+----------+-------------
- ctlt_all_a_b_stat |        0 | ab stats
-(1 row)
+      stxname       | objsubid |  description  
+--------------------+----------+---------------
+ ctlt_all_a_b_stat  |        0 | ab stats
+ ctlt_all_expr_stat |        0 | ab expr stats
+(2 rows)
 
 CREATE TABLE inh_error1 () INHERITS (ctlt1, ctlt4);
 NOTICE:  merging multiple inherited definitions of column "a"
@@ -452,7 +456,8 @@ Indexes:
 Check constraints:
     "ctlt1_a_check" CHECK (length(a) > 2)
 Statistics objects:
-    "public"."pg_attrdef_a_b_stat" (ndistinct, dependencies, mcv) ON a, b FROM public.pg_attrdef
+    "public"."pg_attrdef_a_b_stat" ON a, b FROM public.pg_attrdef
+    "public"."pg_attrdef_expr_stat" ON ((a || b)) FROM public.pg_attrdef
 
 DROP TABLE public.pg_attrdef;
 -- Check that LIKE isn't confused when new table masks the old, either
@@ -473,7 +478,8 @@ Indexes:
 Check constraints:
     "ctlt1_a_check" CHECK (length(a) > 2)
 Statistics objects:
-    "ctl_schema"."ctlt1_a_b_stat" (ndistinct, dependencies, mcv) ON a, b FROM ctlt1
+    "ctl_schema"."ctlt1_a_b_stat" ON a, b FROM ctlt1
+    "ctl_schema"."ctlt1_expr_stat" ON ((a || b)) FROM ctlt1
 
 ROLLBACK;
 DROP TABLE ctlt1, ctlt2, ctlt3, ctlt4, ctlt12_storage, ctlt12_comments, ctlt1_inh, ctlt13_inh, ctlt13_like, ctlt_all, ctla, ctlb CASCADE;
diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out
index 50d046d3ef..1461e947cd 100644
--- a/src/test/regress/expected/oidjoins.out
+++ b/src/test/regress/expected/oidjoins.out
@@ -151,11 +151,6 @@ NOTICE:  checking pg_aggregate {aggmfinalfn} => pg_proc {oid}
 NOTICE:  checking pg_aggregate {aggsortop} => pg_operator {oid}
 NOTICE:  checking pg_aggregate {aggtranstype} => pg_type {oid}
 NOTICE:  checking pg_aggregate {aggmtranstype} => pg_type {oid}
-NOTICE:  checking pg_statistic_ext {stxrelid} => pg_class {oid}
-NOTICE:  checking pg_statistic_ext {stxnamespace} => pg_namespace {oid}
-NOTICE:  checking pg_statistic_ext {stxowner} => pg_authid {oid}
-NOTICE:  checking pg_statistic_ext {stxrelid,stxkeys} => pg_attribute {attrelid,attnum}
-NOTICE:  checking pg_statistic_ext_data {stxoid} => pg_statistic_ext {oid}
 NOTICE:  checking pg_statistic {starelid} => pg_class {oid}
 NOTICE:  checking pg_statistic {staop1} => pg_operator {oid}
 NOTICE:  checking pg_statistic {staop2} => pg_operator {oid}
@@ -168,6 +163,11 @@ NOTICE:  checking pg_statistic {stacoll3} => pg_collation {oid}
 NOTICE:  checking pg_statistic {stacoll4} => pg_collation {oid}
 NOTICE:  checking pg_statistic {stacoll5} => pg_collation {oid}
 NOTICE:  checking pg_statistic {starelid,staattnum} => pg_attribute {attrelid,attnum}
+NOTICE:  checking pg_statistic_ext {stxrelid} => pg_class {oid}
+NOTICE:  checking pg_statistic_ext {stxnamespace} => pg_namespace {oid}
+NOTICE:  checking pg_statistic_ext {stxowner} => pg_authid {oid}
+NOTICE:  checking pg_statistic_ext {stxrelid,stxkeys} => pg_attribute {attrelid,attnum}
+NOTICE:  checking pg_statistic_ext_data {stxoid} => pg_statistic_ext {oid}
 NOTICE:  checking pg_rewrite {ev_class} => pg_class {oid}
 NOTICE:  checking pg_trigger {tgrelid} => pg_class {oid}
 NOTICE:  checking pg_trigger {tgparentid} => pg_trigger {oid}
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 9b12cc122a..9b59a7b4a5 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2418,6 +2418,7 @@ pg_stats_ext| SELECT cn.nspname AS schemaname,
     ( SELECT array_agg(a.attname ORDER BY a.attnum) AS array_agg
            FROM (unnest(s.stxkeys) k(k)
              JOIN pg_attribute a ON (((a.attrelid = s.stxrelid) AND (a.attnum = k.k))))) AS attnames,
+    pg_get_statisticsobjdef_expressions(s.oid) AS exprs,
     s.stxkind AS kinds,
     sd.stxdndistinct AS n_distinct,
     sd.stxddependencies AS dependencies,
@@ -2439,6 +2440,78 @@ pg_stats_ext| SELECT cn.nspname AS schemaname,
            FROM (unnest(s.stxkeys) k(k)
              JOIN pg_attribute a ON (((a.attrelid = s.stxrelid) AND (a.attnum = k.k))))
           WHERE (NOT has_column_privilege(c.oid, a.attnum, 'select'::text))))) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid))));
+pg_stats_ext_exprs| SELECT cn.nspname AS schemaname,
+    c.relname AS tablename,
+    sn.nspname AS statistics_schemaname,
+    s.stxname AS statistics_name,
+    pg_get_userbyid(s.stxowner) AS statistics_owner,
+    stat.expr,
+    (stat.a).stanullfrac AS null_frac,
+    (stat.a).stawidth AS avg_width,
+    (stat.a).stadistinct AS n_distinct,
+        CASE
+            WHEN ((stat.a).stakind1 = 1) THEN (stat.a).stavalues1
+            WHEN ((stat.a).stakind2 = 1) THEN (stat.a).stavalues2
+            WHEN ((stat.a).stakind3 = 1) THEN (stat.a).stavalues3
+            WHEN ((stat.a).stakind4 = 1) THEN (stat.a).stavalues4
+            WHEN ((stat.a).stakind5 = 1) THEN (stat.a).stavalues5
+            ELSE NULL::anyarray
+        END AS most_common_vals,
+        CASE
+            WHEN ((stat.a).stakind1 = 1) THEN (stat.a).stanumbers1
+            WHEN ((stat.a).stakind2 = 1) THEN (stat.a).stanumbers2
+            WHEN ((stat.a).stakind3 = 1) THEN (stat.a).stanumbers3
+            WHEN ((stat.a).stakind4 = 1) THEN (stat.a).stanumbers4
+            WHEN ((stat.a).stakind5 = 1) THEN (stat.a).stanumbers5
+            ELSE NULL::real[]
+        END AS most_common_freqs,
+        CASE
+            WHEN ((stat.a).stakind1 = 2) THEN (stat.a).stavalues1
+            WHEN ((stat.a).stakind2 = 2) THEN (stat.a).stavalues2
+            WHEN ((stat.a).stakind3 = 2) THEN (stat.a).stavalues3
+            WHEN ((stat.a).stakind4 = 2) THEN (stat.a).stavalues4
+            WHEN ((stat.a).stakind5 = 2) THEN (stat.a).stavalues5
+            ELSE NULL::anyarray
+        END AS histogram_bounds,
+        CASE
+            WHEN ((stat.a).stakind1 = 3) THEN (stat.a).stanumbers1[1]
+            WHEN ((stat.a).stakind2 = 3) THEN (stat.a).stanumbers2[1]
+            WHEN ((stat.a).stakind3 = 3) THEN (stat.a).stanumbers3[1]
+            WHEN ((stat.a).stakind4 = 3) THEN (stat.a).stanumbers4[1]
+            WHEN ((stat.a).stakind5 = 3) THEN (stat.a).stanumbers5[1]
+            ELSE NULL::real
+        END AS correlation,
+        CASE
+            WHEN ((stat.a).stakind1 = 4) THEN (stat.a).stavalues1
+            WHEN ((stat.a).stakind2 = 4) THEN (stat.a).stavalues2
+            WHEN ((stat.a).stakind3 = 4) THEN (stat.a).stavalues3
+            WHEN ((stat.a).stakind4 = 4) THEN (stat.a).stavalues4
+            WHEN ((stat.a).stakind5 = 4) THEN (stat.a).stavalues5
+            ELSE NULL::anyarray
+        END AS most_common_elems,
+        CASE
+            WHEN ((stat.a).stakind1 = 4) THEN (stat.a).stanumbers1
+            WHEN ((stat.a).stakind2 = 4) THEN (stat.a).stanumbers2
+            WHEN ((stat.a).stakind3 = 4) THEN (stat.a).stanumbers3
+            WHEN ((stat.a).stakind4 = 4) THEN (stat.a).stanumbers4
+            WHEN ((stat.a).stakind5 = 4) THEN (stat.a).stanumbers5
+            ELSE NULL::real[]
+        END AS most_common_elem_freqs,
+        CASE
+            WHEN ((stat.a).stakind1 = 5) THEN (stat.a).stanumbers1
+            WHEN ((stat.a).stakind2 = 5) THEN (stat.a).stanumbers2
+            WHEN ((stat.a).stakind3 = 5) THEN (stat.a).stanumbers3
+            WHEN ((stat.a).stakind4 = 5) THEN (stat.a).stanumbers4
+            WHEN ((stat.a).stakind5 = 5) THEN (stat.a).stanumbers5
+            ELSE NULL::real[]
+        END AS elem_count_histogram
+   FROM (((((pg_statistic_ext s
+     JOIN pg_class c ON ((c.oid = s.stxrelid)))
+     LEFT JOIN pg_statistic_ext_data sd ON ((s.oid = sd.stxoid)))
+     LEFT JOIN pg_namespace cn ON ((cn.oid = c.relnamespace)))
+     LEFT JOIN pg_namespace sn ON ((sn.oid = s.stxnamespace)))
+     JOIN LATERAL ( SELECT unnest(pg_get_statisticsobjdef_expressions(s.oid)) AS expr,
+            unnest(sd.stxdexpr) AS a) stat ON ((stat.expr IS NOT NULL)));
 pg_tables| SELECT n.nspname AS schemaname,
     c.relname AS tablename,
     pg_get_userbyid(c.relowner) AS tableowner,
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 07af1e29e3..60e9bfcd78 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -25,7 +25,7 @@ begin
 end;
 $$;
 -- Verify failures
-CREATE TABLE ext_stats_test (x int, y int, z int);
+CREATE TABLE ext_stats_test (x text, y int, z int);
 CREATE STATISTICS tst;
 ERROR:  syntax error at or near ";"
 LINE 1: CREATE STATISTICS tst;
@@ -44,12 +44,25 @@ CREATE STATISTICS tst ON a, b FROM ext_stats_test;
 ERROR:  column "a" does not exist
 CREATE STATISTICS tst ON x, x, y FROM ext_stats_test;
 ERROR:  duplicate column name in statistics definition
-CREATE STATISTICS tst ON x + y FROM ext_stats_test;
-ERROR:  only simple column references are allowed in CREATE STATISTICS
-CREATE STATISTICS tst ON (x, y) FROM ext_stats_test;
-ERROR:  only simple column references are allowed in CREATE STATISTICS
+CREATE STATISTICS tst ON x, x, y, x, x, y, x, x, y FROM ext_stats_test;
+ERROR:  cannot have more than 8 columns in statistics
+CREATE STATISTICS tst ON x, x, y, x, x, (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test;
+ERROR:  cannot have more than 8 columns in statistics
+CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test;
+ERROR:  cannot have more than 8 columns in statistics
+CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), y FROM ext_stats_test;
+ERROR:  duplicate expression in statistics definition
 CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test;
 ERROR:  unrecognized statistics kind "unrecognized"
+-- incorrect expressions
+CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses
+ERROR:  syntax error at or near "+"
+LINE 1: CREATE STATISTICS tst ON y + z FROM ext_stats_test;
+                                   ^
+CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression
+ERROR:  syntax error at or near ","
+LINE 1: CREATE STATISTICS tst ON (x, y) FROM ext_stats_test;
+                                   ^
 DROP TABLE ext_stats_test;
 -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
@@ -79,7 +92,7 @@ ALTER TABLE ab1 DROP COLUMN a;
  b      | integer |           |          | 
  c      | integer |           |          | 
 Statistics objects:
-    "public"."ab1_b_c_stats" (ndistinct, dependencies, mcv) ON b, c FROM ab1
+    "public"."ab1_b_c_stats" ON b, c FROM ab1
 
 -- Ensure statistics are dropped when table is
 SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%';
@@ -111,7 +124,7 @@ ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0;
  a      | integer |           |          | 
  b      | integer |           |          | 
 Statistics objects:
-    "public"."ab1_a_b_stats" (ndistinct, dependencies, mcv) ON a, b FROM ab1; STATISTICS 0
+    "public"."ab1_a_b_stats" ON a, b FROM ab1; STATISTICS 0
 
 ANALYZE ab1;
 SELECT stxname, stxdndistinct, stxddependencies, stxdmcv
@@ -131,7 +144,7 @@ ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1;
  a      | integer |           |          |         | plain   |              | 
  b      | integer |           |          |         | plain   |              | 
 Statistics objects:
-    "public"."ab1_a_b_stats" (ndistinct, dependencies, mcv) ON a, b FROM ab1
+    "public"."ab1_a_b_stats" ON a, b FROM ab1
 
 -- partial analyze doesn't build stats either
 ANALYZE ab1 (a);
@@ -150,6 +163,39 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
 ANALYZE ab1;
 DROP TABLE ab1 CASCADE;
 NOTICE:  drop cascades to table ab1c
+-- basic test for statistics on expressions
+CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
+-- expression stats may be built on a single expression column
+CREATE STATISTICS ab1_exprstat_1 ON (a+b) FROM ab1;
+-- with a single expression, we only enable expression statistics
+CREATE STATISTICS ab1_exprstat_2 ON (a+b) FROM ab1;
+SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_2';
+ stxkind 
+---------
+ {e}
+(1 row)
+
+-- adding anything to the expression builds all statistics kinds
+CREATE STATISTICS ab1_exprstat_3 ON (a+b), a FROM ab1;
+SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_3';
+  stxkind  
+-----------
+ {d,f,m,e}
+(1 row)
+
+-- date_trunc on timestamptz is not immutable, but that should not matter
+CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1;
+-- date_trunc on timestamp is immutable
+CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1;
+-- insert some data and run analyze, to test that these cases build properly
+INSERT INTO ab1
+SELECT
+    generate_series(1,10),
+    generate_series(1,10),
+    generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
+    generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
+ANALYZE ab1;
+DROP TABLE ab1;
 -- Verify supported object types for extended statistics
 CREATE schema tststats;
 CREATE TABLE tststats.t (a int, b int, c text);
@@ -244,6 +290,30 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c
        200 |     11
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
+ estimated | actual 
+-----------+--------
+       100 |     11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+ estimated | actual 
+-----------+--------
+       100 |     11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+ estimated | actual 
+-----------+--------
+       100 |     11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+ estimated | actual 
+-----------+--------
+       100 |     11
+(1 row)
+
 -- correct command
 CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
 ANALYZE ndistinct;
@@ -282,6 +352,32 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b
         11 |     11
 (1 row)
 
+-- partial improvement (match on attributes)
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
+ estimated | actual 
+-----------+--------
+        11 |     11
+(1 row)
+
+-- expressions - no improvement
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+ estimated | actual 
+-----------+--------
+        11 |     11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+ estimated | actual 
+-----------+--------
+        11 |     11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+ estimated | actual 
+-----------+--------
+        11 |     11
+(1 row)
+
 -- last two plans keep using Group Aggregate, because 'd' is not covered
 -- by the statistic and while it's NULL-only we assume 200 values for it
 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
@@ -343,6 +439,30 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d
        200 |     13
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
+ estimated | actual 
+-----------+--------
+       221 |    221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+ estimated | actual 
+-----------+--------
+       221 |    221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+ estimated | actual 
+-----------+--------
+       221 |    221
+(1 row)
+
 DROP STATISTICS s10;
 SELECT s.stxkind, d.stxdndistinct
   FROM pg_statistic_ext s, pg_statistic_ext_data d
@@ -383,6 +503,413 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d
        200 |     13
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
+ estimated | actual 
+-----------+--------
+       100 |    221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+ estimated | actual 
+-----------+--------
+       100 |    221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+ estimated | actual 
+-----------+--------
+       100 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+ estimated | actual 
+-----------+--------
+       100 |    221
+(1 row)
+
+-- ndistinct estimates with statistics on expressions
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+ estimated | actual 
+-----------+--------
+       100 |    221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+ estimated | actual 
+-----------+--------
+       100 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+ estimated | actual 
+-----------+--------
+       100 |    221
+(1 row)
+
+CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct;
+ANALYZE ndistinct;
+SELECT s.stxkind, d.stxdndistinct
+  FROM pg_statistic_ext s, pg_statistic_ext_data d
+ WHERE s.stxrelid = 'ndistinct'::regclass
+   AND d.stxoid = s.oid;
+ stxkind |                           stxdndistinct                           
+---------+-------------------------------------------------------------------
+ {d,e}   | {"-1, -2": 221, "-1, -3": 247, "-2, -3": 323, "-1, -2, -3": 1000}
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+ estimated | actual 
+-----------+--------
+       221 |    221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+ estimated | actual 
+-----------+--------
+       221 |    221
+(1 row)
+
+DROP STATISTICS s10;
+-- a mix of attributes and expressions
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual 
+-----------+--------
+       100 |    221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (2*c)');
+ estimated | actual 
+-----------+--------
+       100 |    247
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (2*c)');
+ estimated | actual 
+-----------+--------
+       100 |   1000
+(1 row)
+
+CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct;
+ANALYZE ndistinct;
+SELECT s.stxkind, d.stxdndistinct
+  FROM pg_statistic_ext s, pg_statistic_ext_data d
+ WHERE s.stxrelid = 'ndistinct'::regclass
+   AND d.stxoid = s.oid;
+ stxkind |                        stxdndistinct                        
+---------+-------------------------------------------------------------
+ {d,e}   | {"3, 4": 221, "3, -1": 247, "4, -1": 323, "3, 4, -1": 1000}
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual 
+-----------+--------
+       221 |    221
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (2*c)');
+ estimated | actual 
+-----------+--------
+       247 |    247
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (2*c)');
+ estimated | actual 
+-----------+--------
+      1000 |   1000
+(1 row)
+
+DROP STATISTICS s10;
+-- combination of multiple ndistinct statistics, with/without expressions
+TRUNCATE ndistinct;
+-- two mostly independent groups of columns
+INSERT INTO ndistinct (a, b, c, d)
+     SELECT mod(i,3), mod(i,9), mod(i,5), mod(i,20)
+       FROM generate_series(1,1000) s(i);
+ANALYZE ndistinct;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual 
+-----------+--------
+        27 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+ estimated | actual 
+-----------+--------
+        27 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+ estimated | actual 
+-----------+--------
+        27 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+ estimated | actual 
+-----------+--------
+        27 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+ estimated | actual 
+-----------+--------
+       100 |     45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+ estimated | actual 
+-----------+--------
+       100 |     45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+ estimated | actual 
+-----------+--------
+       100 |    180
+(1 row)
+
+-- basic statistics on both attributes (no expressions)
+CREATE STATISTICS s11 (ndistinct) ON a, b FROM ndistinct;
+CREATE STATISTICS s12 (ndistinct) ON c, d FROM ndistinct;
+ANALYZE ndistinct;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+ estimated | actual 
+-----------+--------
+        45 |     45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+ estimated | actual 
+-----------+--------
+        45 |     45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+ estimated | actual 
+-----------+--------
+       100 |    180
+(1 row)
+
+-- replace the second statistics by statistics on expressions
+DROP STATISTICS s12;
+CREATE STATISTICS s12 (ndistinct) ON (c * 10), (d - 1) FROM ndistinct;
+ANALYZE ndistinct;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+ estimated | actual 
+-----------+--------
+        45 |     45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+ estimated | actual 
+-----------+--------
+        45 |     45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+ estimated | actual 
+-----------+--------
+       100 |    180
+(1 row)
+
+-- replace the second statistics by statistics on both attributes and expressions
+DROP STATISTICS s12;
+CREATE STATISTICS s12 (ndistinct) ON c, d, (c * 10), (d - 1) FROM ndistinct;
+ANALYZE ndistinct;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+ estimated | actual 
+-----------+--------
+        45 |     45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+ estimated | actual 
+-----------+--------
+        45 |     45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+ estimated | actual 
+-----------+--------
+       100 |    180
+(1 row)
+
+-- replace the other statistics by statistics on both attributes and expressions
+DROP STATISTICS s11;
+CREATE STATISTICS s11 (ndistinct) ON a, b, (a*5), (b+1) FROM ndistinct;
+ANALYZE ndistinct;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+ estimated | actual 
+-----------+--------
+        45 |     45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+ estimated | actual 
+-----------+--------
+        45 |     45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+ estimated | actual 
+-----------+--------
+       100 |    180
+(1 row)
+
+-- replace statistics by somewhat overlapping ones (this expected to get worse estimate
+-- because the first statistics shall be applied to 3 columns, and the second one can't
+-- be really applied)
+DROP STATISTICS s11;
+DROP STATISTICS s12;
+CREATE STATISTICS s11 (ndistinct) ON a, b, (a*5), (b+1) FROM ndistinct;
+CREATE STATISTICS s12 (ndistinct) ON a, (b+1), (c * 10) FROM ndistinct;
+ANALYZE ndistinct;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+ estimated | actual 
+-----------+--------
+         9 |      9
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+ estimated | actual 
+-----------+--------
+        45 |     45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+ estimated | actual 
+-----------+--------
+       100 |     45
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+ estimated | actual 
+-----------+--------
+       100 |    180
+(1 row)
+
+DROP STATISTICS s11;
+DROP STATISTICS s12;
 -- functional dependencies tests
 CREATE TABLE functional_dependencies (
     filler1 TEXT,
@@ -427,6 +954,40 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
          3 |      3
 (1 row)
 
+-- a => b, a => c, b => c
+TRUNCATE functional_dependencies;
+DROP STATISTICS func_deps_stat;
+-- now do the same thing, but with expressions
+INSERT INTO functional_dependencies (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+ANALYZE functional_dependencies;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     35
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1');
+ estimated | actual 
+-----------+--------
+         1 |      5
+(1 row)
+
+-- create statistics
+CREATE STATISTICS func_deps_stat (dependencies) ON (mod(a,11)), (mod(b::int, 13)), (mod(c, 7)) FROM functional_dependencies;
+ANALYZE functional_dependencies;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1');
+ estimated | actual 
+-----------+--------
+        35 |     35
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1');
+ estimated | actual 
+-----------+--------
+         5 |      5
+(1 row)
+
 -- a => b, a => c, b => c
 TRUNCATE functional_dependencies;
 DROP STATISTICS func_deps_stat;
@@ -755,14 +1316,14 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
          1 |      0
 (1 row)
 
--- changing the type of column c causes its single-column stats to be dropped,
--- giving a default estimate of 0.005 * 5000 = 25 for (c = 1); check multiple
--- clauses estimated with functional dependencies does not exceed this
+-- changing the type of column c causes all its stats to be dropped, reverting
+-- to default estimates without any statistics, i.e. 0.5% selectivity for each
+-- condition
 ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
  estimated | actual 
 -----------+--------
-        25 |     50
+         1 |     50
 (1 row)
 
 ANALYZE functional_dependencies;
@@ -772,6 +1333,332 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
         50 |     50
 (1 row)
 
+DROP STATISTICS func_deps_stat;
+-- now try functional dependencies with expressions
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X''');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X'' AND (c + 1) = 2');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+-- IN
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ''1X''');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') IN (''1X'', ''2X'')');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') IN (''1X'', ''2X'')');
+ estimated | actual 
+-----------+--------
+         1 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ''1X''');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) = 2');
+ estimated | actual 
+-----------+--------
+         1 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) IN (2)');
+ estimated | actual 
+-----------+--------
+         1 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND (b || ''X'') IN (''1X'', ''2X'', ''26X'', ''27X'') AND (c + 1) IN (2, 3)');
+ estimated | actual 
+-----------+--------
+         1 |    400
+(1 row)
+
+-- OR clauses referencing the same attribute
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (b || ''X'') = ''1X''');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
+ estimated | actual 
+-----------+--------
+         1 |    200
+(1 row)
+
+-- OR clauses referencing different attributes
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (b || ''X'') = ''1X'') AND (b || ''X'') = ''1X''');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+-- ANY
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ''1X''');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual 
+-----------+--------
+         1 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = 2');
+ estimated | actual 
+-----------+--------
+         1 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = ANY (ARRAY[2])');
+ estimated | actual 
+-----------+--------
+         1 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X'', ''26X'', ''27X'']) AND (c + 1) = ANY (ARRAY[2, 3])');
+ estimated | actual 
+-----------+--------
+         1 |    400
+(1 row)
+
+-- ANY with inequalities should not benefit from functional dependencies
+-- the estimates however improve thanks to having expression statistics
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND (b || ''X'') > ''1X''');
+ estimated | actual 
+-----------+--------
+       926 |   1900
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND (b || ''X'') <= ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual 
+-----------+--------
+      1543 |   2250
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') >= ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual 
+-----------+--------
+      2229 |   2050
+(1 row)
+
+-- ALL (should not benefit from functional dependencies)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X''])');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+-- create statistics on expressions
+CREATE STATISTICS func_deps_stat (dependencies) ON (a * 2), (b || 'X'), (c + 1) FROM functional_dependencies;
+ANALYZE functional_dependencies;
+-- print the detected dependencies
+SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat';
+                                                      dependencies                                                      
+------------------------------------------------------------------------------------------------------------------------
+ {"-1 => -2": 1.000000, "-1 => -3": 1.000000, "-2 => -3": 1.000000, "-1, -2 => -3": 1.000000, "-1, -3 => -2": 1.000000}
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X''');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X'' AND (c + 1) = 2');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+-- IN
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ''1X''');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') IN (''1X'', ''2X'')');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') IN (''1X'', ''2X'')');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ''1X''');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) = 2');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) IN (2)');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND (b || ''X'') IN (''1X'', ''2X'', ''26X'', ''27X'') AND (c + 1) IN (2, 3)');
+ estimated | actual 
+-----------+--------
+       400 |    400
+(1 row)
+
+-- OR clauses referencing the same attribute
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (b || ''X'') = ''1X''');
+ estimated | actual 
+-----------+--------
+        99 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
+ estimated | actual 
+-----------+--------
+        99 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
+ estimated | actual 
+-----------+--------
+       197 |    200
+(1 row)
+
+-- OR clauses referencing different attributes
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (b || ''X'') = ''1X'') AND (b || ''X'') = ''1X''');
+ estimated | actual 
+-----------+--------
+         3 |    100
+(1 row)
+
+-- ANY
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ''1X''');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = 2');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = ANY (ARRAY[2])');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X'', ''26X'', ''27X'']) AND (c + 1) = ANY (ARRAY[2, 3])');
+ estimated | actual 
+-----------+--------
+       400 |    400
+(1 row)
+
+-- ANY with inequalities should not benefit from functional dependencies
+-- the estimates however improve thanks to having expression statistics
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND (b || ''X'') > ''1X''');
+ estimated | actual 
+-----------+--------
+      1957 |   1900
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND (b || ''X'') <= ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual 
+-----------+--------
+      2933 |   2250
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') >= ANY (ARRAY[''1X'', ''2X''])');
+ estimated | actual 
+-----------+--------
+      3548 |   2050
+(1 row)
+
+-- ALL (should not benefit from functional dependencies)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X''])');
+ estimated | actual 
+-----------+--------
+         2 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
 -- check the ability to use multiple functional dependencies
 CREATE TABLE functional_dependencies_multi (
 	a INTEGER,
@@ -896,6 +1783,39 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b =
          1 |      1
 (1 row)
 
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+-- random data (no MCV list), but with expression
+INSERT INTO mcv_lists (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,1000) s(i);
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     13
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,7)), (mod(b::int,11)), (mod(c,13)) FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1');
+ estimated | actual 
+-----------+--------
+        13 |     13
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
+
 -- 100 distinct combinations, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
@@ -1121,6 +2041,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
        200 |    200
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
  estimated | actual 
 -----------+--------
@@ -1207,6 +2133,212 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b =
         50 |     50
 (1 row)
 
+-- 100 distinct combinations, all in the MCV list, but with expressions
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+INSERT INTO mcv_lists (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,1000) s(i);
+ANALYZE mcv_lists;
+-- without any stats on the expressions, we have to use default selectivities, which
+-- is why the estimates here are different from the pre-computed case above
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
+ estimated | actual 
+-----------+--------
+       111 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
+ estimated | actual 
+-----------+--------
+       111 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+        15 |    120
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
+ estimated | actual 
+-----------+--------
+        11 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+-- create statistics with expressions only (we create three separate stats, in order not to build more complex extended stats)
+CREATE STATISTICS mcv_lists_stats_1 ON (mod(a,20)) FROM mcv_lists;
+CREATE STATISTICS mcv_lists_stats_2 ON (mod(b::int,10)) FROM mcv_lists;
+CREATE STATISTICS mcv_lists_stats_3 ON (mod(c,5)) FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
+ estimated | actual 
+-----------+--------
+         5 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
+ estimated | actual 
+-----------+--------
+         5 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
+ estimated | actual 
+-----------+--------
+         5 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
+ estimated | actual 
+-----------+--------
+         5 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+       149 |    120
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
+ estimated | actual 
+-----------+--------
+        20 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
+ estimated | actual 
+-----------+--------
+        20 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
+ estimated | actual 
+-----------+--------
+       116 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+        12 |    100
+(1 row)
+
+DROP STATISTICS mcv_lists_stats_1;
+DROP STATISTICS mcv_lists_stats_2;
+DROP STATISTICS mcv_lists_stats_3;
+-- create statistics with both MCV and expressions
+CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,20)), (mod(b::int,10)), (mod(c,5)) FROM mcv_lists;
+ANALYZE mcv_lists;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+       105 |    120
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
+ estimated | actual 
+-----------+--------
+       150 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,5) = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
 -- 100 distinct combinations with NULL values, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
@@ -1712,6 +2844,100 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR
 (1 row)
 
 DROP TABLE mcv_lists_multi;
+-- statistics on integer expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,1000) s(i);
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+CREATE STATISTICS expr_stats_1 (mcv) ON (a+b), (a-b), (2*a), (3*b) FROM expr_stats;
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+DROP STATISTICS expr_stats_1;
+DROP TABLE expr_stats;
+-- statistics on a mix columns and expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,1000) s(i);
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (2*a), (3*b), (a+b), (a-b) FROM expr_stats;
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+DROP TABLE expr_stats;
+-- statistics on expressions with different data types
+CREATE TABLE expr_stats (a int, b name, c text);
+INSERT INTO expr_stats SELECT mod(i,10), md5(mod(i,10)::text), md5(mod(i,10)::text) FROM generate_series(1,1000) s(i);
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+ estimated | actual 
+-----------+--------
+        11 |    100
+(1 row)
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (b || c), (c || b) FROM expr_stats;
+ANALYZE expr_stats;
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+DROP TABLE expr_stats;
 -- Permission tests. Users should not be able to see specific data values in
 -- the extended statistics, if they lack permission to see those values in
 -- the underlying table.
@@ -1743,21 +2969,21 @@ create statistics stts_s2.stts_yama (dependencies, mcv) on col1, col3 from stts_
 insert into stts_t1 select i,i from generate_series(1,100) i;
 analyze stts_t1;
 \dX
-                                          List of extended statistics
-  Schema  |          Name          |              Definition              | Ndistinct | Dependencies |   MCV   
-----------+------------------------+--------------------------------------+-----------+--------------+---------
- public   | func_deps_stat         | a, b, c FROM functional_dependencies |           | defined      | 
- public   | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays        |           |              | defined
- public   | mcv_lists_bool_stats   | a, b, c FROM mcv_lists_bool          |           |              | defined
- public   | mcv_lists_stats        | a, b, d FROM mcv_lists               |           |              | defined
- public   | stts_1                 | a, b FROM stts_t1                    | defined   |              | 
- public   | stts_2                 | a, b FROM stts_t1                    | defined   | defined      | 
- public   | stts_3                 | a, b FROM stts_t1                    | defined   | defined      | defined
- public   | stts_4                 | b, c FROM stts_t2                    | defined   | defined      | defined
- public   | stts_hoge              | col1, col2, col3 FROM stts_t3        | defined   | defined      | defined
- stts_s1  | stts_foo               | col1, col2 FROM stts_t3              | defined   | defined      | defined
- stts_s2  | stts_yama              | col1, col3 FROM stts_t3              |           | defined      | defined
- tststats | priv_test_stats        | a, b FROM tststats.priv_test_tbl     |           |              | defined
+                                                                List of extended statistics
+  Schema  |          Name          |                                    Definition                                    | Ndistinct | Dependencies |   MCV   
+----------+------------------------+----------------------------------------------------------------------------------+-----------+--------------+---------
+ public   | func_deps_stat         | ((a * 2)), ((b || 'X'::text)), ((c + (1)::numeric)) FROM functional_dependencies |           | defined      | 
+ public   | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays                                                    |           |              | defined
+ public   | mcv_lists_bool_stats   | a, b, c FROM mcv_lists_bool                                                      |           |              | defined
+ public   | mcv_lists_stats        | a, b, d FROM mcv_lists                                                           |           |              | defined
+ public   | stts_1                 | a, b FROM stts_t1                                                                | defined   |              | 
+ public   | stts_2                 | a, b FROM stts_t1                                                                | defined   | defined      | 
+ public   | stts_3                 | a, b FROM stts_t1                                                                | defined   | defined      | defined
+ public   | stts_4                 | b, c FROM stts_t2                                                                | defined   | defined      | defined
+ public   | stts_hoge              | col1, col2, col3 FROM stts_t3                                                    | defined   | defined      | defined
+ stts_s1  | stts_foo               | col1, col2 FROM stts_t3                                                          | defined   | defined      | defined
+ stts_s2  | stts_yama              | col1, col3 FROM stts_t3                                                          |           | defined      | defined
+ tststats | priv_test_stats        | a, b FROM tststats.priv_test_tbl                                                 |           |              | defined
 (12 rows)
 
 \dX stts_?
@@ -1778,21 +3004,21 @@ analyze stts_t1;
 (1 row)
 
 \dX+
-                                          List of extended statistics
-  Schema  |          Name          |              Definition              | Ndistinct | Dependencies |   MCV   
-----------+------------------------+--------------------------------------+-----------+--------------+---------
- public   | func_deps_stat         | a, b, c FROM functional_dependencies |           | defined      | 
- public   | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays        |           |              | defined
- public   | mcv_lists_bool_stats   | a, b, c FROM mcv_lists_bool          |           |              | defined
- public   | mcv_lists_stats        | a, b, d FROM mcv_lists               |           |              | defined
- public   | stts_1                 | a, b FROM stts_t1                    | defined   |              | 
- public   | stts_2                 | a, b FROM stts_t1                    | defined   | defined      | 
- public   | stts_3                 | a, b FROM stts_t1                    | defined   | defined      | defined
- public   | stts_4                 | b, c FROM stts_t2                    | defined   | defined      | defined
- public   | stts_hoge              | col1, col2, col3 FROM stts_t3        | defined   | defined      | defined
- stts_s1  | stts_foo               | col1, col2 FROM stts_t3              | defined   | defined      | defined
- stts_s2  | stts_yama              | col1, col3 FROM stts_t3              |           | defined      | defined
- tststats | priv_test_stats        | a, b FROM tststats.priv_test_tbl     |           |              | defined
+                                                                List of extended statistics
+  Schema  |          Name          |                                    Definition                                    | Ndistinct | Dependencies |   MCV   
+----------+------------------------+----------------------------------------------------------------------------------+-----------+--------------+---------
+ public   | func_deps_stat         | ((a * 2)), ((b || 'X'::text)), ((c + (1)::numeric)) FROM functional_dependencies |           | defined      | 
+ public   | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays                                                    |           |              | defined
+ public   | mcv_lists_bool_stats   | a, b, c FROM mcv_lists_bool                                                      |           |              | defined
+ public   | mcv_lists_stats        | a, b, d FROM mcv_lists                                                           |           |              | defined
+ public   | stts_1                 | a, b FROM stts_t1                                                                | defined   |              | 
+ public   | stts_2                 | a, b FROM stts_t1                                                                | defined   | defined      | 
+ public   | stts_3                 | a, b FROM stts_t1                                                                | defined   | defined      | defined
+ public   | stts_4                 | b, c FROM stts_t2                                                                | defined   | defined      | defined
+ public   | stts_hoge              | col1, col2, col3 FROM stts_t3                                                    | defined   | defined      | defined
+ stts_s1  | stts_foo               | col1, col2 FROM stts_t3                                                          | defined   | defined      | defined
+ stts_s2  | stts_yama              | col1, col3 FROM stts_t3                                                          |           | defined      | defined
+ tststats | priv_test_stats        | a, b FROM tststats.priv_test_tbl                                                 |           |              | defined
 (12 rows)
 
 \dX+ stts_?
@@ -1822,21 +3048,21 @@ analyze stts_t1;
 create role regress_stats_ext nosuperuser;
 set role regress_stats_ext;
 \dX
-                                          List of extended statistics
-  Schema  |          Name          |              Definition              | Ndistinct | Dependencies |   MCV   
-----------+------------------------+--------------------------------------+-----------+--------------+---------
- public   | func_deps_stat         | a, b, c FROM functional_dependencies |           | defined      | 
- public   | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays        |           |              | defined
- public   | mcv_lists_bool_stats   | a, b, c FROM mcv_lists_bool          |           |              | defined
- public   | mcv_lists_stats        | a, b, d FROM mcv_lists               |           |              | defined
- public   | stts_1                 | a, b FROM stts_t1                    | defined   |              | 
- public   | stts_2                 | a, b FROM stts_t1                    | defined   | defined      | 
- public   | stts_3                 | a, b FROM stts_t1                    | defined   | defined      | defined
- public   | stts_4                 | b, c FROM stts_t2                    | defined   | defined      | defined
- public   | stts_hoge              | col1, col2, col3 FROM stts_t3        | defined   | defined      | defined
- stts_s1  | stts_foo               | col1, col2 FROM stts_t3              | defined   | defined      | defined
- stts_s2  | stts_yama              | col1, col3 FROM stts_t3              |           | defined      | defined
- tststats | priv_test_stats        | a, b FROM tststats.priv_test_tbl     |           |              | defined
+                                                                List of extended statistics
+  Schema  |          Name          |                                    Definition                                    | Ndistinct | Dependencies |   MCV   
+----------+------------------------+----------------------------------------------------------------------------------+-----------+--------------+---------
+ public   | func_deps_stat         | ((a * 2)), ((b || 'X'::text)), ((c + (1)::numeric)) FROM functional_dependencies |           | defined      | 
+ public   | mcv_lists_arrays_stats | a, b, c FROM mcv_lists_arrays                                                    |           |              | defined
+ public   | mcv_lists_bool_stats   | a, b, c FROM mcv_lists_bool                                                      |           |              | defined
+ public   | mcv_lists_stats        | a, b, d FROM mcv_lists                                                           |           |              | defined
+ public   | stts_1                 | a, b FROM stts_t1                                                                | defined   |              | 
+ public   | stts_2                 | a, b FROM stts_t1                                                                | defined   | defined      | 
+ public   | stts_3                 | a, b FROM stts_t1                                                                | defined   | defined      | defined
+ public   | stts_4                 | b, c FROM stts_t2                                                                | defined   | defined      | defined
+ public   | stts_hoge              | col1, col2, col3 FROM stts_t3                                                    | defined   | defined      | defined
+ stts_s1  | stts_foo               | col1, col2 FROM stts_t3                                                          | defined   | defined      | defined
+ stts_s2  | stts_yama              | col1, col3 FROM stts_t3                                                          |           | defined      | defined
+ tststats | priv_test_stats        | a, b FROM tststats.priv_test_tbl                                                 |           |              | defined
 (12 rows)
 
 reset role;
diff --git a/src/test/regress/sql/create_table_like.sql b/src/test/regress/sql/create_table_like.sql
index 06b76f949d..4929d373a2 100644
--- a/src/test/regress/sql/create_table_like.sql
+++ b/src/test/regress/sql/create_table_like.sql
@@ -124,7 +124,9 @@ CREATE TABLE ctlt1 (a text CHECK (length(a) > 2) PRIMARY KEY, b text);
 CREATE INDEX ctlt1_b_key ON ctlt1 (b);
 CREATE INDEX ctlt1_fnidx ON ctlt1 ((a || b));
 CREATE STATISTICS ctlt1_a_b_stat ON a,b FROM ctlt1;
+CREATE STATISTICS ctlt1_expr_stat ON (a || b) FROM ctlt1;
 COMMENT ON STATISTICS ctlt1_a_b_stat IS 'ab stats';
+COMMENT ON STATISTICS ctlt1_expr_stat IS 'ab expr stats';
 COMMENT ON COLUMN ctlt1.a IS 'A';
 COMMENT ON COLUMN ctlt1.b IS 'B';
 COMMENT ON CONSTRAINT ctlt1_a_check ON ctlt1 IS 't1_a_check';
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index f9f12769e4..fb3af6e523 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -28,16 +28,21 @@ end;
 $$;
 
 -- Verify failures
-CREATE TABLE ext_stats_test (x int, y int, z int);
+CREATE TABLE ext_stats_test (x text, y int, z int);
 CREATE STATISTICS tst;
 CREATE STATISTICS tst ON a, b;
 CREATE STATISTICS tst FROM sometab;
 CREATE STATISTICS tst ON a, b FROM nonexistent;
 CREATE STATISTICS tst ON a, b FROM ext_stats_test;
 CREATE STATISTICS tst ON x, x, y FROM ext_stats_test;
-CREATE STATISTICS tst ON x + y FROM ext_stats_test;
-CREATE STATISTICS tst ON (x, y) FROM ext_stats_test;
+CREATE STATISTICS tst ON x, x, y, x, x, y, x, x, y FROM ext_stats_test;
+CREATE STATISTICS tst ON x, x, y, x, x, (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test;
+CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test;
+CREATE STATISTICS tst ON (x || 'x'), (x || 'x'), y FROM ext_stats_test;
 CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test;
+-- incorrect expressions
+CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses
+CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression
 DROP TABLE ext_stats_test;
 
 -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
@@ -97,6 +102,36 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
 ANALYZE ab1;
 DROP TABLE ab1 CASCADE;
 
+-- basic test for statistics on expressions
+CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
+
+-- expression stats may be built on a single expression column
+CREATE STATISTICS ab1_exprstat_1 ON (a+b) FROM ab1;
+
+-- with a single expression, we only enable expression statistics
+CREATE STATISTICS ab1_exprstat_2 ON (a+b) FROM ab1;
+SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_2';
+
+-- adding anything to the expression builds all statistics kinds
+CREATE STATISTICS ab1_exprstat_3 ON (a+b), a FROM ab1;
+SELECT stxkind FROM pg_statistic_ext WHERE stxname = 'ab1_exprstat_3';
+
+-- date_trunc on timestamptz is not immutable, but that should not matter
+CREATE STATISTICS ab1_exprstat_4 ON date_trunc('day', d) FROM ab1;
+
+-- date_trunc on timestamp is immutable
+CREATE STATISTICS ab1_exprstat_5 ON date_trunc('day', c) FROM ab1;
+
+-- insert some data and run analyze, to test that these cases build properly
+INSERT INTO ab1
+SELECT
+    generate_series(1,10),
+    generate_series(1,10),
+    generate_series('2020-10-01'::timestamp, '2020-10-10'::timestamp, interval '1 day'),
+    generate_series('2020-10-01'::timestamptz, '2020-10-10'::timestamptz, interval '1 day');
+ANALYZE ab1;
+DROP TABLE ab1;
+
 -- Verify supported object types for extended statistics
 CREATE schema tststats;
 
@@ -164,6 +199,14 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b
 
 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
 
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+
 -- correct command
 CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
 
@@ -184,6 +227,16 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c
 
 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
 
+-- partial improvement (match on attributes)
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
+
+-- expressions - no improvement
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+
 -- last two plans keep using Group Aggregate, because 'd' is not covered
 -- by the statistic and while it's NULL-only we assume 200 values for it
 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
@@ -216,6 +269,14 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c
 
 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
 
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+
 DROP STATISTICS s10;
 
 SELECT s.stxkind, d.stxdndistinct
@@ -234,6 +295,206 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c
 
 SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
 
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (a+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+
+-- ndistinct estimates with statistics on expressions
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+
+CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct;
+
+ANALYZE ndistinct;
+
+SELECT s.stxkind, d.stxdndistinct
+  FROM pg_statistic_ext s, pg_statistic_ext_data d
+ WHERE s.stxrelid = 'ndistinct'::regclass
+   AND d.stxoid = s.oid;
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100), (2*c)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (a+1), (b+100)');
+
+DROP STATISTICS s10;
+
+-- a mix of attributes and expressions
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (2*c)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (2*c)');
+
+CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct;
+
+ANALYZE ndistinct;
+
+SELECT s.stxkind, d.stxdndistinct
+  FROM pg_statistic_ext s, pg_statistic_ext_data d
+ WHERE s.stxrelid = 'ndistinct'::regclass
+   AND d.stxoid = s.oid;
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (2*c)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (2*c)');
+
+DROP STATISTICS s10;
+
+-- combination of multiple ndistinct statistics, with/without expressions
+TRUNCATE ndistinct;
+
+-- two mostly independent groups of columns
+INSERT INTO ndistinct (a, b, c, d)
+     SELECT mod(i,3), mod(i,9), mod(i,5), mod(i,20)
+       FROM generate_series(1,1000) s(i);
+
+ANALYZE ndistinct;
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+
+-- basic statistics on both attributes (no expressions)
+CREATE STATISTICS s11 (ndistinct) ON a, b FROM ndistinct;
+
+CREATE STATISTICS s12 (ndistinct) ON c, d FROM ndistinct;
+
+ANALYZE ndistinct;
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+
+
+-- replace the second statistics by statistics on expressions
+
+DROP STATISTICS s12;
+
+CREATE STATISTICS s12 (ndistinct) ON (c * 10), (d - 1) FROM ndistinct;
+
+ANALYZE ndistinct;
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+
+
+-- replace the second statistics by statistics on both attributes and expressions
+
+DROP STATISTICS s12;
+
+CREATE STATISTICS s12 (ndistinct) ON c, d, (c * 10), (d - 1) FROM ndistinct;
+
+ANALYZE ndistinct;
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+
+
+-- replace the other statistics by statistics on both attributes and expressions
+
+DROP STATISTICS s11;
+
+CREATE STATISTICS s11 (ndistinct) ON a, b, (a*5), (b+1) FROM ndistinct;
+
+ANALYZE ndistinct;
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+
+
+-- replace statistics by somewhat overlapping ones (this expected to get worse estimate
+-- because the first statistics shall be applied to 3 columns, and the second one can't
+-- be really applied)
+
+DROP STATISTICS s11;
+DROP STATISTICS s12;
+
+CREATE STATISTICS s11 (ndistinct) ON a, b, (a*5), (b+1) FROM ndistinct;
+CREATE STATISTICS s12 (ndistinct) ON a, (b+1), (c * 10) FROM ndistinct;
+
+ANALYZE ndistinct;
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), b');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a*5), (b+1), c');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, (c*10)');
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (b+1), c, (d - 1)');
+
+DROP STATISTICS s11;
+DROP STATISTICS s12;
+
 -- functional dependencies tests
 CREATE TABLE functional_dependencies (
     filler1 TEXT,
@@ -272,6 +533,29 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
 TRUNCATE functional_dependencies;
 DROP STATISTICS func_deps_stat;
 
+-- now do the same thing, but with expressions
+INSERT INTO functional_dependencies (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,5000) s(i);
+
+ANALYZE functional_dependencies;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1');
+
+-- create statistics
+CREATE STATISTICS func_deps_stat (dependencies) ON (mod(a,11)), (mod(b::int, 13)), (mod(c, 7)) FROM functional_dependencies;
+
+ANALYZE functional_dependencies;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE mod(a, 11) = 1 AND mod(b::int, 13) = 1 AND mod(c, 7) = 1');
+
+-- a => b, a => c, b => c
+TRUNCATE functional_dependencies;
+DROP STATISTICS func_deps_stat;
+
 INSERT INTO functional_dependencies (a, b, c, filler1)
      SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
 
@@ -397,9 +681,9 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
 
 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ALL (ARRAY[''1'', ''2''])');
 
--- changing the type of column c causes its single-column stats to be dropped,
--- giving a default estimate of 0.005 * 5000 = 25 for (c = 1); check multiple
--- clauses estimated with functional dependencies does not exceed this
+-- changing the type of column c causes all its stats to be dropped, reverting
+-- to default estimates without any statistics, i.e. 0.5% selectivity for each
+-- condition
 ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
 
 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
@@ -408,6 +692,132 @@ ANALYZE functional_dependencies;
 
 SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
 
+DROP STATISTICS func_deps_stat;
+
+-- now try functional dependencies with expressions
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X'' AND (c + 1) = 2');
+
+-- IN
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ''1X''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') IN (''1X'', ''2X'')');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') IN (''1X'', ''2X'')');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ''1X''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) = 2');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) IN (2)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND (b || ''X'') IN (''1X'', ''2X'', ''26X'', ''27X'') AND (c + 1) IN (2, 3)');
+
+-- OR clauses referencing the same attribute
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (b || ''X'') = ''1X''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
+
+-- OR clauses referencing different attributes
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (b || ''X'') = ''1X'') AND (b || ''X'') = ''1X''');
+
+-- ANY
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ''1X''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = 2');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = ANY (ARRAY[2])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X'', ''26X'', ''27X'']) AND (c + 1) = ANY (ARRAY[2, 3])');
+
+-- ANY with inequalities should not benefit from functional dependencies
+-- the estimates however improve thanks to having expression statistics
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND (b || ''X'') > ''1X''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND (b || ''X'') <= ANY (ARRAY[''1X'', ''2X''])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') >= ANY (ARRAY[''1X'', ''2X''])');
+
+-- ALL (should not benefit from functional dependencies)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X''])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
+
+-- create statistics on expressions
+CREATE STATISTICS func_deps_stat (dependencies) ON (a * 2), (b || 'X'), (c + 1) FROM functional_dependencies;
+
+ANALYZE functional_dependencies;
+
+-- print the detected dependencies
+SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat';
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND (b || ''X'') = ''1X'' AND (c + 1) = 2');
+
+-- IN
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ''1X''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') IN (''1X'', ''2X'')');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') IN (''1X'', ''2X'')');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ''1X''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) = 2');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 52, 102, 152) AND (b || ''X'') IN (''1X'', ''26X'') AND (c + 1) IN (2)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 52, 54, 102, 104, 152, 154) AND (b || ''X'') IN (''1X'', ''2X'', ''26X'', ''27X'') AND (c + 1) IN (2, 3)');
+
+-- OR clauses referencing the same attribute
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND (b || ''X'') = ''1X''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 102) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (a * 2) = 4 OR (a * 2) = 102 OR (a * 2) = 104) AND ((b || ''X'') = ''1X'' OR (b || ''X'') = ''2X'')');
+
+-- OR clauses referencing different attributes
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE ((a * 2) = 2 OR (b || ''X'') = ''1X'') AND (b || ''X'') = ''1X''');
+
+-- ANY
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ''1X''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 102]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X''])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = 2');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 52, 102, 152]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''26X'']) AND (c + 1) = ANY (ARRAY[2])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = ANY (ARRAY[2, 4, 52, 54, 102, 104, 152, 154]) AND (b || ''X'') = ANY (ARRAY[''1X'', ''2X'', ''26X'', ''27X'']) AND (c + 1) = ANY (ARRAY[2, 3])');
+
+-- ANY with inequalities should not benefit from functional dependencies
+-- the estimates however improve thanks to having expression statistics
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) < ANY (ARRAY[2, 102]) AND (b || ''X'') > ''1X''');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) >= ANY (ARRAY[2, 102]) AND (b || ''X'') <= ANY (ARRAY[''1X'', ''2X''])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) <= ANY (ARRAY[2, 4, 102, 104]) AND (b || ''X'') >= ANY (ARRAY[''1X'', ''2X''])');
+
+-- ALL (should not benefit from functional dependencies)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X''])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 102) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) IN (2, 4, 102, 104) AND (b || ''X'') = ALL (ARRAY[''1X'', ''2X''])');
+
 -- check the ability to use multiple functional dependencies
 CREATE TABLE functional_dependencies_multi (
 	a INTEGER,
@@ -479,6 +889,28 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b =
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
 
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+
+-- random data (no MCV list), but with expression
+INSERT INTO mcv_lists (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,1000) s(i);
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1');
+
+-- create statistics
+CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,7)), (mod(b::int,11)), (mod(c,13)) FROM mcv_lists;
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,7) = 1 AND mod(b::int,11) = 1 AND mod(c,13) = 1');
+
 -- 100 distinct combinations, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
@@ -565,6 +997,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
@@ -602,6 +1036,98 @@ ANALYZE mcv_lists;
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
 
+
+-- 100 distinct combinations, all in the MCV list, but with expressions
+TRUNCATE mcv_lists;
+DROP STATISTICS mcv_lists_stats;
+
+INSERT INTO mcv_lists (a, b, c, filler1)
+     SELECT i, i, i, i FROM generate_series(1,1000) s(i);
+
+ANALYZE mcv_lists;
+
+-- without any stats on the expressions, we have to use default selectivities, which
+-- is why the estimates here are different from the pre-computed case above
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
+
+-- create statistics with expressions only (we create three separate stats, in order not to build more complex extended stats)
+CREATE STATISTICS mcv_lists_stats_1 ON (mod(a,20)) FROM mcv_lists;
+CREATE STATISTICS mcv_lists_stats_2 ON (mod(b::int,10)) FROM mcv_lists;
+CREATE STATISTICS mcv_lists_stats_3 ON (mod(c,5)) FROM mcv_lists;
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
+
+DROP STATISTICS mcv_lists_stats_1;
+DROP STATISTICS mcv_lists_stats_2;
+DROP STATISTICS mcv_lists_stats_3;
+
+-- create statistics with both MCV and expressions
+CREATE STATISTICS mcv_lists_stats (mcv) ON (mod(a,20)), (mod(b::int,10)), (mod(c,5)) FROM mcv_lists;
+
+ANALYZE mcv_lists;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 = mod(a,20) AND 1 = mod(b::int,10)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < 1 AND mod(b::int,10) < 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 1 > mod(a,20) AND 1 > mod(b::int,10)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 AND mod(b::int,10) = 1 AND mod(c,5) = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,25) = 1 OR d IS NOT NULL');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) IN (1, 2, 51, 52, NULL) AND mod(b::int,10) IN ( 1, 2, NULL)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = ANY (ARRAY[1, 2, 51, 52]) AND mod(b::int,10) = ANY (ARRAY[1, 2])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) <= ANY (ARRAY[1, NULL, 2, 3]) AND mod(b::int,10) IN (1, 2, NULL, 3)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) < ALL (ARRAY[4, 5]) AND mod(b::int,10) IN (1, 2, 3) AND mod(c,5) > ANY (ARRAY[1, 2, 3])');
+
+-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE mod(a,20) = 1 OR mod(b::int,10) = 1 OR mod(c,5) = 1 OR d IS NOT NULL');
+
 -- 100 distinct combinations with NULL values, all in the MCV list
 TRUNCATE mcv_lists;
 DROP STATISTICS mcv_lists_stats;
@@ -894,6 +1420,57 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR
 
 DROP TABLE mcv_lists_multi;
 
+
+-- statistics on integer expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,1000) s(i);
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+
+CREATE STATISTICS expr_stats_1 (mcv) ON (a+b), (a-b), (2*a), (3*b) FROM expr_stats;
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (2*a) = 0 AND (3*b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE (a+b) = 0 AND (a-b) = 0');
+
+DROP STATISTICS expr_stats_1;
+DROP TABLE expr_stats;
+
+-- statistics on a mix columns and expressions
+CREATE TABLE expr_stats (a int, b int, c int);
+INSERT INTO expr_stats SELECT mod(i,10), mod(i,10), mod(i,10) FROM generate_series(1,1000) s(i);
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (2*a), (3*b), (a+b), (a-b) FROM expr_stats;
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (2*a) = 0 AND (3*b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 3 AND b = 3 AND (a-b) = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND b = 1 AND (a-b) = 0');
+
+DROP TABLE expr_stats;
+
+-- statistics on expressions with different data types
+CREATE TABLE expr_stats (a int, b name, c text);
+INSERT INTO expr_stats SELECT mod(i,10), md5(mod(i,10)::text), md5(mod(i,10)::text) FROM generate_series(1,1000) s(i);
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+
+CREATE STATISTICS expr_stats_1 (mcv) ON a, b, (b || c), (c || b) FROM expr_stats;
+ANALYZE expr_stats;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b || c) <= ''z'' AND (c || b) >= ''0''');
+
+DROP TABLE expr_stats;
+
+
 -- Permission tests. Users should not be able to see specific data values in
 -- the extended statistics, if they lack permission to see those values in
 -- the underlying table.