Implement multivariate n-distinct coefficients

Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations than individual table columns.  Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too.  All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.

This commit only adds support for collection of n-distinct coefficients
on user-specified sets of columns in a single table.  This is useful to
estimate the number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve.  A new
special pseudo-type pg_ndistinct is used.

(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
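
As a rough, editorial illustration (not part of the commit itself), the
estimation problem being addressed looks roughly like this; the table and
statistics names are made up and the estimated group counts are approximate:

-- two correlated columns: b is fully determined by a
CREATE TABLE t (a int, b int);
INSERT INTO t SELECT i % 100, (i % 100) % 50 FROM generate_series(1, 1000000) s(i);
ANALYZE t;
-- without extended statistics the planner assumes independence and
-- estimates roughly 100 * 50 = 5000 groups, although only 100 exist
EXPLAIN SELECT a, b FROM t GROUP BY a, b;
-- an ndistinct statistics object lets ANALYZE record the combined group count
CREATE STATISTICS t_ab ON (a, b) FROM t;
ANALYZE t;
EXPLAIN SELECT a, b FROM t GROUP BY a, b;   -- estimate drops to roughly 100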

Author: Tomas Vondra.  Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
    Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
    https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
This commit is contained in:
Alvaro Herrera 2017-03-24 14:06:10 -03:00
parent f120b614e0
commit 7b504eb282
77 changed files with 3599 additions and 63 deletions

View File

@ -295,6 +295,11 @@
<entry>planner statistics</entry>
</row>
<row>
<entry><link linkend="catalog-pg-statistic-ext"><structname>pg_statistic_ext</structname></link></entry>
<entry>extended planner statistics</entry>
</row>
<row>
<entry><link linkend="catalog-pg-subscription"><structname>pg_subscription</structname></link></entry>
<entry>logical replication subscriptions</entry>
@ -4247,6 +4252,98 @@
</table>
</sect1>
<sect1 id="catalog-pg-statistic-ext">
<title><structname>pg_statistic_ext</structname></title>
<indexterm zone="catalog-pg-statistic-ext">
<primary>pg_statistic_ext</primary>
</indexterm>
<para>
The catalog <structname>pg_statistic_ext</structname>
holds extended planner statistics.
</para>
<table>
<title><structname>pg_statistic_ext</> Columns</title>
<tgroup cols="4">
<thead>
<row>
<entry>Name</entry>
<entry>Type</entry>
<entry>References</entry>
<entry>Description</entry>
</row>
</thead>
<tbody>
<row>
<entry><structfield>starelid</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
<entry>The table that the described columns belong to</entry>
</row>
<row>
<entry><structfield>staname</structfield></entry>
<entry><type>name</type></entry>
<entry></entry>
<entry>Name of the statistic.</entry>
</row>
<row>
<entry><structfield>stanamespace</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal><link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.oid</literal></entry>
<entry>
The OID of the namespace that contains this statistic
</entry>
</row>
<row>
<entry><structfield>staowner</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal><link linkend="catalog-pg-authid"><structname>pg_authid</structname></link>.oid</literal></entry>
<entry>Owner of the statistic</entry>
</row>
<row>
<entry><structfield>staenabled</structfield></entry>
<entry><type>char[]</type></entry>
<entry></entry>
<entry>
An array with the modes of the enabled statistic types, encoded as
<literal>d</literal> for ndistinct coefficients.
</entry>
</row>
<row>
<entry><structfield>stakeys</structfield></entry>
<entry><type>int2vector</type></entry>
<entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attnum</literal></entry>
<entry>
This is an array of values that indicate which table columns this
statistic covers. For example a value of <literal>1 3</literal> would
mean that the first and the third table columns make up the statistic key.
</entry>
</row>
<row>
<entry><structfield>standistinct</structfield></entry>
<entry><type>pg_ndistinct</type></entry>
<entry></entry>
<entry>
N-distinct coefficients, serialized as <structname>pg_ndistinct</> type.
</entry>
</row>
</tbody>
</tgroup>
</table>
</sect1>
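
As an editorial aside (not part of the patch), the new catalog can be
inspected directly; using the s1 statistics created in the CREATE STATISTICS
example further down, the output would look roughly like this (values
illustrative):

SELECT staname, stakeys, staenabled, standistinct IS NOT NULL AS built
FROM pg_statistic_ext
WHERE starelid = 't1'::regclass;
--  staname | stakeys | staenabled | built
-- ---------+---------+------------+-------
--  s1      | 1 2     | {d}        | t
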
<sect1 id="catalog-pg-namespace">
<title><structname>pg_namespace</structname></title>

View File

@ -16720,6 +16720,10 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
<primary>pg_get_serial_sequence</primary>
</indexterm>
<indexterm>
<primary>pg_get_statisticsextdef</primary>
</indexterm>
<indexterm>
<primary>pg_get_triggerdef</primary>
</indexterm>
@ -16889,6 +16893,11 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
<entry>get name of the sequence that a <type>serial</type>, <type>smallserial</type> or <type>bigserial</type> column
uses</entry>
</row>
<row>
<entry><literal><function>pg_get_statisticsextdef(<parameter>statext_oid</parameter>)</function></literal></entry>
<entry><type>text</type></entry>
<entry>get <command>CREATE STATISTICS</> command for extended statistics objects</entry>
</row>
<row>
<entry><function>pg_get_triggerdef</function>(<parameter>trigger_oid</parameter>)</entry>
<entry><type>text</type></entry>
@ -17034,19 +17043,20 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
<para>
<function>pg_get_constraintdef</function>,
<function>pg_get_indexdef</function>, <function>pg_get_ruledef</function>,
and <function>pg_get_triggerdef</function>, respectively reconstruct the
creating command for a constraint, index, rule, or trigger. (Note that this
is a decompiled reconstruction, not the original text of the command.)
<function>pg_get_expr</function> decompiles the internal form of an
individual expression, such as the default value for a column. It can be
useful when examining the contents of system catalogs. If the expression
might contain Vars, specify the OID of the relation they refer to as the
second parameter; if no Vars are expected, zero is sufficient.
<function>pg_get_viewdef</function> reconstructs the <command>SELECT</>
query that defines a view. Most of these functions come in two variants,
one of which can optionally <quote>pretty-print</> the result. The
pretty-printed format is more readable, but the default format is more
likely to be interpreted the same way by future versions of
<function>pg_get_statisticsextdef</function>, and
<function>pg_get_triggerdef</function>, respectively reconstruct the
creating command for a constraint, index, rule, extended statistics object,
or trigger. (Note that this is a decompiled reconstruction, not the
original text of the command.) <function>pg_get_expr</function> decompiles
the internal form of an individual expression, such as the default value
for a column. It can be useful when examining the contents of system
catalogs. If the expression might contain Vars, specify the OID of the
relation they refer to as the second parameter; if no Vars are expected,
zero is sufficient. <function>pg_get_viewdef</function> reconstructs the
<command>SELECT</> query that defines a view. Most of these functions come
in two variants, one of which can optionally <quote>pretty-print</> the
result. The pretty-printed format is more readable, but the default format
is more likely to be interpreted the same way by future versions of
<productname>PostgreSQL</>; avoid using pretty-printed output for dump
purposes. Passing <literal>false</> for the pretty-print parameter yields
the same result as the variant that does not have the parameter at all.
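
A small, hedged usage sketch (not part of the patch): the new function
reconstructs the defining command for an extended statistics object, here for
the s1 object from the CREATE STATISTICS example; the output shown is
approximate:

SELECT pg_get_statisticsextdef(oid)
FROM pg_statistic_ext
WHERE staname = 's1';
--         pg_get_statisticsextdef
-- ----------------------------------------
--  CREATE STATISTICS s1 ON (a, b) FROM t1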

View File

@ -34,6 +34,7 @@ Complete list of usable sgml source files in this directory.
<!ENTITY alterSequence SYSTEM "alter_sequence.sgml">
<!ENTITY alterSubscription SYSTEM "alter_subscription.sgml">
<!ENTITY alterSystem SYSTEM "alter_system.sgml">
<!ENTITY alterStatistics SYSTEM "alter_statistics.sgml">
<!ENTITY alterTable SYSTEM "alter_table.sgml">
<!ENTITY alterTableSpace SYSTEM "alter_tablespace.sgml">
<!ENTITY alterTSConfig SYSTEM "alter_tsconfig.sgml">
@ -80,6 +81,7 @@ Complete list of usable sgml source files in this directory.
<!ENTITY createSchema SYSTEM "create_schema.sgml">
<!ENTITY createSequence SYSTEM "create_sequence.sgml">
<!ENTITY createServer SYSTEM "create_server.sgml">
<!ENTITY createStatistics SYSTEM "create_statistics.sgml">
<!ENTITY createSubscription SYSTEM "create_subscription.sgml">
<!ENTITY createTable SYSTEM "create_table.sgml">
<!ENTITY createTableAs SYSTEM "create_table_as.sgml">
@ -126,6 +128,7 @@ Complete list of usable sgml source files in this directory.
<!ENTITY dropSchema SYSTEM "drop_schema.sgml">
<!ENTITY dropSequence SYSTEM "drop_sequence.sgml">
<!ENTITY dropServer SYSTEM "drop_server.sgml">
<!ENTITY dropStatistics SYSTEM "drop_statistics.sgml">
<!ENTITY dropSubscription SYSTEM "drop_subscription.sgml">
<!ENTITY dropTable SYSTEM "drop_table.sgml">
<!ENTITY dropTableSpace SYSTEM "drop_tablespace.sgml">

View File

@ -0,0 +1,115 @@
<!--
doc/src/sgml/ref/alter_statistics.sgml
PostgreSQL documentation
-->
<refentry id="SQL-ALTERSTATISTICS">
<indexterm zone="sql-alterstatistics">
<primary>ALTER STATISTICS</primary>
</indexterm>
<refmeta>
<refentrytitle>ALTER STATISTICS</refentrytitle>
<manvolnum>7</manvolnum>
<refmiscinfo>SQL - Language Statements</refmiscinfo>
</refmeta>
<refnamediv>
<refname>ALTER STATISTICS</refname>
<refpurpose>
change the definition of an extended statistics object
</refpurpose>
</refnamediv>
<refsynopsisdiv>
<synopsis>
ALTER STATISTICS <replaceable class="parameter">name</replaceable> OWNER TO { <replaceable class="PARAMETER">new_owner</replaceable> | CURRENT_USER | SESSION_USER }
ALTER STATISTICS <replaceable class="parameter">name</replaceable> RENAME TO <replaceable class="parameter">new_name</replaceable>
ALTER STATISTICS <replaceable class="parameter">name</replaceable> SET SCHEMA <replaceable class="parameter">new_schema</replaceable>
</synopsis>
</refsynopsisdiv>
<refsect1>
<title>Description</title>
<para>
<command>ALTER STATISTICS</command> changes the parameters of an existing
extended statistics. Any parameters not specifically set in the
<command>ALTER STATISTICS</command> command retain their prior settings.
</para>
<para>
You must own the statistics to use <command>ALTER STATISTICS</>.
To change a statistics' schema, you must also have <literal>CREATE</>
privilege on the new schema.
To alter the owner, you must also be a direct or indirect member of the new
owning role, and that role must have <literal>CREATE</literal> privilege on
the statistics' schema. (These restrictions enforce that altering the owner
doesn't do anything you couldn't do by dropping and recreating the statistics.
However, a superuser can alter ownership of any statistics anyway.)
</para>
</refsect1>
<refsect1>
<title>Parameters</title>
<para>
<variablelist>
<varlistentry>
<term><replaceable class="parameter">name</replaceable></term>
<listitem>
<para>
The name (optionally schema-qualified) of the statistics to be altered.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="PARAMETER">new_owner</replaceable></term>
<listitem>
<para>
The user name of the new owner of the statistics.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="parameter">new_name</replaceable></term>
<listitem>
<para>
The new name for the statistics.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="parameter">new_schema</replaceable></term>
<listitem>
<para>
The new schema for the statistics.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
</refsect1>
<refsect1>
<title>Compatibility</title>
<para>
There's no <command>ALTER STATISTICS</command> command in the SQL standard.
</para>
</refsect1>
<refsect1>
<title>See Also</title>
<simplelist type="inline">
<member><xref linkend="sql-createstatistics"></member>
<member><xref linkend="sql-dropstatistics"></member>
</simplelist>
</refsect1>
</refentry>
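
For illustration only (not part of the patch), the three forms documented
above could be used as follows; the role stats_admin and the schema
reporting are hypothetical names:

ALTER STATISTICS s1 RENAME TO s1_ab;
ALTER STATISTICS s1_ab OWNER TO stats_admin;
ALTER STATISTICS s1_ab SET SCHEMA reporting;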

View File

@ -119,9 +119,12 @@ ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
<para>
This form drops a column from a table. Indexes and
table constraints involving the column will be automatically
dropped as well. You will need to say <literal>CASCADE</> if
anything outside the table depends on the column, for example,
foreign key references or views.
dropped as well.
Multivariate statistics referencing the dropped column will also be
removed if the removal of the column would cause the statistics to
contain data for only a single column.
You will need to say <literal>CASCADE</> if anything outside the table
depends on the column, for example, foreign key references or views.
If <literal>IF EXISTS</literal> is specified and the column
does not exist, no error is thrown. In this case a notice
is issued instead.
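
A brief editorial sketch (not from the patch) of the behaviour described
above, assuming a hypothetical table t2 with a two-column statistics object:

CREATE TABLE t2 (a int, b int, c int);
CREATE STATISTICS s2 ON (a, b) FROM t2;
-- dropping column b would leave s2 covering only a single column,
-- so the statistics object is removed together with the column
ALTER TABLE t2 DROP COLUMN b;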

View File

@ -51,6 +51,7 @@ COMMENT ON
SCHEMA <replaceable class="PARAMETER">object_name</replaceable> |
SEQUENCE <replaceable class="PARAMETER">object_name</replaceable> |
SERVER <replaceable class="PARAMETER">object_name</replaceable> |
STATISTICS <replaceable class="PARAMETER">object_name</replaceable> |
TABLE <replaceable class="PARAMETER">object_name</replaceable> |
TABLESPACE <replaceable class="PARAMETER">object_name</replaceable> |
TEXT SEARCH CONFIGURATION <replaceable class="PARAMETER">object_name</replaceable> |
@ -125,8 +126,8 @@ COMMENT ON
The name of the object to be commented. Names of tables,
aggregates, collations, conversions, domains, foreign tables, functions,
indexes, operators, operator classes, operator families, sequences,
text search objects, types, and views can be schema-qualified.
When commenting on a column,
statistics, text search objects, types, and views can be
schema-qualified. When commenting on a column,
<replaceable class="parameter">relation_name</replaceable> must refer
to a table, view, composite type, or foreign table.
</para>
@ -327,6 +328,7 @@ COMMENT ON RULE my_rule ON my_table IS 'Logs updates of employee records';
COMMENT ON SCHEMA my_schema IS 'Departmental data';
COMMENT ON SEQUENCE my_sequence IS 'Used to generate primary keys';
COMMENT ON SERVER myserver IS 'my foreign server';
COMMENT ON STATISTICS my_statistics IS 'Improves planner row estimations';
COMMENT ON TABLE my_schema.my_table IS 'Employee Information';
COMMENT ON TABLESPACE my_tablespace IS 'Tablespace for indexes';
COMMENT ON TEXT SEARCH CONFIGURATION my_config IS 'Special word filtering';

View File

@ -0,0 +1,155 @@
<!--
doc/src/sgml/ref/create_statistics.sgml
PostgreSQL documentation
-->
<refentry id="SQL-CREATESTATISTICS">
<indexterm zone="sql-createstatistics">
<primary>CREATE STATISTICS</primary>
</indexterm>
<refmeta>
<refentrytitle>CREATE STATISTICS</refentrytitle>
<manvolnum>7</manvolnum>
<refmiscinfo>SQL - Language Statements</refmiscinfo>
</refmeta>
<refnamediv>
<refname>CREATE STATISTICS</refname>
<refpurpose>define extended statistics</refpurpose>
</refnamediv>
<refsynopsisdiv>
<synopsis>
CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_name</replaceable> ON (
<replaceable class="PARAMETER">column_name</replaceable>, <replaceable class="PARAMETER">column_name</replaceable> [, ...])
FROM <replaceable class="PARAMETER">table_name</replaceable>
</synopsis>
</refsynopsisdiv>
<refsect1 id="SQL-CREATESTATISTICS-description">
<title>Description</title>
<para>
<command>CREATE STATISTICS</command> will create a new extended statistics
object on the specified table.
The statistics will be created in the current database and
will be owned by the user issuing the command.
</para>
<para>
If a schema name is given (for example, <literal>CREATE STATISTICS
myschema.mystat ...</>) then the statistics is created in the specified
schema. Otherwise it is created in the current schema. The name of
the statistics must be distinct from the name of any other statistics in the
same schema.
</para>
</refsect1>
<refsect1>
<title>Parameters</title>
<variablelist>
<varlistentry>
<term><literal>IF NOT EXISTS</></term>
<listitem>
<para>
Do not throw an error if a statistics with the same name already exists.
A notice is issued in this case. Note that only the name of the
statistics object is considered here. The definition of the statistics is
not considered.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="PARAMETER">statistics_name</replaceable></term>
<listitem>
<para>
The name (optionally schema-qualified) of the statistics to be created.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="PARAMETER">column_name</replaceable></term>
<listitem>
<para>
The name of a column to be included in the statistics.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="PARAMETER">table_name</replaceable></term>
<listitem>
<para>
The name (optionally schema-qualified) of the table the statistics should
be created on.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>Notes</title>
<para>
You must be the owner of a table to create or change statistics on it.
</para>
</refsect1>
<refsect1 id="SQL-CREATESTATISTICS-examples">
<title>Examples</title>
<para>
Create table <structname>t1</> with two functionally dependent columns, i.e.
knowledge of a value in the first column is sufficient for determining the
value in the other column. Then functional dependencies are built on those
columns:
<programlisting>
CREATE TABLE t1 (
a int,
b int
);
INSERT INTO t1 SELECT i/100, i/500
FROM generate_series(1,1000000) s(i);
CREATE STATISTICS s1 ON (a, b) FROM t1;
ANALYZE t1;
-- valid combination of values
EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0);
-- invalid combination of values
EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 1);
</programlisting>
</para>
</refsect1>
<refsect1>
<title>Compatibility</title>
<para>
There's no <command>CREATE STATISTICS</command> command in the SQL standard.
</para>
</refsect1>
<refsect1>
<title>See Also</title>
<simplelist type="inline">
<member><xref linkend="sql-alterstatistics"></member>
<member><xref linkend="sql-dropstatistics"></member>
</simplelist>
</refsect1>
</refentry>
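
As a hedged addition (not part of the committed page): since only ndistinct
coefficients are collected by this commit, the most visible effect of s1 is
on group-count estimates, as described in the commit message; roughly:

-- with s1 in place and the table analyzed, the planner can use the
-- multivariate ndistinct coefficient instead of assuming independence
EXPLAIN SELECT a, b FROM t1 GROUP BY a, b;
EXPLAIN SELECT DISTINCT a, b FROM t1;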

View File

@ -0,0 +1,98 @@
<!--
doc/src/sgml/ref/drop_statistics.sgml
PostgreSQL documentation
-->
<refentry id="SQL-DROPSTATISTICS">
<indexterm zone="sql-dropstatistics">
<primary>DROP STATISTICS</primary>
</indexterm>
<refmeta>
<refentrytitle>DROP STATISTICS</refentrytitle>
<manvolnum>7</manvolnum>
<refmiscinfo>SQL - Language Statements</refmiscinfo>
</refmeta>
<refnamediv>
<refname>DROP STATISTICS</refname>
<refpurpose>remove extended statistics</refpurpose>
</refnamediv>
<refsynopsisdiv>
<synopsis>
DROP STATISTICS [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable> [, ...]
</synopsis>
</refsynopsisdiv>
<refsect1>
<title>Description</title>
<para>
<command>DROP STATISTICS</command> removes statistics from the database.
Only the statistics owner, the schema owner, or a superuser can drop a
statistics object.
</para>
</refsect1>
<refsect1>
<title>Parameters</title>
<variablelist>
<varlistentry>
<term><literal>IF EXISTS</literal></term>
<listitem>
<para>
Do not throw an error if the statistics do not exist. A notice is
issued in this case.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable class="PARAMETER">name</replaceable></term>
<listitem>
<para>
The name (optionally schema-qualified) of the statistics to drop.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>Examples</title>
<para>
To destroy two statistics objects on different schemas, without failing
if they don't exist:
<programlisting>
DROP STATISTICS IF EXISTS
accounting.users_uid_creation,
public.grants_user_role;
</programlisting>
</para>
</refsect1>
<refsect1>
<title>Compatibility</title>
<para>
There's no <command>DROP STATISTICS</command> command in the SQL standard.
</para>
</refsect1>
<refsect1>
<title>See Also</title>
<simplelist type="inline">
<member><xref linkend="sql-alterstatistics"></member>
<member><xref linkend="sql-createstatistics"></member>
</simplelist>
</refsect1>
</refentry>

View File

@ -60,6 +60,7 @@
&alterSchema;
&alterSequence;
&alterServer;
&alterStatistics;
&alterSubscription;
&alterSystem;
&alterTable;
@ -108,6 +109,7 @@
&createSchema;
&createSequence;
&createServer;
&createStatistics;
&createSubscription;
&createTable;
&createTableAs;
@ -154,6 +156,7 @@
&dropSchema;
&dropSequence;
&dropServer;
&dropStatistics;
&dropSubscription;
&dropTable;
&dropTableSpace;

View File

@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global
SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \
main nodes optimizer port postmaster regex replication rewrite \
storage tcop tsearch utils $(top_builddir)/src/timezone
statistics storage tcop tsearch utils $(top_builddir)/src/timezone
include $(srcdir)/common.mk

View File

@ -33,6 +33,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
pg_statistic_ext.h \
pg_statistic.h pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \
pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \

View File

@ -48,6 +48,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
@ -3302,6 +3303,8 @@ static const char *const no_priv_msg[MAX_ACL_KIND] =
gettext_noop("permission denied for collation %s"),
/* ACL_KIND_CONVERSION */
gettext_noop("permission denied for conversion %s"),
/* ACL_KIND_STATISTICS */
gettext_noop("permission denied for statistics %s"),
/* ACL_KIND_TABLESPACE */
gettext_noop("permission denied for tablespace %s"),
/* ACL_KIND_TSDICTIONARY */
@ -3352,6 +3355,8 @@ static const char *const not_owner_msg[MAX_ACL_KIND] =
gettext_noop("must be owner of collation %s"),
/* ACL_KIND_CONVERSION */
gettext_noop("must be owner of conversion %s"),
/* ACL_KIND_STATISTICS */
gettext_noop("must be owner of statistics %s"),
/* ACL_KIND_TABLESPACE */
gettext_noop("must be owner of tablespace %s"),
/* ACL_KIND_TSDICTIONARY */
@ -3467,6 +3472,10 @@ pg_aclmask(AclObjectKind objkind, Oid table_oid, AttrNumber attnum, Oid roleid,
mask, how, NULL);
case ACL_KIND_NAMESPACE:
return pg_namespace_aclmask(table_oid, roleid, mask, how);
case ACL_KIND_STATISTICS:
elog(ERROR, "grantable rights not supported for statistics");
/* not reached, but keep compiler quiet */
return ACL_NO_RIGHTS;
case ACL_KIND_TABLESPACE:
return pg_tablespace_aclmask(table_oid, roleid, mask, how);
case ACL_KIND_FDW:
@ -5103,6 +5112,32 @@ pg_subscription_ownercheck(Oid sub_oid, Oid roleid)
return has_privs_of_role(roleid, ownerId);
}
/*
* Ownership check for an extended statistics object (specified by OID).
*/
bool
pg_statistics_ownercheck(Oid stat_oid, Oid roleid)
{
HeapTuple tuple;
Oid ownerId;
/* Superusers bypass all permission checking. */
if (superuser_arg(roleid))
return true;
tuple = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stat_oid));
if (!HeapTupleIsValid(tuple))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("statistics with OID %u do not exist", stat_oid)));
ownerId = ((Form_pg_statistic_ext) GETSTRUCT(tuple))->staowner;
ReleaseSysCache(tuple);
return has_privs_of_role(roleid, ownerId);
}
/*
* Check whether specified role has CREATEROLE privilege (or is a superuser)
*

View File

@ -51,6 +51,7 @@
#include "catalog/pg_publication.h"
#include "catalog/pg_publication_rel.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
@ -154,6 +155,7 @@ static const Oid object_classes[] = {
RewriteRelationId, /* OCLASS_REWRITE */
TriggerRelationId, /* OCLASS_TRIGGER */
NamespaceRelationId, /* OCLASS_SCHEMA */
StatisticExtRelationId, /* OCLASS_STATISTIC_EXT */
TSParserRelationId, /* OCLASS_TSPARSER */
TSDictionaryRelationId, /* OCLASS_TSDICT */
TSTemplateRelationId, /* OCLASS_TSTEMPLATE */
@ -1263,6 +1265,10 @@ doDeletion(const ObjectAddress *object, int flags)
DropTransformById(object->objectId);
break;
case OCLASS_STATISTIC_EXT:
RemoveStatisticsById(object->objectId);
break;
default:
elog(ERROR, "unrecognized object class: %u",
object->classId);
@ -2377,6 +2383,9 @@ getObjectClass(const ObjectAddress *object)
case NamespaceRelationId:
return OCLASS_SCHEMA;
case StatisticExtRelationId:
return OCLASS_STATISTIC_EXT;
case TSParserRelationId:
return OCLASS_TSPARSER;

View File

@ -52,6 +52,7 @@
#include "catalog/pg_opclass.h"
#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription_rel.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
@ -1609,7 +1610,10 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
heap_close(attr_rel, RowExclusiveLock);
if (attnum > 0)
{
RemoveStatistics(relid, attnum);
RemoveStatisticsExt(relid, attnum);
}
relation_close(rel, NoLock);
}
@ -1860,6 +1864,7 @@ heap_drop_with_catalog(Oid relid)
* delete statistics
*/
RemoveStatistics(relid, 0);
RemoveStatisticsExt(relid, 0);
/*
* delete attribute tuples
@ -2771,6 +2776,75 @@ RemoveStatistics(Oid relid, AttrNumber attnum)
}
/*
* RemoveStatisticsExt --- remove entries in pg_statistic_ext for a relation
*
* If attnum is zero, remove all entries for rel; else remove only the
* one(s) involving that column.
*/
void
RemoveStatisticsExt(Oid relid, AttrNumber attnum)
{
Relation pgstatisticext;
SysScanDesc scan;
ScanKeyData key;
HeapTuple tuple;
/*
* Scan pg_statistic_ext to delete relevant tuples
*/
pgstatisticext = heap_open(StatisticExtRelationId, RowExclusiveLock);
ScanKeyInit(&key,
Anum_pg_statistic_ext_starelid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(relid));
scan = systable_beginscan(pgstatisticext,
StatisticExtRelidIndexId,
true, NULL, 1, &key);
while (HeapTupleIsValid(tuple = systable_getnext(scan)))
{
bool delete = false;
if (attnum == 0)
delete = true;
else if (attnum != 0)
{
Form_pg_statistic_ext staForm;
int i;
/*
* Decode the stakeys array and delete any stats that involve the
* specified column.
*/
staForm = (Form_pg_statistic_ext) GETSTRUCT(tuple);
for (i = 0; i < staForm->stakeys.dim1; i++)
{
if (staForm->stakeys.values[i] == attnum)
{
delete = true;
break;
}
}
}
if (delete)
{
CatalogTupleDelete(pgstatisticext, &tuple->t_self);
deleteDependencyRecordsFor(StatisticExtRelationId,
HeapTupleGetOid(tuple),
false);
}
}
systable_endscan(scan);
heap_close(pgstatisticext, RowExclusiveLock);
}
/*
* RelationTruncateIndexes - truncate all indexes associated
* with the heap relation to zero tuples.

View File

@ -2085,6 +2085,62 @@ ConversionIsVisible(Oid conid)
return visible;
}
/*
* get_statistics_oid - find a statistics by possibly qualified name
*
* If not found, returns InvalidOid if missing_ok, else throws error
*/
Oid
get_statistics_oid(List *names, bool missing_ok)
{
char *schemaname;
char *stats_name;
Oid namespaceId;
Oid stats_oid = InvalidOid;
ListCell *l;
/* deconstruct the name list */
DeconstructQualifiedName(names, &schemaname, &stats_name);
if (schemaname)
{
/* use exact schema given */
namespaceId = LookupExplicitNamespace(schemaname, missing_ok);
if (missing_ok && !OidIsValid(namespaceId))
stats_oid = InvalidOid;
else
stats_oid = GetSysCacheOid2(STATEXTNAMENSP,
PointerGetDatum(stats_name),
ObjectIdGetDatum(namespaceId));
}
else
{
/* search for it in search path */
recomputeNamespacePath();
foreach(l, activeSearchPath)
{
namespaceId = lfirst_oid(l);
if (namespaceId == myTempNamespace)
continue; /* do not look in temp namespace */
stats_oid = GetSysCacheOid2(STATEXTNAMENSP,
PointerGetDatum(stats_name),
ObjectIdGetDatum(namespaceId));
if (OidIsValid(stats_oid))
break;
}
}
if (!OidIsValid(stats_oid) && !missing_ok)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("statistics \"%s\" do not exist",
NameListToString(names))));
return stats_oid;
}
/*
* get_ts_parser_oid - find a TS parser by possibly qualified name
*

View File

@ -48,6 +48,7 @@
#include "catalog/pg_publication.h"
#include "catalog/pg_publication_rel.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
@ -478,6 +479,18 @@ static const ObjectPropertyType ObjectProperty[] =
InvalidAttrNumber,
ACL_KIND_SUBSCRIPTION,
true
},
{
StatisticExtRelationId,
StatisticExtOidIndexId,
STATEXTOID,
STATEXTNAMENSP,
Anum_pg_statistic_ext_staname,
Anum_pg_statistic_ext_stanamespace,
Anum_pg_statistic_ext_staowner,
InvalidAttrNumber, /* no ACL (same as relation) */
ACL_KIND_STATISTICS,
true
}
};
@ -696,6 +709,10 @@ static const struct object_type_map
/* OCLASS_TRANSFORM */
{
"transform", OBJECT_TRANSFORM
},
/* OBJECT_STATISTIC_EXT */
{
"statistics", OBJECT_STATISTIC_EXT
}
};
@ -974,6 +991,12 @@ get_object_address(ObjectType objtype, Node *object,
address = get_object_address_defacl(castNode(List, object),
missing_ok);
break;
case OBJECT_STATISTIC_EXT:
address.classId = StatisticExtRelationId;
address.objectId = get_statistics_oid(castNode(List, object),
missing_ok);
address.objectSubId = 0;
break;
default:
elog(ERROR, "unrecognized objtype: %d", (int) objtype);
/* placate compiler, in case it thinks elog might return */
@ -2083,6 +2106,7 @@ pg_get_object_address(PG_FUNCTION_ARGS)
case OBJECT_ATTRIBUTE:
case OBJECT_COLLATION:
case OBJECT_CONVERSION:
case OBJECT_STATISTIC_EXT:
case OBJECT_TSPARSER:
case OBJECT_TSDICTIONARY:
case OBJECT_TSTEMPLATE:
@ -2370,6 +2394,10 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser")));
break;
case OBJECT_STATISTIC_EXT:
if (!pg_statistics_ownercheck(address.objectId, roleid))
aclcheck_error_type(ACLCHECK_NOT_OWNER, address.objectId);
break;
default:
elog(ERROR, "unrecognized object type: %d",
(int) objtype);
@ -3857,6 +3885,10 @@ getObjectTypeDescription(const ObjectAddress *object)
appendStringInfoString(&buffer, "subscription");
break;
case OCLASS_STATISTIC_EXT:
appendStringInfoString(&buffer, "statistics");
break;
default:
appendStringInfo(&buffer, "unrecognized %u", object->classId);
break;
@ -4880,6 +4912,29 @@ getObjectIdentityParts(const ObjectAddress *object,
break;
}
case OCLASS_STATISTIC_EXT:
{
HeapTuple tup;
Form_pg_statistic_ext formStatistic;
char *schema;
tup = SearchSysCache1(STATEXTOID,
ObjectIdGetDatum(object->objectId));
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for statistics %u",
object->objectId);
formStatistic = (Form_pg_statistic_ext) GETSTRUCT(tup);
schema = get_namespace_name_or_temp(formStatistic->stanamespace);
appendStringInfoString(&buffer,
quote_qualified_identifier(schema,
NameStr(formStatistic->staname)));
if (objname)
*objname = list_make2(schema,
pstrdup(NameStr(formStatistic->staname)));
ReleaseSysCache(tup);
}
break;
default:
appendStringInfo(&buffer, "unrecognized object %u %u %d",
object->classId,

View File

@ -39,6 +39,7 @@
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_shdepend.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_ts_config.h"
@ -1415,6 +1416,7 @@ shdepReassignOwned(List *roleids, Oid newrole)
case OperatorFamilyRelationId:
case OperatorClassRelationId:
case ExtensionRelationId:
case StatisticExtRelationId:
case TableSpaceRelationId:
case DatabaseRelationId:
case TSConfigRelationId:

View File

@ -186,6 +186,16 @@ CREATE OR REPLACE VIEW pg_sequences AS
WHERE NOT pg_is_other_temp_schema(N.oid)
AND relkind = 'S';
CREATE VIEW pg_stats_ext AS
SELECT
N.nspname AS schemaname,
C.relname AS tablename,
S.staname AS staname,
S.stakeys AS attnums,
length(s.standistinct) AS ndistbytes
FROM (pg_statistic_ext S JOIN pg_class C ON (C.oid = S.starelid))
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace);
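
An editorial usage sketch (not part of system_views.sql): the new view shows
which extended statistics exist and whether their ndistinct data has been
built:

SELECT schemaname, tablename, staname, attnums, ndistbytes
FROM pg_stats_ext
WHERE tablename = 't1';
-- ndistbytes is NULL until ANALYZE has built the statistics
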
CREATE VIEW pg_stats WITH (security_barrier) AS
SELECT
nspname AS schemaname,

View File

@ -18,8 +18,8 @@ OBJS = amcmds.o aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \
event_trigger.o explain.o extension.o foreigncmds.o functioncmds.o \
indexcmds.o lockcmds.o matview.o operatorcmds.o opclasscmds.o \
policy.o portalcmds.o prepare.o proclang.o publicationcmds.o \
schemacmds.o seclabel.o sequence.o subscriptioncmds.o tablecmds.o \
tablespace.o trigger.o tsearchcmds.o typecmds.o user.o vacuum.o \
vacuumlazy.o variable.o view.o
schemacmds.o seclabel.o sequence.o statscmds.o subscriptioncmds.o \
tablecmds.o tablespace.o trigger.o tsearchcmds.o typecmds.o user.o \
vacuum.o vacuumlazy.o variable.o view.o
include $(top_srcdir)/src/backend/common.mk

View File

@ -33,6 +33,7 @@
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_dict.h"
#include "catalog/pg_ts_parser.h"
@ -120,6 +121,10 @@ report_namespace_conflict(Oid classId, const char *name, Oid nspOid)
Assert(OidIsValid(nspOid));
msgfmt = gettext_noop("conversion \"%s\" already exists in schema \"%s\"");
break;
case StatisticExtRelationId:
Assert(OidIsValid(nspOid));
msgfmt = gettext_noop("statistics \"%s\" already exists in schema \"%s\"");
break;
case TSParserRelationId:
Assert(OidIsValid(nspOid));
msgfmt = gettext_noop("text search parser \"%s\" already exists in schema \"%s\"");
@ -373,6 +378,7 @@ ExecRenameStmt(RenameStmt *stmt)
case OBJECT_OPCLASS:
case OBJECT_OPFAMILY:
case OBJECT_LANGUAGE:
case OBJECT_STATISTIC_EXT:
case OBJECT_TSCONFIGURATION:
case OBJECT_TSDICTIONARY:
case OBJECT_TSPARSER:
@ -489,6 +495,7 @@ ExecAlterObjectSchemaStmt(AlterObjectSchemaStmt *stmt,
case OBJECT_OPERATOR:
case OBJECT_OPCLASS:
case OBJECT_OPFAMILY:
case OBJECT_STATISTIC_EXT:
case OBJECT_TSCONFIGURATION:
case OBJECT_TSDICTIONARY:
case OBJECT_TSPARSER:
@ -803,6 +810,7 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt)
case OBJECT_OPERATOR:
case OBJECT_OPCLASS:
case OBJECT_OPFAMILY:
case OBJECT_STATISTIC_EXT:
case OBJECT_TABLESPACE:
case OBJECT_TSDICTIONARY:
case OBJECT_TSCONFIGURATION:

View File

@ -17,6 +17,7 @@
#include <math.h>
#include "access/multixact.h"
#include "access/sysattr.h"
#include "access/transam.h"
#include "access/tupconvert.h"
#include "access/tuptoaster.h"
@ -28,6 +29,7 @@
#include "catalog/pg_collation.h"
#include "catalog/pg_inherits_fn.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_statistic_ext.h"
#include "commands/dbcommands.h"
#include "commands/tablecmds.h"
#include "commands/vacuum.h"
@ -39,13 +41,17 @@
#include "parser/parse_relation.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/acl.h"
#include "utils/attoptcache.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@ -566,6 +572,10 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
update_attstats(RelationGetRelid(Irel[ind]), false,
thisdata->attr_cnt, thisdata->vacattrstats);
}
/* Build extended statistics (if there are any). */
BuildRelationExtStatistics(onerel, totalrows, numrows, rows, attr_cnt,
vacattrstats);
}
/*
@ -1681,19 +1691,6 @@ ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
/*
* Extra information used by the default analysis routines
*/
typedef struct
{
Oid eqopr; /* '=' operator for datatype, if any */
Oid eqfunc; /* and associated function */
Oid ltopr; /* '<' operator for datatype, if any */
} StdAnalyzeData;
typedef struct
{
Datum value; /* a data value */
int tupno; /* position index for tuple it came from */
} ScalarItem;
typedef struct
{
int count; /* # of duplicates */

View File

@ -286,6 +286,13 @@ does_not_exist_skipping(ObjectType objtype, Node *object)
msg = gettext_noop("schema \"%s\" does not exist, skipping");
name = strVal((Value *) object);
break;
case OBJECT_STATISTIC_EXT:
if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
{
msg = gettext_noop("extended statistics \"%s\" do not exist, skipping");
name = NameListToString(castNode(List, object));
}
break;
case OBJECT_TSPARSER:
if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
{

View File

@ -112,6 +112,7 @@ static event_trigger_support_data event_trigger_support[] = {
{"SCHEMA", true},
{"SEQUENCE", true},
{"SERVER", true},
{"STATISTICS", true},
{"SUBSCRIPTION", true},
{"TABLE", true},
{"TABLESPACE", false},
@ -1108,6 +1109,7 @@ EventTriggerSupportsObjectType(ObjectType obtype)
case OBJECT_SCHEMA:
case OBJECT_SEQUENCE:
case OBJECT_SUBSCRIPTION:
case OBJECT_STATISTIC_EXT:
case OBJECT_TABCONSTRAINT:
case OBJECT_TABLE:
case OBJECT_TRANSFORM:
@ -1173,6 +1175,7 @@ EventTriggerSupportsObjectClass(ObjectClass objclass)
case OCLASS_PUBLICATION:
case OCLASS_PUBLICATION_REL:
case OCLASS_SUBSCRIPTION:
case OCLASS_STATISTIC_EXT:
return true;
}

View File

@ -0,0 +1,296 @@
/*-------------------------------------------------------------------------
*
* statscmds.c
* Commands for creating and altering extended statistics
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/commands/statscmds.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/relscan.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_statistic_ext.h"
#include "commands/defrem.h"
#include "miscadmin.h"
#include "statistics/statistics.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
/* used for sorting the attnums in CreateStatistics */
static int
compare_int16(const void *a, const void *b)
{
return memcmp(a, b, sizeof(int16));
}
/*
* CREATE STATISTICS
*/
ObjectAddress
CreateStatistics(CreateStatsStmt *stmt)
{
int i;
ListCell *l;
int16 attnums[STATS_MAX_DIMENSIONS];
int numcols = 0;
ObjectAddress address = InvalidObjectAddress;
char *namestr;
NameData staname;
Oid statoid;
Oid namespaceId;
HeapTuple htup;
Datum values[Natts_pg_statistic_ext];
bool nulls[Natts_pg_statistic_ext];
int2vector *stakeys;
Relation statrel;
Relation rel;
Oid relid;
ObjectAddress parentobject,
childobject;
Datum types[1]; /* only ndistinct defined now */
int ntypes;
ArrayType *staenabled;
bool build_ndistinct;
bool requested_type = false;
Assert(IsA(stmt, CreateStatsStmt));
/* resolve the pieces of the name (namespace etc.) */
namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr);
namestrcpy(&staname, namestr);
/*
* If if_not_exists was given and the statistics already exists, bail out.
*/
if (SearchSysCacheExists2(STATEXTNAMENSP,
PointerGetDatum(&staname),
ObjectIdGetDatum(namespaceId)))
{
if (stmt->if_not_exists)
{
ereport(NOTICE,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("statistics \"%s\" already exist, skipping",
namestr)));
return InvalidObjectAddress;
}
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("statistics \"%s\" already exist", namestr)));
}
rel = heap_openrv(stmt->relation, AccessExclusiveLock);
relid = RelationGetRelid(rel);
if (rel->rd_rel->relkind != RELKIND_RELATION &&
rel->rd_rel->relkind != RELKIND_MATVIEW)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("relation \"%s\" is not a table or materialized view",
RelationGetRelationName(rel))));
/*
* Transform column names to array of attnums. While at it, enforce some
* constraints.
*/
foreach(l, stmt->keys)
{
char *attname = strVal(lfirst(l));
HeapTuple atttuple;
Form_pg_attribute attForm;
TypeCacheEntry *type;
atttuple = SearchSysCacheAttName(relid, attname);
if (!HeapTupleIsValid(atttuple))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("column \"%s\" referenced in statistics does not exist",
attname)));
attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
/* Disallow use of system attributes in extended stats */
if (attForm->attnum < 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("statistic creation on system columns is not supported")));
/* Disallow data types without a less-than operator */
type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
if (type->lt_opr == InvalidOid)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("only scalar types can be used in extended statistics")));
/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
if (numcols >= STATS_MAX_DIMENSIONS)
ereport(ERROR,
(errcode(ERRCODE_TOO_MANY_COLUMNS),
errmsg("cannot have more than %d keys in statistics",
STATS_MAX_DIMENSIONS)));
attnums[numcols] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum;
ReleaseSysCache(atttuple);
numcols++;
}
/*
* Check that at least two columns were specified in the statement. The
* upper bound was already checked in the loop above.
*/
if (numcols < 2)
ereport(ERROR,
(errcode(ERRCODE_TOO_MANY_COLUMNS),
errmsg("statistics require at least 2 columns")));
/*
* Sort the attnums, which makes detecting duplicates somewhat easier, and
* it does not hurt (it does not affect the efficiency, unlike for
* indexes, for example).
*/
qsort(attnums, numcols, sizeof(int16), compare_int16);
/*
* Look for duplicates in the list of columns. The attnums are sorted so
* just check consecutive elements.
*/
for (i = 1; i < numcols; i++)
if (attnums[i] == attnums[i - 1])
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("duplicate column name in statistics definition")));
stakeys = buildint2vector(attnums, numcols);
/*
* Parse the statistics options. Currently only statistics types are
* recognized.
*/
build_ndistinct = false;
foreach(l, stmt->options)
{
DefElem *opt = (DefElem *) lfirst(l);
if (strcmp(opt->defname, "ndistinct") == 0)
{
build_ndistinct = defGetBoolean(opt);
requested_type = true;
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unrecognized STATISTICS option \"%s\"",
opt->defname)));
}
/* If no statistic type was specified, build them all. */
if (!requested_type)
build_ndistinct = true;
/* construct the char array of enabled statistic types */
ntypes = 0;
if (build_ndistinct)
types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
Assert(ntypes > 0);
staenabled = construct_array(types, ntypes, CHAROID, 1, true, 'c');
/*
* Everything seems fine, so let's build the pg_statistic_ext tuple.
*/
memset(values, 0, sizeof(values));
memset(nulls, false, sizeof(nulls));
values[Anum_pg_statistic_ext_starelid - 1] = ObjectIdGetDatum(relid);
values[Anum_pg_statistic_ext_staname - 1] = NameGetDatum(&staname);
values[Anum_pg_statistic_ext_stanamespace - 1] = ObjectIdGetDatum(namespaceId);
values[Anum_pg_statistic_ext_staowner - 1] = ObjectIdGetDatum(GetUserId());
values[Anum_pg_statistic_ext_stakeys - 1] = PointerGetDatum(stakeys);
values[Anum_pg_statistic_ext_staenabled - 1] = PointerGetDatum(staenabled);
/* no statistics built yet */
nulls[Anum_pg_statistic_ext_standistinct - 1] = true;
/* insert it into pg_statistic_ext */
statrel = heap_open(StatisticExtRelationId, RowExclusiveLock);
htup = heap_form_tuple(statrel->rd_att, values, nulls);
CatalogTupleInsert(statrel, htup);
statoid = HeapTupleGetOid(htup);
heap_freetuple(htup);
heap_close(statrel, RowExclusiveLock);
relation_close(rel, NoLock);
/*
* Add a dependency on a table, so that stats get dropped on DROP TABLE.
*/
ObjectAddressSet(parentobject, RelationRelationId, relid);
ObjectAddressSet(childobject, StatisticExtRelationId, statoid);
recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
/*
* Also add dependency on the schema. This is required to ensure that we
* drop the statistics on DROP SCHEMA. This is not handled automatically
* by DROP TABLE because the statistics are not an object in the table's
* schema.
*/
ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId);
recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
ObjectAddressSet(address, StatisticExtRelationId, statoid);
/*
* Invalidate relcache so that others see the new statistics.
*/
CacheInvalidateRelcache(rel);
return address;
}
/*
* Guts of statistics deletion.
*/
void
RemoveStatisticsById(Oid statsOid)
{
Relation relation;
Oid relid;
Relation rel;
HeapTuple tup;
Form_pg_statistic_ext statext;
/*
* Delete the pg_statistic_ext tuple.
*/
relation = heap_open(StatisticExtRelationId, RowExclusiveLock);
tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid));
if (!HeapTupleIsValid(tup)) /* should not happen */
elog(ERROR, "cache lookup failed for statistics %u", statsOid);
statext = (Form_pg_statistic_ext) GETSTRUCT(tup);
relid = statext->starelid;
rel = heap_open(relid, AccessExclusiveLock);
simple_heap_delete(relation, &tup->t_self);
CacheInvalidateRelcache(rel);
ReleaseSysCache(tup);
heap_close(relation, RowExclusiveLock);
heap_close(rel, NoLock);
}
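
To summarize the checks performed by CreateStatistics above, here is a hedged
sketch of statements it rejects (error texts paraphrased):

CREATE STATISTICS s_bad ON (a) FROM t1;        -- statistics require at least 2 columns
CREATE STATISTICS s_bad ON (a, a) FROM t1;     -- duplicate column name in statistics definition
CREATE STATISTICS s_bad ON (a, xmin) FROM t1;  -- system columns are not supported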

View File

@ -3337,6 +3337,20 @@ _copyIndexStmt(const IndexStmt *from)
return newnode;
}
static CreateStatsStmt *
_copyCreateStatsStmt(const CreateStatsStmt *from)
{
CreateStatsStmt *newnode = makeNode(CreateStatsStmt);
COPY_NODE_FIELD(defnames);
COPY_NODE_FIELD(relation);
COPY_NODE_FIELD(keys);
COPY_NODE_FIELD(options);
COPY_SCALAR_FIELD(if_not_exists);
return newnode;
}
static CreateFunctionStmt *
_copyCreateFunctionStmt(const CreateFunctionStmt *from)
{
@ -5050,6 +5064,9 @@ copyObject(const void *from)
case T_IndexStmt:
retval = _copyIndexStmt(from);
break;
case T_CreateStatsStmt:
retval = _copyCreateStatsStmt(from);
break;
case T_CreateFunctionStmt:
retval = _copyCreateFunctionStmt(from);
break;

View File

@ -1334,6 +1334,18 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b)
return true;
}
static bool
_equalCreateStatsStmt(const CreateStatsStmt *a, const CreateStatsStmt *b)
{
COMPARE_NODE_FIELD(defnames);
COMPARE_NODE_FIELD(relation);
COMPARE_NODE_FIELD(keys);
COMPARE_NODE_FIELD(options);
COMPARE_SCALAR_FIELD(if_not_exists);
return true;
}
static bool
_equalCreateFunctionStmt(const CreateFunctionStmt *a, const CreateFunctionStmt *b)
{
@ -3236,6 +3248,9 @@ equal(const void *a, const void *b)
case T_IndexStmt:
retval = _equalIndexStmt(a, b);
break;
case T_CreateStatsStmt:
retval = _equalCreateStatsStmt(a, b);
break;
case T_CreateFunctionStmt:
retval = _equalCreateFunctionStmt(a, b);
break;

View File

@ -2202,6 +2202,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_NODE_FIELD(lateral_vars);
WRITE_BITMAPSET_FIELD(lateral_referencers);
WRITE_NODE_FIELD(indexlist);
WRITE_NODE_FIELD(statlist);
WRITE_UINT_FIELD(pages);
WRITE_FLOAT_FIELD(tuples, "%.0f");
WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
@ -2274,6 +2275,18 @@ _outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node)
appendStringInfo(str, " %d", list_length(node->rinfos[i]));
}
static void
_outStatisticExtInfo(StringInfo str, const StatisticExtInfo *node)
{
WRITE_NODE_TYPE("STATISTICEXTINFO");
/* NB: this isn't a complete set of fields */
WRITE_OID_FIELD(statOid);
/* don't write rel, leads to infinite recursion in plan tree dump */
WRITE_CHAR_FIELD(kind);
WRITE_BITMAPSET_FIELD(keys);
}
static void
_outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
{
@ -2577,6 +2590,18 @@ _outIndexStmt(StringInfo str, const IndexStmt *node)
WRITE_BOOL_FIELD(if_not_exists);
}
static void
_outCreateStatsStmt(StringInfo str, const CreateStatsStmt *node)
{
WRITE_NODE_TYPE("CREATESTATSSTMT");
WRITE_NODE_FIELD(defnames);
WRITE_NODE_FIELD(relation);
WRITE_NODE_FIELD(keys);
WRITE_NODE_FIELD(options);
WRITE_BOOL_FIELD(if_not_exists);
}
static void
_outNotifyStmt(StringInfo str, const NotifyStmt *node)
{
@ -3936,6 +3961,9 @@ outNode(StringInfo str, const void *obj)
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;
case T_StatisticExtInfo:
_outStatisticExtInfo(str, obj);
break;
case T_ExtensibleNode:
_outExtensibleNode(str, obj);
@ -3953,6 +3981,9 @@ outNode(StringInfo str, const void *obj)
case T_IndexStmt:
_outIndexStmt(str, obj);
break;
case T_CreateStatsStmt:
_outCreateStatsStmt(str, obj);
break;
case T_NotifyStmt:
_outNotifyStmt(str, obj);
break;

View File

@ -29,6 +29,7 @@
#include "catalog/heap.h"
#include "catalog/partition.h"
#include "catalog/pg_am.h"
#include "catalog/pg_statistic_ext.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
@ -40,8 +41,11 @@
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "statistics/statistics.h"
#include "storage/bufmgr.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
@ -63,7 +67,7 @@ static List *get_relation_constraints(PlannerInfo *root,
bool include_notnull);
static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
Relation heapRelation);
static List *get_relation_statistics(RelOptInfo *rel, Relation relation);
/*
* get_relation_info -
@ -398,6 +402,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
rel->indexlist = indexinfos;
rel->statlist = get_relation_statistics(rel, relation);
/* Grab foreign-table info using the relcache, while we have it */
if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{
@ -1251,6 +1257,65 @@ get_relation_constraints(PlannerInfo *root,
return result;
}
/*
* get_relation_statistics
* Retrieve extended statistics defined on the table.
*
* Returns a List (possibly empty) of StatisticExtInfo objects describing
* the statistics. Note that this doesn't load the actual statistics data,
* just the identifying metadata. Only stats actually built are considered.
*/
static List *
get_relation_statistics(RelOptInfo *rel, Relation relation)
{
List *statoidlist;
List *stainfos = NIL;
ListCell *l;
statoidlist = RelationGetStatExtList(relation);
foreach(l, statoidlist)
{
Oid statOid = lfirst_oid(l);
Form_pg_statistic_ext staForm;
HeapTuple htup;
Bitmapset *keys = NULL;
int i;
htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
if (!htup)
elog(ERROR, "cache lookup failed for statistics %u", statOid);
staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
/*
* First, build the array of columns covered. This is ultimately
* wasted if no stats are actually built, but it doesn't seem worth
* troubling over that case.
*/
for (i = 0; i < staForm->stakeys.dim1; i++)
keys = bms_add_member(keys, staForm->stakeys.values[i]);
/* add one StatisticExtInfo for each kind built */
if (statext_is_kind_built(htup, STATS_EXT_NDISTINCT))
{
StatisticExtInfo *info = makeNode(StatisticExtInfo);
info->statOid = statOid;
info->rel = rel;
info->kind = STATS_EXT_NDISTINCT;
info->keys = bms_copy(keys);
stainfos = lcons(info, stainfos);
}
ReleaseSysCache(htup);
bms_free(keys);
}
list_free(statoidlist);
return stainfos;
}
/*
* relation_excluded_by_constraints

View File

@ -257,7 +257,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
ConstraintsSetStmt CopyStmt CreateAsStmt CreateCastStmt
CreateDomainStmt CreateExtensionStmt CreateGroupStmt CreateOpClassStmt
CreateOpFamilyStmt AlterOpFamilyStmt CreatePLangStmt
CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt
CreateSchemaStmt CreateSeqStmt CreateStmt CreateStatsStmt CreateTableSpaceStmt
CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt
CreateAssertStmt CreateTransformStmt CreateTrigStmt CreateEventTrigStmt
CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatePolicyStmt
@ -874,6 +874,7 @@ stmt :
| CreateSeqStmt
| CreateStmt
| CreateSubscriptionStmt
| CreateStatsStmt
| CreateTableSpaceStmt
| CreateTransformStmt
| CreateTrigStmt
@ -3747,6 +3748,35 @@ OptConsTableSpace: USING INDEX TABLESPACE name { $$ = $4; }
ExistingIndex: USING INDEX index_name { $$ = $3; }
;
/*****************************************************************************
*
* QUERY :
* CREATE STATISTICS stats_name WITH (options) ON (columns) FROM relname
*
*****************************************************************************/
CreateStatsStmt: CREATE STATISTICS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name
{
CreateStatsStmt *n = makeNode(CreateStatsStmt);
n->defnames = $3;
n->relation = $10;
n->keys = $7;
n->options = $4;
n->if_not_exists = false;
$$ = (Node *)n;
}
| CREATE STATISTICS IF_P NOT EXISTS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name
{
CreateStatsStmt *n = makeNode(CreateStatsStmt);
n->defnames = $6;
n->relation = $13;
n->keys = $10;
n->options = $7;
n->if_not_exists = true;
$$ = (Node *)n;
}
;
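
For reference, a hedged sketch of statements accepted by this grammar as
committed here; ndistinct is currently the only recognized option:

CREATE STATISTICS s1 ON (a, b) FROM t1;
CREATE STATISTICS s2 WITH (ndistinct) ON (a, b, c) FROM t1;
CREATE STATISTICS IF NOT EXISTS s2 WITH (ndistinct) ON (a, b, c) FROM t1;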
/*****************************************************************************
*
@ -6042,6 +6072,7 @@ drop_type_any_name:
| FOREIGN TABLE { $$ = OBJECT_FOREIGN_TABLE; }
| COLLATION { $$ = OBJECT_COLLATION; }
| CONVERSION_P { $$ = OBJECT_CONVERSION; }
| STATISTICS { $$ = OBJECT_STATISTIC_EXT; }
| TEXT_P SEARCH PARSER { $$ = OBJECT_TSPARSER; }
| TEXT_P SEARCH DICTIONARY { $$ = OBJECT_TSDICTIONARY; }
| TEXT_P SEARCH TEMPLATE { $$ = OBJECT_TSTEMPLATE; }
@ -6119,7 +6150,7 @@ opt_restart_seqs:
* EXTENSION | EVENT TRIGGER | FOREIGN DATA WRAPPER |
* FOREIGN TABLE | INDEX | [PROCEDURAL] LANGUAGE |
* MATERIALIZED VIEW | POLICY | ROLE | SCHEMA | SEQUENCE |
* SERVER | TABLE | TABLESPACE |
* SERVER | STATISTICS | TABLE | TABLESPACE |
* TEXT SEARCH CONFIGURATION | TEXT SEARCH DICTIONARY |
* TEXT SEARCH PARSER | TEXT SEARCH TEMPLATE | TYPE |
* VIEW] <objname> |
@ -6288,6 +6319,7 @@ comment_type_any_name:
COLUMN { $$ = OBJECT_COLUMN; }
| INDEX { $$ = OBJECT_INDEX; }
| SEQUENCE { $$ = OBJECT_SEQUENCE; }
| STATISTICS { $$ = OBJECT_STATISTIC_EXT; }
| TABLE { $$ = OBJECT_TABLE; }
| VIEW { $$ = OBJECT_VIEW; }
| MATERIALIZED VIEW { $$ = OBJECT_MATVIEW; }
@ -8428,6 +8460,15 @@ RenameStmt: ALTER AGGREGATE aggregate_with_argtypes RENAME TO name
n->missing_ok = false;
$$ = (Node *)n;
}
| ALTER STATISTICS any_name RENAME TO name
{
RenameStmt *n = makeNode(RenameStmt);
n->renameType = OBJECT_STATISTIC_EXT;
n->object = (Node *) $3;
n->newname = $6;
n->missing_ok = false;
$$ = (Node *)n;
}
| ALTER TEXT_P SEARCH PARSER any_name RENAME TO name
{
RenameStmt *n = makeNode(RenameStmt);
@ -8643,6 +8684,15 @@ AlterObjectSchemaStmt:
n->missing_ok = true;
$$ = (Node *)n;
}
| ALTER STATISTICS any_name SET SCHEMA name
{
AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt);
n->objectType = OBJECT_STATISTIC_EXT;
n->object = (Node *) $3;
n->newschema = $6;
n->missing_ok = false;
$$ = (Node *)n;
}
| ALTER TEXT_P SEARCH PARSER any_name SET SCHEMA name
{
AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt);
@ -8906,6 +8956,14 @@ AlterOwnerStmt: ALTER AGGREGATE aggregate_with_argtypes OWNER TO RoleSpec
n->newowner = $6;
$$ = (Node *)n;
}
| ALTER STATISTICS any_name OWNER TO RoleSpec
{
AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
n->objectType = OBJECT_STATISTIC_EXT;
n->object = (Node *) $3;
n->newowner = $6;
$$ = (Node *)n;
}
| ALTER TEXT_P SEARCH DICTIONARY any_name OWNER TO RoleSpec
{
AlterOwnerStmt *n = makeNode(AlterOwnerStmt);

View File

@ -0,0 +1,17 @@
#-------------------------------------------------------------------------
#
# Makefile--
# Makefile for statistics
#
# IDENTIFICATION
# src/backend/statistics/Makefile
#
#-------------------------------------------------------------------------
subdir = src/backend/statistics
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
OBJS = extended_stats.o mvdistinct.o
include $(top_srcdir)/src/backend/common.mk

View File

@ -0,0 +1,34 @@
Extended statistics
===================
When estimating various quantities (e.g. condition selectivities) the default
approach relies on the assumption of independence. In practice that's often
not true, resulting in estimation errors.
Extended statistics track different types of dependencies between the columns,
hopefully improving the estimates and producing better plans.
Currently we only have one type of extended statistics - ndistinct
coefficients, and we use it to improve estimates of grouping queries. See
README.ndistinct for details.
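As an illustration (with made-up numbers): if columns a and b each have 100
distinct values but are perfectly correlated so that only 100 distinct (a,b)
combinations actually occur, the independence assumption estimates
100 * 100 = 10000 groups for GROUP BY a, b, whereas an ndistinct coefficient
on (a,b) lets the planner use the much more accurate value of 100.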
Size of sample in ANALYZE
-------------------------
When performing ANALYZE, the number of rows to sample is determined as
(300 * statistics_target)
That works reasonably well for statistics on individual columns, but perhaps
it's not enough for extended statistics. Papers analyzing estimation errors
all use samples proportional to the table (usually finding that 1-3% of the
table is enough to build accurate stats).
The requested accuracy (number of MCV items or histogram bins) should also
be considered when determining the sample size, and in extended statistics
those are not necessarily limited by statistics_target.
This however merits further discussion, because collecting the sample is quite
expensive and increasing it further would make ANALYZE even more painful.
Judging by the experiments with the current implementation, the fixed size
seems to work reasonably well for now, so we leave this as future work.
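For a rough sense of scale (assuming the default statistics target of 100):
ANALYZE then samples 300 * 100 = 30000 rows regardless of table size, while
the 1-3% guideline from the literature would instead suggest 100000-300000
rows for a table with 10 million rows.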

View File

@ -0,0 +1,389 @@
/*-------------------------------------------------------------------------
*
* extended_stats.c
* POSTGRES extended statistics
*
* Generic code supporting statistic objects created via CREATE STATISTICS.
*
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/statistics/extended_stats.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "catalog/indexing.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_statistic_ext.h"
#include "nodes/relation.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/syscache.h"
/*
* Used internally to refer to an individual pg_statistic_ext entry.
*/
typedef struct StatExtEntry
{
Oid statOid; /* OID of pg_statistic_ext entry */
Bitmapset *columns; /* attribute numbers covered by the statistics */
List *types; /* 'char' list of enabled statistic kinds */
} StatExtEntry;
static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
int natts, VacAttrStats **vacattrstats);
static void statext_store(Relation pg_stext, Oid statOid,
MVNDistinct *ndistinct,
VacAttrStats **stats);
/*
* Compute requested extended stats, using the rows sampled for the plain
* (single-column) stats.
*
* This fetches a list of stats from pg_statistic_ext, computes the stats
* and serializes them back into the catalog (as bytea values).
*/
void
BuildRelationExtStatistics(Relation onerel, double totalrows,
int numrows, HeapTuple *rows,
int natts, VacAttrStats **vacattrstats)
{
Relation pg_stext;
ListCell *lc;
List *stats;
pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock);
stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
foreach(lc, stats)
{
StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
MVNDistinct *ndistinct = NULL;
VacAttrStats **stats;
ListCell *lc2;
/* filter only the interesting vacattrstats records */
stats = lookup_var_attr_stats(onerel, stat->columns,
natts, vacattrstats);
/* check allowed number of dimensions */
Assert(bms_num_members(stat->columns) >= 2 &&
bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
/* compute statistic of each type */
foreach(lc2, stat->types)
{
char t = (char) lfirst_int(lc2);
if (t == STATS_EXT_NDISTINCT)
ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
stat->columns, stats);
}
/* store the statistics in the catalog */
statext_store(pg_stext, stat->statOid, ndistinct, stats);
}
heap_close(pg_stext, RowExclusiveLock);
}
/*
* statext_is_kind_built
* Is this stat kind built in the given pg_statistic_ext tuple?
*/
bool
statext_is_kind_built(HeapTuple htup, char type)
{
AttrNumber attnum;
switch (type)
{
case STATS_EXT_NDISTINCT:
attnum = Anum_pg_statistic_ext_standistinct;
break;
default:
elog(ERROR, "unexpected statistics type requested: %d", type);
}
return !heap_attisnull(htup, attnum);
}
/*
* Return a list (of StatExtEntry) of statistics for the given relation.
*/
static List *
fetch_statentries_for_relation(Relation pg_statext, Oid relid)
{
SysScanDesc scan;
ScanKeyData skey;
HeapTuple htup;
List *result = NIL;
/*
* Prepare to scan pg_statistic_ext for entries having starelid = this
* rel.
*/
ScanKeyInit(&skey,
Anum_pg_statistic_ext_starelid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(relid));
scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
NULL, 1, &skey);
while (HeapTupleIsValid(htup = systable_getnext(scan)))
{
StatExtEntry *entry;
Datum datum;
bool isnull;
int i;
ArrayType *arr;
char *enabled;
Form_pg_statistic_ext staForm;
entry = palloc0(sizeof(StatExtEntry));
entry->statOid = HeapTupleGetOid(htup);
staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
for (i = 0; i < staForm->stakeys.dim1; i++)
{
entry->columns = bms_add_member(entry->columns,
staForm->stakeys.values[i]);
}
/* decode the staenabled char array into a list of chars */
datum = SysCacheGetAttr(STATEXTOID, htup,
Anum_pg_statistic_ext_staenabled, &isnull);
Assert(!isnull);
arr = DatumGetArrayTypeP(datum);
if (ARR_NDIM(arr) != 1 ||
ARR_HASNULL(arr) ||
ARR_ELEMTYPE(arr) != CHAROID)
elog(ERROR, "staenabled is not a 1-D char array");
enabled = (char *) ARR_DATA_PTR(arr);
for (i = 0; i < ARR_DIMS(arr)[0]; i++)
{
Assert(enabled[i] == STATS_EXT_NDISTINCT);
entry->types = lappend_int(entry->types, (int) enabled[i]);
}
result = lappend(result, entry);
}
systable_endscan(scan);
return result;
}
/*
* Using 'vacattrstats' of size 'natts' as input data, return a newly built
* VacAttrStats array which includes only the items corresponding to attributes
* indicated by 'attrs'.
*/
static VacAttrStats **
lookup_var_attr_stats(Relation rel, Bitmapset *attrs, int natts,
VacAttrStats **vacattrstats)
{
int i = 0;
int x = -1;
VacAttrStats **stats;
Bitmapset *matched = NULL;
stats = (VacAttrStats **)
palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
/* lookup VacAttrStats info for the requested columns (same attnum) */
while ((x = bms_next_member(attrs, x)) >= 0)
{
int j;
stats[i] = NULL;
for (j = 0; j < natts; j++)
{
if (x == vacattrstats[j]->tupattnum)
{
stats[i] = vacattrstats[j];
break;
}
}
if (!stats[i])
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("extended statistics could not be collected for column \"%s\" of relation %s.%s",
NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname),
get_namespace_name(rel->rd_rel->relnamespace),
RelationGetRelationName(rel)),
errhint("Consider ALTER TABLE \"%s\".\"%s\" ALTER \"%s\" SET STATISTICS -1",
get_namespace_name(rel->rd_rel->relnamespace),
RelationGetRelationName(rel),
NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname))));
/*
* Check that we found a non-dropped column and that the attnum
* matches.
*/
Assert(!stats[i]->attr->attisdropped);
matched = bms_add_member(matched, stats[i]->tupattnum);
i++;
}
if (bms_subset_compare(matched, attrs) != BMS_EQUAL)
elog(ERROR, "could not find all attributes in attribute stats array");
bms_free(matched);
return stats;
}
/*
* statext_store
* Serializes the statistics and stores them into the pg_statistic_ext tuple.
*/
static void
statext_store(Relation pg_stext, Oid statOid,
MVNDistinct *ndistinct,
VacAttrStats **stats)
{
HeapTuple stup,
oldtup;
Datum values[Natts_pg_statistic_ext];
bool nulls[Natts_pg_statistic_ext];
bool replaces[Natts_pg_statistic_ext];
memset(nulls, 1, Natts_pg_statistic_ext * sizeof(bool));
memset(replaces, 0, Natts_pg_statistic_ext * sizeof(bool));
memset(values, 0, Natts_pg_statistic_ext * sizeof(Datum));
/*
* Construct a new pg_statistic_ext tuple, replacing the calculated stats.
*/
if (ndistinct != NULL)
{
bytea *data = statext_ndistinct_serialize(ndistinct);
nulls[Anum_pg_statistic_ext_standistinct - 1] = (data == NULL);
values[Anum_pg_statistic_ext_standistinct - 1] = PointerGetDatum(data);
}
/* always replace the value (either by bytea or NULL) */
replaces[Anum_pg_statistic_ext_standistinct - 1] = true;
/* there should already be a pg_statistic_ext tuple */
oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
if (!HeapTupleIsValid(oldtup))
elog(ERROR, "cache lookup failed for extended statistics %u", statOid);
/* replace it */
stup = heap_modify_tuple(oldtup,
RelationGetDescr(pg_stext),
values,
nulls,
replaces);
ReleaseSysCache(oldtup);
CatalogTupleUpdate(pg_stext, &stup->t_self, stup);
heap_freetuple(stup);
}
/* initialize multi-dimensional sort */
MultiSortSupport
multi_sort_init(int ndims)
{
MultiSortSupport mss;
Assert(ndims >= 2);
mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
+sizeof(SortSupportData) * ndims);
mss->ndims = ndims;
return mss;
}
/*
* Prepare sort support info using the given sort operator
* at the position 'sortdim'
*/
void
multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper)
{
SortSupport ssup = &mss->ssup[sortdim];
ssup->ssup_cxt = CurrentMemoryContext;
ssup->ssup_collation = DEFAULT_COLLATION_OID;
ssup->ssup_nulls_first = false;
PrepareSortSupportFromOrderingOp(oper, ssup);
}
/* compare all the dimensions in the selected order */
int
multi_sort_compare(const void *a, const void *b, void *arg)
{
MultiSortSupport mss = (MultiSortSupport) arg;
SortItem *ia = (SortItem *) a;
SortItem *ib = (SortItem *) b;
int i;
for (i = 0; i < mss->ndims; i++)
{
int compare;
compare = ApplySortComparator(ia->values[i], ia->isnull[i],
ib->values[i], ib->isnull[i],
&mss->ssup[i]);
if (compare != 0)
return compare;
}
/* equal by default */
return 0;
}
/* compare selected dimension */
int
multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
MultiSortSupport mss)
{
return ApplySortComparator(a->values[dim], a->isnull[dim],
b->values[dim], b->isnull[dim],
&mss->ssup[dim]);
}
int
multi_sort_compare_dims(int start, int end,
const SortItem *a, const SortItem *b,
MultiSortSupport mss)
{
int dim;
for (dim = start; dim <= end; dim++)
{
int r = ApplySortComparator(a->values[dim], a->isnull[dim],
b->values[dim], b->isnull[dim],
&mss->ssup[dim]);
if (r != 0)
return r;
}
return 0;
}

View File

@ -0,0 +1,671 @@
/*-------------------------------------------------------------------------
*
* mvdistinct.c
* POSTGRES multivariate ndistinct coefficients
*
* Estimating number of groups in a combination of columns (e.g. for GROUP BY)
* is tricky, and the estimation error is often significant.
* The multivariate ndistinct coefficients address this by storing ndistinct
* estimates for combinations of the user-specified columns. So for example
* given a statistics object on three columns (a,b,c), this module estimates
* and stores n-distinct for (a,b), (a,c), (b,c) and (a,b,c). The per-column
* estimates are already available in pg_statistic.
*
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/statistics/mvdistinct.c
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include "access/htup_details.h"
#include "catalog/pg_statistic_ext.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "lib/stringinfo.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
#include "statistics/extended_stats_internal.h"
#include "statistics/statistics.h"
static double ndistinct_for_combination(double totalrows, int numrows,
HeapTuple *rows, VacAttrStats **stats,
int k, int *combination);
static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
static int n_choose_k(int n, int k);
static int num_combinations(int n);
/* Combination generator API */
/* internal state for generator of k-combinations of n elements */
typedef struct CombinationGenerator
{
int k; /* size of the combination */
int n; /* total number of elements */
int current; /* index of the next combination to return */
int ncombinations; /* number of combinations (size of array) */
int *combinations; /* array of pre-built combinations */
} CombinationGenerator;
static CombinationGenerator *generator_init(int n, int k);
static void generator_free(CombinationGenerator *state);
static int *generator_next(CombinationGenerator *state);
static void generate_combinations(CombinationGenerator *state);
/*
* statext_ndistinct_build
* Compute ndistinct coefficient for the combination of attributes.
*
* This computes the ndistinct estimate using the same estimator used
* in analyze.c and then computes the coefficient.
*/
MVNDistinct *
statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
Bitmapset *attrs, VacAttrStats **stats)
{
MVNDistinct *result;
int k;
int itemcnt;
int numattrs = bms_num_members(attrs);
int numcombs = num_combinations(numattrs);
result = palloc(offsetof(MVNDistinct, items) +
numcombs * sizeof(MVNDistinctItem));
result->magic = STATS_NDISTINCT_MAGIC;
result->type = STATS_NDISTINCT_TYPE_BASIC;
result->nitems = numcombs;
itemcnt = 0;
for (k = 2; k <= numattrs; k++)
{
int *combination;
CombinationGenerator *generator;
/* generate combinations of K out of N elements */
generator = generator_init(numattrs, k);
while ((combination = generator_next(generator)))
{
MVNDistinctItem *item = &result->items[itemcnt];
int j;
item->attrs = NULL;
for (j = 0; j < k; j++)
item->attrs = bms_add_member(item->attrs,
stats[combination[j]]->attr->attnum);
item->ndistinct =
ndistinct_for_combination(totalrows, numrows, rows,
stats, k, combination);
itemcnt++;
Assert(itemcnt <= result->nitems);
}
generator_free(generator);
}
/* must consume exactly the whole output array */
Assert(itemcnt == result->nitems);
return result;
}
/*
* statext_ndistinct_load
* Load the ndistinct value for the indicated pg_statistic_ext tuple
*/
MVNDistinct *
statext_ndistinct_load(Oid mvoid)
{
bool isnull = false;
Datum ndist;
HeapTuple htup;
htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid));
if (!htup)
elog(ERROR, "cache lookup failed for statistics %u", mvoid);
ndist = SysCacheGetAttr(STATEXTOID, htup,
Anum_pg_statistic_ext_standistinct, &isnull);
if (isnull)
elog(ERROR,
"requested statistic kind %c not yet built for statistics %u",
STATS_EXT_NDISTINCT, mvoid);
ReleaseSysCache(htup);
return statext_ndistinct_deserialize(DatumGetByteaP(ndist));
}
/*
* statext_ndistinct_serialize
* serialize ndistinct to the on-disk bytea format
*/
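/*
 * A sketch of the on-disk layout, as implied by the code below: after the
 * varlena header comes the fixed part of MVNDistinct (magic, type, nitems),
 * followed, for each item, by the ndistinct estimate (a double), the number
 * of attributes (an int) and that many AttrNumber values.
 */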
bytea *
statext_ndistinct_serialize(MVNDistinct *ndistinct)
{
int i;
bytea *output;
char *tmp;
Size len;
Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
/*
* Base size is the MVNDistinct header, plus the fixed part of each item
* (its ndistinct value) and an int holding its attribute count.
*/
len = VARHDRSZ + offsetof(MVNDistinct, items) +
ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + sizeof(int));
/* and also include space for the actual attribute numbers */
for (i = 0; i < ndistinct->nitems; i++)
{
int nmembers;
nmembers = bms_num_members(ndistinct->items[i].attrs);
Assert(nmembers >= 2);
len += sizeof(AttrNumber) * nmembers;
}
output = (bytea *) palloc(len);
SET_VARSIZE(output, len);
tmp = VARDATA(output);
/* Store the base struct values */
memcpy(tmp, ndistinct, offsetof(MVNDistinct, items));
tmp += offsetof(MVNDistinct, items);
/*
* store number of attributes and attribute numbers for each ndistinct
* entry
*/
for (i = 0; i < ndistinct->nitems; i++)
{
MVNDistinctItem item = ndistinct->items[i];
int nmembers = bms_num_members(item.attrs);
int x;
memcpy(tmp, &item.ndistinct, sizeof(double));
tmp += sizeof(double);
memcpy(tmp, &nmembers, sizeof(int));
tmp += sizeof(int);
x = -1;
while ((x = bms_next_member(item.attrs, x)) >= 0)
{
AttrNumber value = (AttrNumber) x;
memcpy(tmp, &value, sizeof(AttrNumber));
tmp += sizeof(AttrNumber);
}
Assert(tmp <= ((char *) output + len));
}
return output;
}
/*
* statext_ndistinct_deserialize
* Read an on-disk bytea format MVNDistinct to in-memory format
*/
MVNDistinct *
statext_ndistinct_deserialize(bytea *data)
{
int i;
Size expected_size;
MVNDistinct *ndistinct;
char *tmp;
if (data == NULL)
return NULL;
if (VARSIZE_ANY_EXHDR(data) < offsetof(MVNDistinct, items))
elog(ERROR, "invalid MVNDistinct size %ld (expected at least %ld)",
VARSIZE_ANY_EXHDR(data), offsetof(MVNDistinct, items));
/* read the MVNDistinct header */
ndistinct = (MVNDistinct *) palloc(sizeof(MVNDistinct));
/* initialize pointer to the data part (skip the varlena header) */
tmp = VARDATA_ANY(data);
/* get the header and perform basic sanity checks */
memcpy(ndistinct, tmp, offsetof(MVNDistinct, items));
tmp += offsetof(MVNDistinct, items);
if (ndistinct->magic != STATS_NDISTINCT_MAGIC)
elog(ERROR, "invalid ndistinct magic %d (expected %d)",
ndistinct->magic, STATS_NDISTINCT_MAGIC);
if (ndistinct->type != STATS_NDISTINCT_TYPE_BASIC)
elog(ERROR, "invalid ndistinct type %d (expected %d)",
ndistinct->type, STATS_NDISTINCT_TYPE_BASIC);
Assert(ndistinct->nitems > 0);
/* what minimum bytea size do we expect for those parameters */
expected_size = offsetof(MVNDistinct, items) +
ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) +
sizeof(AttrNumber) * 2);
if (VARSIZE_ANY_EXHDR(data) < expected_size)
elog(ERROR, "invalid dependencies size %ld (expected at least %ld)",
VARSIZE_ANY_EXHDR(data), expected_size);
/* allocate space for the ndistinct items */
ndistinct = repalloc(ndistinct, offsetof(MVNDistinct, items) +
(ndistinct->nitems * sizeof(MVNDistinctItem)));
for (i = 0; i < ndistinct->nitems; i++)
{
MVNDistinctItem *item = &ndistinct->items[i];
int nelems;
item->attrs = NULL;
/* ndistinct value */
memcpy(&item->ndistinct, tmp, sizeof(double));
tmp += sizeof(double);
/* number of attributes */
memcpy(&nelems, tmp, sizeof(int));
tmp += sizeof(int);
Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS));
while (nelems-- > 0)
{
AttrNumber attno;
memcpy(&attno, tmp, sizeof(AttrNumber));
tmp += sizeof(AttrNumber);
item->attrs = bms_add_member(item->attrs, attno);
}
/* still within the bytea */
Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
}
/* we should have consumed the whole bytea exactly */
Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
return ndistinct;
}
/*
* pg_ndistinct_in
* input routine for type pg_ndistinct
*
* pg_ndistinct is real enough to be a table column, but it has no
* operations of its own, and disallows input (just like pg_node_tree).
*/
Datum
pg_ndistinct_in(PG_FUNCTION_ARGS)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot accept a value of type %s", "pg_ndistinct")));
PG_RETURN_VOID(); /* keep compiler quiet */
}
/*
* pg_ndistinct_out
* output routine for type pg_ndistinct
*
* Produces a human-readable representation of the value.
*/
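/*
 * With made-up numbers, the result looks roughly like
 * [{(b 3 4), 11.000000}, {(b 3 6), 50.000000}, {(b 4 6), 75.000000}]
 * where each "(b ...)" part is the attribute-number bitmapset as printed
 * by outBitmapset().
 */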
Datum
pg_ndistinct_out(PG_FUNCTION_ARGS)
{
bytea *data = PG_GETARG_BYTEA_PP(0);
MVNDistinct *ndist = statext_ndistinct_deserialize(data);
int i;
StringInfoData str;
initStringInfo(&str);
appendStringInfoChar(&str, '[');
for (i = 0; i < ndist->nitems; i++)
{
MVNDistinctItem item = ndist->items[i];
if (i > 0)
appendStringInfoString(&str, ", ");
appendStringInfoChar(&str, '{');
outBitmapset(&str, item.attrs);
appendStringInfo(&str, ", %f}", item.ndistinct);
}
appendStringInfoChar(&str, ']');
PG_RETURN_CSTRING(str.data);
}
/*
* pg_ndistinct_recv
* binary input routine for type pg_ndistinct
*/
Datum
pg_ndistinct_recv(PG_FUNCTION_ARGS)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot accept a value of type %s", "pg_ndistinct")));
PG_RETURN_VOID(); /* keep compiler quiet */
}
/*
* pg_ndistinct_send
* binary output routine for type pg_ndistinct
*
* n-distinct is serialized into a bytea value, so let's send that.
*/
Datum
pg_ndistinct_send(PG_FUNCTION_ARGS)
{
return byteasend(fcinfo);
}
/*
* ndistinct_for_combination
* Estimates number of distinct values in a combination of columns.
*
* This uses the same ndistinct estimator as compute_scalar_stats() in
* ANALYZE, i.e.,
* n*d / (n - f1 + f1*n/N)
*
* except that instead of values in a single column we are dealing with
* combination of multiple columns.
*/
static double
ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
VacAttrStats **stats, int k, int *combination)
{
int i,
j;
int f1,
cnt,
d;
bool *isnull;
Datum *values;
SortItem *items;
MultiSortSupport mss;
mss = multi_sort_init(k);
/*
* In order to determine the number of distinct elements, create separate
* values[]/isnull[] arrays with all the data we have, then sort them
* using the specified column combination as dimensions. We could try to
* sort in place, but it'd probably be more complex and bug-prone.
*/
items = (SortItem *) palloc(numrows * sizeof(SortItem));
values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
isnull = (bool *) palloc0(sizeof(bool) * numrows * k);
for (i = 0; i < numrows; i++)
{
items[i].values = &values[i * k];
items[i].isnull = &isnull[i * k];
}
/*
* For each dimension, set up sort-support and fill in the values from
* the sample data.
*/
for (i = 0; i < k; i++)
{
VacAttrStats *colstat = stats[combination[i]];
TypeCacheEntry *type;
type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
if (type->lt_opr == InvalidOid) /* shouldn't happen */
elog(ERROR, "cache lookup failed for ordering operator for type %u",
colstat->attrtypid);
/* prepare the sort function for this dimension */
multi_sort_add_dimension(mss, i, type->lt_opr);
/* accumulate all the data for this dimension into the arrays */
for (j = 0; j < numrows; j++)
{
items[j].values[i] =
heap_getattr(rows[j],
colstat->attr->attnum,
colstat->tupDesc,
&items[j].isnull[i]);
}
}
/* We can sort the array now ... */
qsort_arg((void *) items, numrows, sizeof(SortItem),
multi_sort_compare, mss);
/* ... and count the number of distinct combinations */
f1 = 0;
cnt = 1;
d = 1;
for (i = 1; i < numrows; i++)
{
if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
{
if (cnt == 1)
f1 += 1;
d++;
cnt = 0;
}
cnt += 1;
}
if (cnt == 1)
f1 += 1;
return estimate_ndistinct(totalrows, numrows, d, f1);
}
/* The Duj1 estimator (already used in analyze.c). */
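/*
 * In the notation of the formula above (a sketch matching the parameters):
 * numrows is n (the sample size), totalrows is N (the table size), d is the
 * number of distinct combinations seen in the sample, and f1 is the number
 * of combinations seen exactly once.  The result is clamped to [d, N] and
 * rounded to the nearest integer.
 */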
static double
estimate_ndistinct(double totalrows, int numrows, int d, int f1)
{
double numer,
denom,
ndistinct;
numer = (double) numrows * (double) d;
denom = (double) (numrows - f1) +
(double) f1 *(double) numrows / totalrows;
ndistinct = numer / denom;
/* Clamp to sane range in case of roundoff error */
if (ndistinct < (double) d)
ndistinct = (double) d;
if (ndistinct > totalrows)
ndistinct = totalrows;
return floor(ndistinct + 0.5);
}
/*
* n_choose_k
* computes binomial coefficients using an algorithm that is both
* efficient and resistant to overflow
*/
static int
n_choose_k(int n, int k)
{
int d,
r;
Assert((k > 0) && (n >= k));
/* use symmetry of the binomial coefficients */
k = Min(k, n - k);
r = 1;
for (d = 1; d <= k; ++d)
{
r *= n--;
r /= d;
}
return r;
}
/*
* num_combinations
* number of combinations, excluding single-value combinations
*/
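/*
 * This equals the sum of C(n,k) for k = 2..n, i.e. 2^n - (n + 1).  For
 * example, n = 3 gives 4 combinations: (a,b), (a,c), (b,c) and (a,b,c).
 */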
static int
num_combinations(int n)
{
int k;
int ncombs = 1;
for (k = 1; k <= n; k++)
ncombs *= 2;
ncombs -= (n + 1);
return ncombs;
}
/*
* generator_init
* initialize the generator of combinations
*
* The generator produces combinations of K elements in the interval (0..N).
* We prebuild all the combinations in this method, which is simpler than
* generating them on the fly.
*/
static CombinationGenerator *
generator_init(int n, int k)
{
CombinationGenerator *state;
Assert((n >= k) && (k > 0));
/* allocate the generator state as a single chunk of memory */
state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator));
state->ncombinations = n_choose_k(n, k);
/* pre-allocate space for all combinations */
state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
state->current = 0;
state->k = k;
state->n = n;
/* now actually pre-generate all the combinations of K elements */
generate_combinations(state);
/* make sure we got the expected number of combinations */
Assert(state->current == state->ncombinations);
/* reset the number, so we start with the first one */
state->current = 0;
return state;
}
/*
* generator_next
* returns the next combination from the prebuilt list
*
* Returns a combination of K array indexes (0 .. N), as specified to
* generator_init(), or NULL when there are no more combinations.
*/
static int *
generator_next(CombinationGenerator *state)
{
if (state->current == state->ncombinations)
return NULL;
return &state->combinations[state->k * state->current++];
}
/*
* generator_free
* free the internal state of the generator
*
* Releases the generator internal state (pre-built combinations).
*/
static void
generator_free(CombinationGenerator *state)
{
pfree(state->combinations);
pfree(state);
}
/*
* generate_combinations_recurse
* given a prefix, generate all possible combinations
*
* Given a prefix (first few elements of the combination), generate following
* elements recursively. We generate the combinations in lexicographic order,
* which eliminates permutations of the same combination.
*/
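/*
 * For example, with n = 3 and k = 2 the generated combinations are
 * (0,1), (0,2), (1,2), in that (lexicographic) order.
 */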
static void
generate_combinations_recurse(CombinationGenerator *state,
int index, int start, int *current)
{
/* If we haven't filled all the elements, simply recurse. */
if (index < state->k)
{
int i;
/*
* The values have to be in ascending order, so make sure we start
* with the value passed by parameter.
*/
for (i = start; i < state->n; i++)
{
current[index] = i;
generate_combinations_recurse(state, (index + 1), (i + 1), current);
}
return;
}
else
{
/* we got a valid combination, add it to the array */
memcpy(&state->combinations[(state->k * state->current)],
current, state->k * sizeof(int));
state->current++;
}
}
/*
* generate_combinations
* generate all k-combinations of N elements
*/
static void
generate_combinations(CombinationGenerator *state)
{
int *current = (int *) palloc0(sizeof(int) * state->k);
generate_combinations_recurse(state, 0, 0, current);
pfree(current);
}

View File

@ -1623,6 +1623,10 @@ ProcessUtilitySlow(ParseState *pstate,
commandCollected = true;
break;
case T_CreateStatsStmt:
address = CreateStatistics((CreateStatsStmt *) parsetree);
break;
case T_AlterCollationStmt:
address = AlterCollation((AlterCollationStmt *) parsetree);
break;
@ -1992,6 +1996,8 @@ AlterObjectTypeCommandTag(ObjectType objtype)
break;
case OBJECT_SUBSCRIPTION:
tag = "ALTER SUBSCRIPTION";
case OBJECT_STATISTIC_EXT:
tag = "ALTER STATISTICS";
break;
default:
tag = "???";
@ -2286,6 +2292,8 @@ CreateCommandTag(Node *parsetree)
break;
case OBJECT_PUBLICATION:
tag = "DROP PUBLICATION";
case OBJECT_STATISTIC_EXT:
tag = "DROP STATISTICS";
break;
default:
tag = "???";
@ -2689,6 +2697,10 @@ CreateCommandTag(Node *parsetree)
tag = "EXECUTE";
break;
case T_CreateStatsStmt:
tag = "CREATE STATISTICS";
break;
case T_DeallocateStmt:
{
DeallocateStmt *stmt = (DeallocateStmt *) parsetree;

View File

@ -35,6 +35,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
@ -317,6 +318,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
const Oid *excludeOps,
bool attrsOnly, bool showTblSpc,
int prettyFlags, bool missing_ok);
static char *pg_get_statisticsext_worker(Oid statextid, bool missing_ok);
static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
bool attrsOnly);
static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
@ -1421,6 +1423,85 @@ pg_get_indexdef_worker(Oid indexrelid, int colno,
return buf.data;
}
/*
* pg_get_statisticsextdef
* Get the definition of an extended statistics object
*/
Datum
pg_get_statisticsextdef(PG_FUNCTION_ARGS)
{
Oid statextid = PG_GETARG_OID(0);
char *res;
res = pg_get_statisticsext_worker(statextid, true);
if (res == NULL)
PG_RETURN_NULL();
PG_RETURN_TEXT_P(string_to_text(res));
}
/*
* Internal workhorse to decompile an extended statistics object.
*/
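/*
 * The returned text has the form (with hypothetical names)
 *   CREATE STATISTICS s1 ON (a, b) FROM t1
 */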
static char *
pg_get_statisticsext_worker(Oid statextid, bool missing_ok)
{
Form_pg_statistic_ext statextrec;
Form_pg_class pgclassrec;
HeapTuple statexttup;
HeapTuple pgclasstup;
StringInfoData buf;
int colno;
statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
if (!HeapTupleIsValid(statexttup))
{
if (missing_ok)
return NULL;
elog(ERROR, "cache lookup failed for extended statistics %u", statextid);
}
statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup);
pgclasstup = SearchSysCache1(RELOID, ObjectIdGetDatum(statextrec->starelid));
if (!HeapTupleIsValid(pgclasstup))
{
ReleaseSysCache(statexttup);
elog(ERROR, "cache lookup failed for relation %u", statextrec->starelid);
}
pgclassrec = (Form_pg_class) GETSTRUCT(pgclasstup);
initStringInfo(&buf);
appendStringInfo(&buf, "CREATE STATISTICS %s ON (",
quote_identifier(NameStr(statextrec->staname)));
for (colno = 0; colno < statextrec->stakeys.dim1; colno++)
{
AttrNumber attnum = statextrec->stakeys.values[colno];
char *attname;
if (colno > 0)
appendStringInfoString(&buf, ", ");
attname = get_relid_attribute_name(statextrec->starelid, attnum);
appendStringInfoString(&buf, quote_identifier(attname));
}
appendStringInfo(&buf, ") FROM %s",
quote_identifier(NameStr(pgclassrec->relname)));
ReleaseSysCache(statexttup);
ReleaseSysCache(pgclasstup);
return buf.data;
}
/*
* pg_get_partkeydef
*

View File

@ -110,6 +110,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "mb/pg_wchar.h"
@ -126,6 +127,7 @@
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
#include "statistics/statistics.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/date.h"
@ -164,6 +166,8 @@ static double eqjoinsel_inner(Oid operator,
static double eqjoinsel_semi(Oid operator,
VariableStatData *vardata1, VariableStatData *vardata2,
RelOptInfo *inner_rel);
static bool estimate_multivariate_ndistinct(PlannerInfo *root,
RelOptInfo *rel, List **varinfos, double *ndistinct);
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound);
@ -3398,25 +3402,25 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
{
GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
RelOptInfo *rel = varinfo1->rel;
double reldistinct = varinfo1->ndistinct;
double reldistinct = 1;
double relmaxndistinct = reldistinct;
int relvarcount = 1;
List *newvarinfos = NIL;
List *relvarinfos = NIL;
/*
* Get the product of numdistinct estimates of the Vars for this rel.
* Also, construct new varinfos list of remaining Vars.
* Split the list of varinfos in two - one for the current rel,
* one for remaining Vars on other rels.
*/
relvarinfos = lcons(varinfo1, relvarinfos);
for_each_cell(l, lnext(list_head(varinfos)))
{
GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
if (varinfo2->rel == varinfo1->rel)
{
reldistinct *= varinfo2->ndistinct;
if (relmaxndistinct < varinfo2->ndistinct)
relmaxndistinct = varinfo2->ndistinct;
relvarcount++;
/* varinfos on current rel */
relvarinfos = lcons(varinfo2, relvarinfos);
}
else
{
@ -3425,6 +3429,43 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
}
}
/*
* Get the numdistinct estimate for the Vars of this rel. We
* iteratively search for multivariate n-distinct with maximum number
* of vars; assuming that each var group is independent of the others,
* we multiply them together.  Any relvarinfos left over once no more
* multivariate matches can be found are assumed independent as well, so
* their individual ndistinct estimates are multiplied in too.
*/
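/*
 * A worked example with made-up numbers: for GROUP BY a, b, c with an
 * ndistinct statistic on (a, b) estimating 1000 groups and a per-column
 * estimate of 50 for c, the first pass through the loop consumes (a, b),
 * the second falls back to the per-column path, and reldistinct ends up
 * as 1000 * 50 = 50000.
 */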
while (relvarinfos)
{
double mvndistinct;
if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
&mvndistinct))
{
reldistinct *= mvndistinct;
if (relmaxndistinct < mvndistinct)
relmaxndistinct = mvndistinct;
relvarcount++; /* inaccurate, but doesn't matter */
}
else
{
foreach (l, relvarinfos)
{
GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
reldistinct *= varinfo2->ndistinct;
if (relmaxndistinct < varinfo2->ndistinct)
relmaxndistinct = varinfo2->ndistinct;
relvarcount++;
}
/* we're done with this relation */
relvarinfos = NIL;
}
}
/*
* Sanity check --- don't divide by zero if empty relation.
*/
@ -3667,6 +3708,132 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
*-------------------------------------------------------------------------
*/
/*
* Find applicable ndistinct statistics for the given list of VarInfos (which
* must all belong to the given rel), and update *ndistinct to the estimate of
* the MVNDistinctItem that best matches.  If a match is found, *varinfos is
* updated to remove the matched varinfos from the list.
*
* Varinfos that aren't for simple Vars are ignored.
*
* Return TRUE if we're able to find a match, FALSE otherwise.
*/
static bool
estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
List **varinfos, double *ndistinct)
{
ListCell *lc;
Bitmapset *attnums = NULL;
int nmatches;
Oid statOid = InvalidOid;
MVNDistinct *stats;
Bitmapset *matched = NULL;
/* bail out immediately if the table has no extended statistics */
if (!rel->statlist)
return false;
/* Determine the attnums we're looking for */
foreach(lc, *varinfos)
{
GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
Assert(varinfo->rel == rel);
if (IsA(varinfo->var, Var))
{
attnums = bms_add_member(attnums,
((Var *) varinfo->var)->varattno);
}
}
/* look for the ndistinct statistics matching the most vars */
nmatches = 1; /* we require at least two matches */
foreach(lc, rel->statlist)
{
StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
Bitmapset *shared;
/* skip statistics of other kinds */
if (info->kind != STATS_EXT_NDISTINCT)
continue;
/* compute attnums shared by the vars and the statistic */
shared = bms_intersect(info->keys, attnums);
/*
* Does this statistic match more columns than the current best one?
* If so, use this one instead.
*
* XXX This should break ties using name of the statistic, or
* something like that, to make the outcome stable.
*/
if (bms_num_members(shared) > nmatches)
{
statOid = info->statOid;
nmatches = bms_num_members(shared);
matched = shared;
}
}
/* No match? */
if (statOid == InvalidOid)
return false;
Assert(nmatches > 1 && matched != NULL);
stats = statext_ndistinct_load(statOid);
/*
* If we have a match, search it for the specific item that matches (there
* must be one), and construct the output values.
*/
if (stats)
{
int i;
List *newlist = NIL;
MVNDistinctItem *item = NULL;
/* Find the specific item that exactly matches the combination */
for (i = 0; i < stats->nitems; i++)
{
MVNDistinctItem *tmpitem = &stats->items[i];
if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
{
item = tmpitem;
break;
}
}
/* make sure we found an item */
if (!item)
elog(ERROR, "corrupt MVNDistinct entry");
/* Form the output varinfo list, keeping only unmatched ones */
foreach(lc, *varinfos)
{
GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
AttrNumber attnum;
if (!IsA(varinfo->var, Var))
{
newlist = lappend(newlist, varinfo);
continue;
}
attnum = ((Var *) varinfo->var)->varattno;
if (!bms_is_member(attnum, matched))
newlist = lappend(newlist, varinfo);
}
*varinfos = newlist;
*ndistinct = item->ndistinct;
return true;
}
return false;
}
/*
* convert_to_scalar
* Convert non-NULL values of the indicated types to the comparison

View File

@ -56,6 +56,7 @@
#include "catalog/pg_publication.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_trigger.h"
@ -4451,6 +4452,82 @@ RelationGetIndexList(Relation relation)
return result;
}
/*
* RelationGetStatExtList
* get a list of OIDs of extended statistics on this relation
*
* The statistics list is created only if someone requests it, in a way
* similar to RelationGetIndexList(). We scan pg_statistic_ext to find
* relevant statistics, and add the list to the relcache entry so that we
* won't have to compute it again. Note that shared cache inval of a
* relcache entry will delete the old list and set rd_statvalid to 0,
* so that we must recompute the statistics list on next request. This
* handles creation or deletion of a statistic.
*
* The returned list is guaranteed to be sorted in order by OID, although
* this is not currently needed.
*
* Since shared cache inval causes the relcache's copy of the list to go away,
* we return a copy of the list palloc'd in the caller's context. The caller
* may list_free() the returned list after scanning it. This is necessary
* since the caller will typically be doing syscache lookups on the relevant
* statistics, and syscache lookup could cause SI messages to be processed!
*/
List *
RelationGetStatExtList(Relation relation)
{
Relation indrel;
SysScanDesc indscan;
ScanKeyData skey;
HeapTuple htup;
List *result;
List *oldlist;
MemoryContext oldcxt;
/* Quick exit if we already computed the list. */
if (relation->rd_statvalid != 0)
return list_copy(relation->rd_statlist);
/*
* We build the list we intend to return (in the caller's context) while
* doing the scan. After successfully completing the scan, we copy that
* list into the relcache entry. This avoids cache-context memory leakage
* if we get some sort of error partway through.
*/
result = NIL;
/* Prepare to scan pg_statistic_ext for entries having starelid = this rel. */
ScanKeyInit(&skey,
Anum_pg_statistic_ext_starelid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(RelationGetRelid(relation)));
indrel = heap_open(StatisticExtRelationId, AccessShareLock);
indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
NULL, 1, &skey);
while (HeapTupleIsValid(htup = systable_getnext(indscan)))
/* TODO maybe include only already built statistics? */
result = insert_ordered_oid(result, HeapTupleGetOid(htup));
systable_endscan(indscan);
heap_close(indrel, AccessShareLock);
/* Now save a copy of the completed list in the relcache entry. */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
oldlist = relation->rd_statlist;
relation->rd_statlist = list_copy(result);
relation->rd_statvalid = true;
MemoryContextSwitchTo(oldcxt);
/* Don't leak the old list, if there is one */
list_free(oldlist);
return result;
}
/*
* insert_ordered_oid
* Insert a new Oid into a sorted list of Oids, preserving ordering
@ -5560,6 +5637,8 @@ load_relcache_init_file(bool shared)
rel->rd_pkattr = NULL;
rel->rd_idattr = NULL;
rel->rd_pubactions = NULL;
rel->rd_statvalid = false;
rel->rd_statlist = NIL;
rel->rd_createSubid = InvalidSubTransactionId;
rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
rel->rd_amcache = NULL;

View File

@ -61,6 +61,7 @@
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_replication_origin.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_subscription_rel.h"
#include "catalog/pg_tablespace.h"
@ -726,6 +727,28 @@ static const struct cachedesc cacheinfo[] = {
},
32
},
{StatisticExtRelationId, /* STATEXTNAMENSP */
StatisticExtNameIndexId,
2,
{
Anum_pg_statistic_ext_staname,
Anum_pg_statistic_ext_stanamespace,
0,
0
},
4
},
{StatisticExtRelationId, /* STATEXTOID */
StatisticExtOidIndexId,
1,
{
ObjectIdAttributeNumber,
0,
0,
0
},
4
},
{StatisticRelationId, /* STATRELATTINH */
StatisticRelidAttnumInhIndexId,
3,

View File

@ -272,6 +272,10 @@ getSchemaData(Archive *fout, int *numTablesPtr)
write_msg(NULL, "reading indexes\n");
getIndexes(fout, tblinfo, numTables);
if (g_verbose)
write_msg(NULL, "reading extended statistics\n");
getExtendedStatistics(fout, tblinfo, numTables);
if (g_verbose)
write_msg(NULL, "reading constraints\n");
getConstraints(fout, tblinfo, numTables);

View File

@ -3540,7 +3540,8 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass)
strcmp(te->desc, "TRIGGER") == 0 ||
strcmp(te->desc, "ROW SECURITY") == 0 ||
strcmp(te->desc, "POLICY") == 0 ||
strcmp(te->desc, "USER MAPPING") == 0)
strcmp(te->desc, "USER MAPPING") == 0 ||
strcmp(te->desc, "STATISTICS") == 0)
{
/* these object types don't have separate owners */
}

View File

@ -192,6 +192,7 @@ static void dumpAttrDef(Archive *fout, AttrDefInfo *adinfo);
static void dumpSequence(Archive *fout, TableInfo *tbinfo);
static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo);
static void dumpIndex(Archive *fout, IndxInfo *indxinfo);
static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo);
static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo);
static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo);
static void dumpTSParser(Archive *fout, TSParserInfo *prsinfo);
@ -6582,6 +6583,99 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
destroyPQExpBuffer(query);
}
/*
* getExtendedStatistics
* get information about extended statistics on a dumpable table
* or materialized view.
*
* Note: extended statistics data is not returned directly to the caller, but
* it does get entered into the DumpableObject tables.
*/
void
getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables)
{
int i,
j;
PQExpBuffer query;
PGresult *res;
StatsExtInfo *statsextinfo;
int ntups;
int i_tableoid;
int i_oid;
int i_staname;
int i_stadef;
/* Extended statistics were new in v10 */
if (fout->remoteVersion < 100000)
return;
query = createPQExpBuffer();
for (i = 0; i < numTables; i++)
{
TableInfo *tbinfo = &tblinfo[i];
/* Only plain tables and materialized views can have extended statistics. */
if (tbinfo->relkind != RELKIND_RELATION &&
tbinfo->relkind != RELKIND_MATVIEW)
continue;
/*
* Ignore extended statistics of tables whose definitions are not to
* be dumped.
*/
if (!(tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION))
continue;
if (g_verbose)
write_msg(NULL, "reading extended statistics for table \"%s.%s\"\n",
tbinfo->dobj.namespace->dobj.name,
tbinfo->dobj.name);
/* Make sure we are in proper schema so stadef is right */
selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name);
resetPQExpBuffer(query);
appendPQExpBuffer(query,
"SELECT "
"tableoid, "
"oid, "
"staname, "
"pg_catalog.pg_get_statisticsextdef(oid) AS stadef "
"FROM pg_statistic_ext "
"WHERE starelid = '%u' "
"ORDER BY staname", tbinfo->dobj.catId.oid);
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
ntups = PQntuples(res);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
i_staname = PQfnumber(res, "staname");
i_stadef = PQfnumber(res, "stadef");
statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo));
for (j = 0; j < ntups; j++)
{
statsextinfo[j].dobj.objType = DO_STATSEXT;
statsextinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid));
statsextinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid));
AssignDumpId(&statsextinfo[j].dobj);
statsextinfo[j].dobj.name = pg_strdup(PQgetvalue(res, j, i_staname));
statsextinfo[j].dobj.namespace = tbinfo->dobj.namespace;
statsextinfo[j].statsexttable = tbinfo;
statsextinfo[j].statsextdef = pg_strdup(PQgetvalue(res, j, i_stadef));
}
PQclear(res);
}
destroyPQExpBuffer(query);
}
/*
* getConstraints
*
@ -9234,6 +9328,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
case DO_INDEX:
dumpIndex(fout, (IndxInfo *) dobj);
break;
case DO_STATSEXT:
dumpStatisticsExt(fout, (StatsExtInfo *) dobj);
break;
case DO_REFRESH_MATVIEW:
refreshMatViewData(fout, (TableDataInfo *) dobj);
break;
@ -15728,6 +15825,61 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo)
destroyPQExpBuffer(labelq);
}
/*
* dumpStatisticsExt
* write out to fout an extended statistics object
*/
static void
dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo)
{
DumpOptions *dopt = fout->dopt;
TableInfo *tbinfo = statsextinfo->statsexttable;
PQExpBuffer q;
PQExpBuffer delq;
PQExpBuffer labelq;
if (dopt->dataOnly)
return;
q = createPQExpBuffer();
delq = createPQExpBuffer();
labelq = createPQExpBuffer();
appendPQExpBuffer(labelq, "STATISTICS %s",
fmtId(statsextinfo->dobj.name));
appendPQExpBuffer(q, "%s;\n", statsextinfo->statsextdef);
appendPQExpBuffer(delq, "DROP STATISTICS %s.",
fmtId(tbinfo->dobj.namespace->dobj.name));
appendPQExpBuffer(delq, "%s;\n",
fmtId(statsextinfo->dobj.name));
if (statsextinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)
ArchiveEntry(fout, statsextinfo->dobj.catId,
statsextinfo->dobj.dumpId,
statsextinfo->dobj.name,
tbinfo->dobj.namespace->dobj.name,
NULL,
tbinfo->rolname, false,
"STATISTICS", SECTION_POST_DATA,
q->data, delq->data, NULL,
NULL, 0,
NULL, NULL);
/* Dump Statistics Comments */
if (statsextinfo->dobj.dump & DUMP_COMPONENT_COMMENT)
dumpComment(fout, labelq->data,
tbinfo->dobj.namespace->dobj.name,
tbinfo->rolname,
statsextinfo->dobj.catId, 0,
statsextinfo->dobj.dumpId);
destroyPQExpBuffer(q);
destroyPQExpBuffer(delq);
destroyPQExpBuffer(labelq);
}
/*
* dumpConstraint
* write out to fout a user-defined constraint
@ -17266,6 +17418,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
addObjectDependency(postDataBound, dobj->dumpId);
break;
case DO_INDEX:
case DO_STATSEXT:
case DO_REFRESH_MATVIEW:
case DO_TRIGGER:
case DO_EVENT_TRIGGER:

View File

@ -56,6 +56,7 @@ typedef enum
DO_TABLE,
DO_ATTRDEF,
DO_INDEX,
DO_STATSEXT,
DO_RULE,
DO_TRIGGER,
DO_CONSTRAINT,
@ -362,6 +363,13 @@ typedef struct _indxInfo
int relpages; /* relpages of the underlying table */
} IndxInfo;
typedef struct _statsExtInfo
{
DumpableObject dobj;
TableInfo *statsexttable; /* link to table the stats ext is for */
char *statsextdef;
} StatsExtInfo;
typedef struct _ruleInfo
{
DumpableObject dobj;
@ -682,6 +690,7 @@ extern void getOwnedSeqs(Archive *fout, TableInfo tblinfo[], int numTables);
extern InhInfo *getInherits(Archive *fout, int *numInherits);
extern PartInfo *getPartitions(Archive *fout, int *numPartitions);
extern void getIndexes(Archive *fout, TableInfo tblinfo[], int numTables);
extern void getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables);
extern void getConstraints(Archive *fout, TableInfo tblinfo[], int numTables);
extern RuleInfo *getRules(Archive *fout, int *numRules);
extern void getTriggers(Archive *fout, TableInfo tblinfo[], int numTables);

View File

@ -53,10 +53,11 @@ static const int dbObjectTypePriority[] =
18, /* DO_TABLE */
20, /* DO_ATTRDEF */
28, /* DO_INDEX */
29, /* DO_RULE */
30, /* DO_TRIGGER */
29, /* DO_STATSEXT */
30, /* DO_RULE */
31, /* DO_TRIGGER */
27, /* DO_CONSTRAINT */
31, /* DO_FK_CONSTRAINT */
32, /* DO_FK_CONSTRAINT */
2, /* DO_PROCLANG */
10, /* DO_CAST */
23, /* DO_TABLE_DATA */
@ -1291,6 +1292,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize)
"INDEX %s (ID %d OID %u)",
obj->name, obj->dumpId, obj->catId.oid);
return;
case DO_STATSEXT:
snprintf(buf, bufsize,
"STATISTICS %s (ID %d OID %u)",
obj->name, obj->dumpId, obj->catId.oid);
return;
case DO_REFRESH_MATVIEW:
snprintf(buf, bufsize,
"REFRESH MATERIALIZED VIEW %s (ID %d OID %u)",

View File

@ -2320,6 +2320,57 @@ describeOneTableDetails(const char *schemaname,
PQclear(result);
}
/* print any extended statistics */
if (pset.sversion >= 100000)
{
printfPQExpBuffer(&buf,
"SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n"
" (SELECT pg_catalog.string_agg(pg_catalog.quote_ident(attname::text),', ') \n"
" FROM ((SELECT pg_catalog.unnest(stakeys) AS attnum) s\n"
" JOIN pg_catalog.pg_attribute a ON (starelid = a.attrelid AND\n"
"a.attnum = s.attnum AND not attisdropped))) AS columns,\n"
" (staenabled::char[] @> '{d}'::char[]) AS ndist_enabled\n"
"FROM pg_catalog.pg_statistic_ext stat WHERE starelid = '%s'\n"
"ORDER BY 1;",
oid);
result = PSQLexec(buf.data);
if (!result)
goto error_return;
else
tuples = PQntuples(result);
if (tuples > 0)
{
printTableAddFooter(&cont, _("Statistics:"));
for (i = 0; i < tuples; i++)
{
int cnt = 0;
printfPQExpBuffer(&buf, " ");
/* statistics name (qualified with namespace) */
appendPQExpBuffer(&buf, "\"%s.%s\" WITH (",
PQgetvalue(result, i, 1),
PQgetvalue(result, i, 2));
/* options */
if (strcmp(PQgetvalue(result, i, 5), "t") == 0)
{
appendPQExpBufferStr(&buf, "ndistinct");
cnt++;
}
appendPQExpBuffer(&buf, ") ON (%s)",
PQgetvalue(result, i, 4));
printTableAddFooter(&cont, buf.data);
}
}
PQclear(result);
}
/* print rules */
if (tableinfo.hasrules && tableinfo.relkind != RELKIND_MATVIEW)
{

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201703241
#define CATALOG_VERSION_NO 201703242
#endif

View File

@ -147,6 +147,7 @@ typedef enum ObjectClass
OCLASS_REWRITE, /* pg_rewrite */
OCLASS_TRIGGER, /* pg_trigger */
OCLASS_SCHEMA, /* pg_namespace */
OCLASS_STATISTIC_EXT, /* pg_statistic_ext */
OCLASS_TSPARSER, /* pg_ts_parser */
OCLASS_TSDICT, /* pg_ts_dict */
OCLASS_TSTEMPLATE, /* pg_ts_template */

View File

@ -119,6 +119,7 @@ extern void RemoveAttrDefault(Oid relid, AttrNumber attnum,
DropBehavior behavior, bool complain, bool internal);
extern void RemoveAttrDefaultById(Oid attrdefId);
extern void RemoveStatistics(Oid relid, AttrNumber attnum);
extern void RemoveStatisticsExt(Oid relid, AttrNumber attnum);
extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno,
bool relhasoids);

View File

@ -182,6 +182,13 @@ DECLARE_UNIQUE_INDEX(pg_largeobject_loid_pn_index, 2683, on pg_largeobject using
DECLARE_UNIQUE_INDEX(pg_largeobject_metadata_oid_index, 2996, on pg_largeobject_metadata using btree(oid oid_ops));
#define LargeObjectMetadataOidIndexId 2996
DECLARE_UNIQUE_INDEX(pg_statistic_ext_oid_index, 3380, on pg_statistic_ext using btree(oid oid_ops));
#define StatisticExtOidIndexId 3380
DECLARE_UNIQUE_INDEX(pg_statistic_ext_name_index, 3997, on pg_statistic_ext using btree(staname name_ops, stanamespace oid_ops));
#define StatisticExtNameIndexId 3997
DECLARE_INDEX(pg_statistic_ext_relid_index, 3379, on pg_statistic_ext using btree(starelid oid_ops));
#define StatisticExtRelidIndexId 3379
DECLARE_UNIQUE_INDEX(pg_namespace_nspname_index, 2684, on pg_namespace using btree(nspname name_ops));
#define NamespaceNameIndexId 2684
DECLARE_UNIQUE_INDEX(pg_namespace_oid_index, 2685, on pg_namespace using btree(oid oid_ops));

View File

@ -141,6 +141,8 @@ extern Oid get_collation_oid(List *collname, bool missing_ok);
extern Oid get_conversion_oid(List *conname, bool missing_ok);
extern Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding);
extern Oid get_statistics_oid(List *names, bool missing_ok);
/* initialization & transaction cleanup code */
extern void InitializeSearchPath(void);
extern void AtEOXact_Namespace(bool isCommit, bool parallel);

View File

@ -254,6 +254,10 @@ DATA(insert ( 23 18 78 e f ));
/* pg_node_tree can be coerced to, but not from, text */
DATA(insert ( 194 25 0 i b ));
/* pg_ndistinct can be coerced to, but not from, bytea and text */
DATA(insert ( 3361 17 0 i b ));
DATA(insert ( 3361 25 0 i i ));
/*
* Datetime category
*/

View File

@ -1983,6 +1983,8 @@ DESCR("select statement of a view");
DATA(insert OID = 1642 ( pg_get_userbyid PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 19 "26" _null_ _null_ _null_ _null_ _null_ pg_get_userbyid _null_ _null_ _null_ ));
DESCR("role name by OID (with fallback)");
DATA(insert OID = 1643 ( pg_get_indexdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_indexdef _null_ _null_ _null_ ));
DESCR("extended statistics description");
DATA(insert OID = 3415 ( pg_get_statisticsextdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_statisticsextdef _null_ _null_ _null_ ));
DESCR("index description");
DATA(insert OID = 3352 ( pg_get_partkeydef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_partkeydef _null_ _null_ _null_ ));
DESCR("partition key description");
@ -2758,6 +2760,15 @@ DESCR("current user privilege on any column by rel name");
DATA(insert OID = 3029 ( has_any_column_privilege PGNSP PGUID 12 10 0 0 0 f f f f t f s s 2 0 16 "26 25" _null_ _null_ _null_ _null_ _null_ has_any_column_privilege_id _null_ _null_ _null_ ));
DESCR("current user privilege on any column by rel oid");
DATA(insert OID = 3355 ( pg_ndistinct_in PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3361 "2275" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_in _null_ _null_ _null_ ));
DESCR("I/O");
DATA(insert OID = 3356 ( pg_ndistinct_out PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_out _null_ _null_ _null_ ));
DESCR("I/O");
DATA(insert OID = 3357 ( pg_ndistinct_recv PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 3361 "2281" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_recv _null_ _null_ _null_ ));
DESCR("I/O");
DATA(insert OID = 3358 ( pg_ndistinct_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_send _null_ _null_ _null_ ));
DESCR("I/O");
DATA(insert OID = 1928 ( pg_stat_get_numscans PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ ));
DESCR("statistics: number of scans done for table/index");
DATA(insert OID = 1929 ( pg_stat_get_tuples_returned PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ ));

View File

@ -0,0 +1,75 @@
/*-------------------------------------------------------------------------
*
* pg_statistic_ext.h
* definition of the system "extended statistic" relation (pg_statistic_ext)
* along with the relation's initial contents.
*
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/catalog/pg_statistic_ext.h
*
* NOTES
* the genbki.pl script reads this file and generates .bki
* information from the DATA() statements.
*
*-------------------------------------------------------------------------
*/
#ifndef PG_STATISTIC_EXT_H
#define PG_STATISTIC_EXT_H
#include "catalog/genbki.h"
/* ----------------
* pg_statistic_ext definition. cpp turns this into
* typedef struct FormData_pg_statistic_ext
* ----------------
*/
#define StatisticExtRelationId 3381
CATALOG(pg_statistic_ext,3381)
{
/* These fields form the unique key for the entry: */
Oid starelid; /* relation containing attributes */
NameData staname; /* statistics name */
Oid stanamespace; /* OID of namespace containing this statistics */
Oid staowner; /* statistics owner */
/*
* variable-length fields start here, but we allow direct access to
* stakeys
*/
int2vector stakeys; /* array of column keys */
#ifdef CATALOG_VARLEN
char staenabled[1] BKI_FORCE_NOT_NULL; /* statistic types
* requested to build */
pg_ndistinct standistinct; /* ndistinct coefficients (serialized) */
#endif
} FormData_pg_statistic_ext;
/* ----------------
* Form_pg_statistic_ext corresponds to a pointer to a tuple with
* the format of pg_statistic_ext relation.
* ----------------
*/
typedef FormData_pg_statistic_ext *Form_pg_statistic_ext;
/* ----------------
* compiler constants for pg_statistic_ext
* ----------------
*/
#define Natts_pg_statistic_ext 7
#define Anum_pg_statistic_ext_starelid 1
#define Anum_pg_statistic_ext_staname 2
#define Anum_pg_statistic_ext_stanamespace 3
#define Anum_pg_statistic_ext_staowner 4
#define Anum_pg_statistic_ext_stakeys 5
#define Anum_pg_statistic_ext_staenabled 6
#define Anum_pg_statistic_ext_standistinct 7
#define STATS_EXT_NDISTINCT 'd'
#endif /* PG_STATISTIC_EXT_H */

View File

@ -364,6 +364,10 @@ DATA(insert OID = 194 ( pg_node_tree PGNSP PGUID -1 f b S f t \054 0 0 0 pg_node
DESCR("string representing an internal node tree");
#define PGNODETREEOID 194
DATA(insert OID = 3361 ( pg_ndistinct PGNSP PGUID -1 f b S f t \054 0 0 0 pg_ndistinct_in pg_ndistinct_out pg_ndistinct_recv pg_ndistinct_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ ));
DESCR("multivariate ndistinct coefficients");
#define PGNDISTINCTOID 3361
DATA(insert OID = 32 ( pg_ddl_command PGNSP PGUID SIZEOF_POINTER t p P f t \054 0 0 0 pg_ddl_command_in pg_ddl_command_out pg_ddl_command_recv pg_ddl_command_send - - - ALIGNOF_POINTER p f 0 -1 0 0 _null_ _null_ _null_ ));
DESCR("internal type for passing CollectedCommand");
#define PGDDLCOMMANDOID 32

View File

@ -53,6 +53,7 @@ DECLARE_TOAST(pg_proc, 2836, 2837);
DECLARE_TOAST(pg_rewrite, 2838, 2839);
DECLARE_TOAST(pg_seclabel, 3598, 3599);
DECLARE_TOAST(pg_statistic, 2840, 2841);
DECLARE_TOAST(pg_statistic_ext, 3439, 3440);
DECLARE_TOAST(pg_trigger, 2336, 2337);
/* shared catalogs */

View File

@ -77,6 +77,10 @@ extern ObjectAddress DefineOperator(List *names, List *parameters);
extern void RemoveOperatorById(Oid operOid);
extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt);
/* commands/statscmds.c */
extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt);
extern void RemoveStatisticsById(Oid statsOid);
/* commands/aggregatecmds.c */
extern ObjectAddress DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle,
List *parameters);

View File

@ -279,6 +279,7 @@ typedef enum NodeTag
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
T_StatisticExtInfo,
/*
* TAGS FOR MEMORY NODES (memnodes.h)
@ -424,6 +425,7 @@ typedef enum NodeTag
T_CreateSubscriptionStmt,
T_AlterSubscriptionStmt,
T_DropSubscriptionStmt,
T_CreateStatsStmt,
T_AlterCollationStmt,
/*

View File

@ -1593,6 +1593,7 @@ typedef enum ObjectType
OBJECT_SCHEMA,
OBJECT_SEQUENCE,
OBJECT_SUBSCRIPTION,
OBJECT_STATISTIC_EXT,
OBJECT_TABCONSTRAINT,
OBJECT_TABLE,
OBJECT_TABLESPACE,
@ -2656,6 +2657,20 @@ typedef struct IndexStmt
bool if_not_exists; /* just do nothing if index already exists? */
} IndexStmt;
/* ----------------------
* Create Statistics Statement
* ----------------------
*/
typedef struct CreateStatsStmt
{
NodeTag type;
List *defnames; /* qualified name (list of Value strings) */
RangeVar *relation; /* relation to build statistics on */
List *keys; /* String nodes naming referenced columns */
List *options; /* list of DefElem */
bool if_not_exists; /* do nothing if statistics already exists */
} CreateStatsStmt;
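A hedged sketch of the statement this node represents, using placeholder names (s1, t1) and assuming the optional WITH clause goes between the statistics name and ON, as the psql \d output in the new regression tests suggests: defnames carries the possibly-qualified statistics name, relation the FROM table, keys the parenthesized column list, options the WITH list, and if_not_exists the optional IF NOT EXISTS clause.
CREATE STATISTICS IF NOT EXISTS s1
  WITH (ndistinct)
  ON (a, b)
  FROM t1;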
/* ----------------------
* Create Function Statement
* ----------------------

View File

@ -529,6 +529,7 @@ typedef struct RelOptInfo
List *lateral_vars; /* LATERAL Vars and PHVs referenced by rel */
Relids lateral_referencers; /* rels that reference me laterally */
List *indexlist; /* list of IndexOptInfo */
List *statlist; /* list of StatisticExtInfo */
BlockNumber pages; /* size estimates derived from pg_class */
double tuples;
double allvisfrac;
@ -668,6 +669,24 @@ typedef struct ForeignKeyOptInfo
List *rinfos[INDEX_MAX_KEYS];
} ForeignKeyOptInfo;
/*
* StatisticExtInfo
* Information about extended statistics for planning/optimization
*
* This contains information about one statistics object usable by the
* planner: the pg_statistic_ext row it came from (statOid), the table it
* belongs to (rel), the kind of statistics built (kind), and the columns
* it covers (keys).
*/
typedef struct StatisticExtInfo
{
NodeTag type;
Oid statOid; /* OID of the statistics row */
RelOptInfo *rel; /* back-link to statistics object's table */
char kind; /* statistic kind of this entry */
Bitmapset *keys; /* attnums of the columns covered */
} StatisticExtInfo;
/*
* EquivalenceClasses

View File

@ -0,0 +1,64 @@
/*-------------------------------------------------------------------------
*
* extended_stats_internal.h
* POSTGRES extended statistics internal declarations
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/include/statistics/extended_stats_internal.h
*
*-------------------------------------------------------------------------
*/
#ifndef EXTENDED_STATS_INTERNAL_H
#define EXTENDED_STATS_INTERNAL_H
#include "utils/sortsupport.h"
#include "statistics/statistics.h"
typedef struct
{
Oid eqopr; /* '=' operator for datatype, if any */
Oid eqfunc; /* and associated function */
Oid ltopr; /* '<' operator for datatype, if any */
} StdAnalyzeData;
typedef struct
{
Datum value; /* a data value */
int tupno; /* position index for tuple it came from */
} ScalarItem;
/* multi-sort */
typedef struct MultiSortSupportData
{
int ndims; /* number of dimensions supported by the sort */
SortSupportData ssup[1]; /* sort support data for each dimension */
} MultiSortSupportData;
typedef MultiSortSupportData *MultiSortSupport;
typedef struct SortItem
{
Datum *values;
bool *isnull;
} SortItem;
extern MVNDistinct *statext_ndistinct_build(double totalrows,
int numrows, HeapTuple *rows,
Bitmapset *attrs, VacAttrStats **stats);
extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
extern MVNDistinct *statext_ndistinct_deserialize(bytea *data);
extern MultiSortSupport multi_sort_init(int ndims);
extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
Oid oper);
extern int multi_sort_compare(const void *a, const void *b, void *arg);
extern int multi_sort_compare_dim(int dim, const SortItem * a,
const SortItem * b, MultiSortSupport mss);
extern int multi_sort_compare_dims(int start, int end, const SortItem * a,
const SortItem * b, MultiSortSupport mss);
#endif /* EXTENDED_STATS_INTERNAL_H */

View File

@ -0,0 +1,47 @@
/*-------------------------------------------------------------------------
*
* statistics.h
* Extended statistics and selectivity estimation functions.
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/statistics/statistics.h
*
*-------------------------------------------------------------------------
*/
#ifndef STATISTICS_H
#define STATISTICS_H
#include "commands/vacuum.h"
#define STATS_MAX_DIMENSIONS 8 /* max number of attributes */
/* Multivariate distinct coefficients */
#define STATS_NDISTINCT_MAGIC 0xA352BFA4 /* struct identifier */
#define STATS_NDISTINCT_TYPE_BASIC 1 /* struct version */
/* MVNDistinctItem represents a single combination of columns */
typedef struct MVNDistinctItem
{
double ndistinct; /* ndistinct value for this combination */
Bitmapset *attrs; /* attr numbers of items */
} MVNDistinctItem;
/* A MVNDistinct object, comprising all possible combinations of columns */
typedef struct MVNDistinct
{
uint32 magic; /* magic constant marker */
uint32 type; /* type of ndistinct (BASIC) */
uint32 nitems; /* number of items in the statistic */
MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER];
} MVNDistinct;
extern MVNDistinct *statext_ndistinct_load(Oid mvoid);
extern void BuildRelationExtStatistics(Relation onerel, double totalrows,
int numrows, HeapTuple *rows,
int natts, VacAttrStats **vacattrstats);
extern bool statext_is_kind_built(HeapTuple htup, char kind);
#endif /* STATISTICS_H */
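A usage sketch tying this together (placeholder names carried over from the earlier example; the exact numbers and output layout are indicative only): ANALYZE invokes BuildRelationExtStatistics, which builds an MVNDistinct per statistics object, serializes it via statext_ndistinct_serialize, and stores it in pg_statistic_ext.standistinct, whose output function renders one MVNDistinctItem per column combination.
INSERT INTO t1 SELECT i/100, i/100 FROM generate_series(1, 10000) i;
ANALYZE t1;
SELECT standistinct FROM pg_statistic_ext WHERE staname = 't1_ab';
--        standistinct
-- -------------------------
--  [{(b 1 2), 101.000000}]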

View File

@ -192,6 +192,7 @@ typedef enum AclObjectKind
ACL_KIND_OPFAMILY, /* pg_opfamily */
ACL_KIND_COLLATION, /* pg_collation */
ACL_KIND_CONVERSION, /* pg_conversion */
ACL_KIND_STATISTICS, /* pg_statistic_ext */
ACL_KIND_TABLESPACE, /* pg_tablespace */
ACL_KIND_TSDICTIONARY, /* pg_ts_dict */
ACL_KIND_TSCONFIGURATION, /* pg_ts_config */
@ -326,6 +327,7 @@ extern bool pg_event_trigger_ownercheck(Oid et_oid, Oid roleid);
extern bool pg_extension_ownercheck(Oid ext_oid, Oid roleid);
extern bool pg_publication_ownercheck(Oid pub_oid, Oid roleid);
extern bool pg_subscription_ownercheck(Oid sub_oid, Oid roleid);
extern bool pg_statistics_ownercheck(Oid stat_oid, Oid roleid);
extern bool has_createrole_privilege(Oid roleid);
extern bool has_bypassrls_privilege(Oid roleid);

View File

@ -92,6 +92,7 @@ typedef struct RelationData
bool rd_isvalid; /* relcache entry is valid */
char rd_indexvalid; /* state of rd_indexlist: 0 = not valid, 1 =
* valid, 2 = temporarily forced */
bool rd_statvalid; /* is rd_statlist valid? */
/*
* rd_createSubid is the ID of the highest subtransaction the rel has
@ -136,6 +137,9 @@ typedef struct RelationData
Oid rd_pkindex; /* OID of primary key, if any */
Oid rd_replidindex; /* OID of replica identity index, if any */
/* data managed by RelationGetStatExtList: */
List *rd_statlist; /* list of OIDs of extended stats */
/* data managed by RelationGetIndexAttrBitmap: */
Bitmapset *rd_indexattr; /* identifies columns used in indexes */
Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */

View File

@ -39,6 +39,7 @@ extern void RelationClose(Relation relation);
*/
extern List *RelationGetFKeyList(Relation relation);
extern List *RelationGetIndexList(Relation relation);
extern List *RelationGetStatExtList(Relation relation);
extern Oid RelationGetOidIndex(Relation relation);
extern Oid RelationGetPrimaryKeyIndex(Relation relation);
extern Oid RelationGetReplicaIndex(Relation relation);

View File

@ -86,6 +86,8 @@ enum SysCacheIdentifier
PUBLICATIONRELMAP,
RULERELNAME,
SEQRELID,
STATEXTNAMENSP,
STATEXTOID,
STATRELATTINH,
SUBSCRIPTIONOID,
SUBSCRIPTIONNAME,

View File

@ -496,6 +496,48 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD
ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4);
ERROR: function 2(integer,integer) does not exist in operator family "alt_opf18"
DROP OPERATOR FAMILY alt_opf18 USING btree;
--
-- Statistics
--
SET SESSION AUTHORIZATION regress_alter_user1;
CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER);
CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
ALTER STATISTICS alt_stat1 RENAME TO alt_stat2; -- failed (name conflict)
ERROR: statistics "alt_stat2" already exists in schema "alt_nsp1"
ALTER STATISTICS alt_stat1 RENAME TO alt_stat3; -- OK
ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2; -- failed (no role membership)
ERROR: must be member of role "regress_alter_user2"
ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- OK
ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- OK
SET SESSION AUTHORIZATION regress_alter_user2;
CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
ALTER STATISTICS alt_stat3 RENAME TO alt_stat4; -- failed (not owner)
ERROR: must be owner of statistics alt_stat3
ALTER STATISTICS alt_stat1 RENAME TO alt_stat4; -- OK
ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner)
ERROR: must be owner of statistics alt_stat3
ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership)
ERROR: must be member of role "regress_alter_user3"
ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2; -- failed (not owner)
ERROR: must be owner of statistics alt_stat3
ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- failed (name conflict)
ERROR: statistics "alt_stat2" already exists in schema "alt_nsp2"
RESET SESSION AUTHORIZATION;
SELECT nspname, staname, rolname
FROM pg_statistic_ext s, pg_namespace n, pg_authid a
WHERE s.stanamespace = n.oid AND s.staowner = a.oid
AND n.nspname in ('alt_nsp1', 'alt_nsp2')
ORDER BY nspname, staname;
nspname | staname | rolname
----------+-----------+---------------------
alt_nsp1 | alt_stat2 | regress_alter_user2
alt_nsp1 | alt_stat3 | regress_alter_user1
alt_nsp1 | alt_stat4 | regress_alter_user2
alt_nsp2 | alt_stat2 | regress_alter_user3
(4 rows)
--
-- Text Search Dictionary
--
@ -639,7 +681,7 @@ DROP LANGUAGE alt_lang3 CASCADE;
DROP LANGUAGE alt_lang4 CASCADE;
ERROR: language "alt_lang4" does not exist
DROP SCHEMA alt_nsp1 CASCADE;
NOTICE: drop cascades to 26 other objects
NOTICE: drop cascades to 27 other objects
DETAIL: drop cascades to function alt_func3(integer)
drop cascades to function alt_agg3(integer)
drop cascades to function alt_func4(integer)
@ -656,6 +698,7 @@ drop cascades to operator family alt_opc1 for access method hash
drop cascades to operator family alt_opc2 for access method hash
drop cascades to operator family alt_opf4 for access method hash
drop cascades to operator family alt_opf2 for access method hash
drop cascades to table alt_regress_1
drop cascades to text search dictionary alt_ts_dict3
drop cascades to text search dictionary alt_ts_dict4
drop cascades to text search dictionary alt_ts_dict2

View File

@ -39,6 +39,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL (
CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable;
CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT);
WARNING: tables were not subscribed, you will have to run ALTER SUBSCRIPTION ... REFRESH PUBLICATION to subscribe the tables
CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable;
-- test some error cases
SELECT pg_get_object_address('stone', '{}', '{}');
ERROR: unrecognized object type "stone"
@ -409,7 +410,8 @@ WITH objects (type, name, args) AS (VALUES
('access method', '{btree}', '{}'),
('publication', '{addr_pub}', '{}'),
('publication relation', '{addr_nsp, gentable}', '{addr_pub}'),
('subscription', '{addr_sub}', '{}')
('subscription', '{addr_sub}', '{}'),
('statistics', '{addr_nsp, gentable_stat}', '{}')
)
SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
-- test roundtrip through pg_identify_object_as_address
@ -457,6 +459,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
trigger | | | t on addr_nsp.gentable | t
operator family | pg_catalog | integer_ops | pg_catalog.integer_ops USING btree | t
policy | | | genpol on addr_nsp.gentable | t
statistics | addr_nsp | gentable_stat | addr_nsp.gentable_stat | t
collation | pg_catalog | "default" | pg_catalog."default" | t
transform | | | for integer on language sql | t
text search dictionary | addr_nsp | addr_ts_dict | addr_nsp.addr_ts_dict | t
@ -466,7 +469,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
subscription | | addr_sub | addr_sub | t
publication | | addr_pub | addr_pub | t
publication relation | | | gentable in publication addr_pub | t
(45 rows)
(46 rows)
---
--- Cleanup resources

View File

@ -823,11 +823,12 @@ WHERE c.castmethod = 'b' AND
text | character | 0 | i
character varying | character | 0 | i
pg_node_tree | text | 0 | i
pg_ndistinct | bytea | 0 | i
cidr | inet | 0 | i
xml | text | 0 | a
xml | character varying | 0 | a
xml | character | 0 | a
(7 rows)
(8 rows)
-- **************** pg_conversion ****************
-- Look for illegal values in pg_conversion fields.

View File

@ -2164,6 +2164,14 @@ pg_stats| SELECT n.nspname AS schemaname,
JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum))))
LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid))));
pg_stats_ext| SELECT n.nspname AS schemaname,
c.relname AS tablename,
s.staname,
s.stakeys AS attnums,
length((s.standistinct)::text) AS ndistbytes
FROM ((pg_statistic_ext s
JOIN pg_class c ON ((c.oid = s.starelid)))
LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)));
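A possible way to read the new pg_stats_ext view defined just above (names carried over from the earlier sketches; output omitted):
SELECT schemaname, tablename, staname, attnums, ndistbytes
  FROM pg_stats_ext
 WHERE tablename = 't1';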
pg_tables| SELECT n.nspname AS schemaname,
c.relname AS tablename,
pg_get_userbyid(c.relowner) AS tableowner,

View File

@ -142,6 +142,7 @@ pg_shdepend|t
pg_shdescription|t
pg_shseclabel|t
pg_statistic|t
pg_statistic_ext|t
pg_subscription|t
pg_subscription_rel|t
pg_tablespace|t

View File

@ -0,0 +1,155 @@
-- Generic extended statistics support
-- Ensure stats are dropped sanely
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
DROP STATISTICS ab1_a_b_stats;
CREATE SCHEMA regress_schema_2;
CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1;
DROP STATISTICS regress_schema_2.ab1_a_b_stats;
-- Ensure statistics are dropped when columns are
CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1;
CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1;
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
ALTER TABLE ab1 DROP COLUMN a;
\d ab1
Table "public.ab1"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
b | integer | | |
c | integer | | |
Statistics:
"public.ab1_b_c_stats" WITH (ndistinct) ON (b, c)
DROP TABLE ab1;
-- Ensure things work sanely with SET STATISTICS 0
CREATE TABLE ab1 (a INTEGER, b INTEGER);
ALTER TABLE ab1 ALTER a SET STATISTICS 0;
INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
ANALYZE ab1;
ERROR: extended statistics could not be collected for column "a" of relation public.ab1
HINT: Consider ALTER TABLE "public"."ab1" ALTER "a" SET STATISTICS -1
ALTER TABLE ab1 ALTER a SET STATISTICS -1;
ANALYZE ab1;
DROP TABLE ab1;
-- n-distinct tests
CREATE TABLE ndistinct (
filler1 TEXT,
filler2 NUMERIC,
a INT,
b INT,
filler3 DATE,
c INT,
d INT
);
-- unknown column
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
ERROR: column "unknown_column" referenced in statistics does not exist
-- single column
CREATE STATISTICS s10 ON (a) FROM ndistinct;
ERROR: statistics require at least 2 columns
-- single column, duplicated
CREATE STATISTICS s10 ON (a,a) FROM ndistinct;
ERROR: duplicate column name in statistics definition
-- two columns, one duplicated
CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
ERROR: duplicate column name in statistics definition
-- correct command
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
-- perfectly correlated groups
INSERT INTO ndistinct (a, b, c, filler1)
SELECT i/100, i/100, i/100, cash_words(i::money)
FROM generate_series(1,10000) s(i);
ANALYZE ndistinct;
SELECT staenabled, standistinct
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
staenabled | standistinct
------------+------------------------------------------------------------------------------------------------
{d} | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}]
(1 row)
EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
QUERY PLAN
-----------------------------
HashAggregate
Group Key: a, b
-> Seq Scan on ndistinct
(3 rows)
EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
QUERY PLAN
-----------------------------
HashAggregate
Group Key: a, b, c
-> Seq Scan on ndistinct
(3 rows)
EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
QUERY PLAN
-----------------------------
HashAggregate
Group Key: a, b, c, d
-> Seq Scan on ndistinct
(3 rows)
TRUNCATE TABLE ndistinct;
-- partially correlated groups
INSERT INTO ndistinct (a, b, c)
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
ANALYZE ndistinct;
SELECT staenabled, standistinct
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
staenabled | standistinct
------------+------------------------------------------------------------------------------------------------
{d} | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}]
(1 row)
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
QUERY PLAN
---------------------------------------------------------------------
HashAggregate (cost=230.00..232.01 rows=201 width=16)
Group Key: a, b
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
(3 rows)
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
QUERY PLAN
----------------------------------------------------------------------
HashAggregate (cost=255.00..257.01 rows=201 width=20)
Group Key: a, b, c
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
(3 rows)
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
QUERY PLAN
----------------------------------------------------------------------
HashAggregate (cost=280.00..290.00 rows=1000 width=24)
Group Key: a, b, c, d
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=16)
(3 rows)
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
QUERY PLAN
----------------------------------------------------------------------
HashAggregate (cost=255.00..265.00 rows=1000 width=20)
Group Key: b, c, d
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12)
(3 rows)
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
QUERY PLAN
---------------------------------------------------------------------
HashAggregate (cost=230.00..240.00 rows=1000 width=16)
Group Key: a, d
-> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8)
(3 rows)
DROP TABLE ndistinct;

View File

@ -59,7 +59,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR
-- Look for types that should have an array type according to their typtype,
-- but don't. We exclude composites here because we have not bothered to
-- make array types corresponding to the system catalogs' rowtypes.
-- NOTE: as of v10, this check finds pg_node_tree and smgr.
-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr.
SELECT p1.oid, p1.typname
FROM pg_type as p1
WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
@ -67,11 +67,12 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
(SELECT 1 FROM pg_type as p2
WHERE p2.typname = ('_' || p1.typname)::name AND
p2.typelem = p1.oid and p1.typarray = p2.oid);
oid | typname
-----+--------------
194 | pg_node_tree
210 | smgr
(2 rows)
oid | typname
------+--------------
194 | pg_node_tree
3361 | pg_ndistinct
210 | smgr
(3 rows)
-- Make sure typarray points to a varlena array type of our own base
SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype,

View File

@ -89,7 +89,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview
# ----------
# Another group of parallel tests
# ----------
test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan
test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan stats_ext
# rules cannot run concurrently with any test that creates a view
test: rules psql_crosstab amutils

View File

@ -130,6 +130,7 @@ test: misc_functions
test: sysviews
test: tsrf
test: tidscan
test: stats_ext
test: rules
test: psql_crosstab
test: select_parallel

View File

@ -433,6 +433,37 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD
ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4);
DROP OPERATOR FAMILY alt_opf18 USING btree;
--
-- Statistics
--
SET SESSION AUTHORIZATION regress_alter_user1;
CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER);
CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
ALTER STATISTICS alt_stat1 RENAME TO alt_stat2; -- failed (name conflict)
ALTER STATISTICS alt_stat1 RENAME TO alt_stat3; -- OK
ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2; -- failed (no role membership)
ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- OK
ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- OK
SET SESSION AUTHORIZATION regress_alter_user2;
CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1;
CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1;
ALTER STATISTICS alt_stat3 RENAME TO alt_stat4; -- failed (not owner)
ALTER STATISTICS alt_stat1 RENAME TO alt_stat4; -- OK
ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner)
ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership)
ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2; -- failed (not owner)
ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- failed (name conflict)
RESET SESSION AUTHORIZATION;
SELECT nspname, staname, rolname
FROM pg_statistic_ext s, pg_namespace n, pg_authid a
WHERE s.stanamespace = n.oid AND s.staowner = a.oid
AND n.nspname in ('alt_nsp1', 'alt_nsp2')
ORDER BY nspname, staname;
--
-- Text Search Dictionary

View File

@ -41,6 +41,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL (
TO SQL WITH FUNCTION int4recv(internal));
CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable;
CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT);
CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable;
-- test some error cases
SELECT pg_get_object_address('stone', '{}', '{}');
@ -185,7 +186,8 @@ WITH objects (type, name, args) AS (VALUES
('access method', '{btree}', '{}'),
('publication', '{addr_pub}', '{}'),
('publication relation', '{addr_nsp, gentable}', '{addr_pub}'),
('subscription', '{addr_sub}', '{}')
('subscription', '{addr_sub}', '{}'),
('statistics', '{addr_nsp, gentable_stat}', '{}')
)
SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
-- test roundtrip through pg_identify_object_as_address

View File

@ -0,0 +1,102 @@
-- Generic extended statistics support
-- Ensure stats are dropped sanely
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
DROP STATISTICS ab1_a_b_stats;
CREATE SCHEMA regress_schema_2;
CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1;
DROP STATISTICS regress_schema_2.ab1_a_b_stats;
-- Ensure statistics are dropped when columns are
CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1;
CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1;
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
ALTER TABLE ab1 DROP COLUMN a;
\d ab1
DROP TABLE ab1;
-- Ensure things work sanely with SET STATISTICS 0
CREATE TABLE ab1 (a INTEGER, b INTEGER);
ALTER TABLE ab1 ALTER a SET STATISTICS 0;
INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1;
ANALYZE ab1;
ALTER TABLE ab1 ALTER a SET STATISTICS -1;
ANALYZE ab1;
DROP TABLE ab1;
-- n-distinct tests
CREATE TABLE ndistinct (
filler1 TEXT,
filler2 NUMERIC,
a INT,
b INT,
filler3 DATE,
c INT,
d INT
);
-- unknown column
CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct;
-- single column
CREATE STATISTICS s10 ON (a) FROM ndistinct;
-- single column, duplicated
CREATE STATISTICS s10 ON (a,a) FROM ndistinct;
-- two columns, one duplicated
CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct;
-- correct command
CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct;
-- perfectly correlated groups
INSERT INTO ndistinct (a, b, c, filler1)
SELECT i/100, i/100, i/100, cash_words(i::money)
FROM generate_series(1,10000) s(i);
ANALYZE ndistinct;
SELECT staenabled, standistinct
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
TRUNCATE TABLE ndistinct;
-- partially correlated groups
INSERT INTO ndistinct (a, b, c)
SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i);
ANALYZE ndistinct;
SELECT staenabled, standistinct
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
EXPLAIN
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
DROP TABLE ndistinct;

View File

@ -53,7 +53,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR
-- Look for types that should have an array type according to their typtype,
-- but don't. We exclude composites here because we have not bothered to
-- make array types corresponding to the system catalogs' rowtypes.
-- NOTE: as of v10, this check finds pg_node_tree and smgr.
-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr.
SELECT p1.oid, p1.typname
FROM pg_type as p1