From aa087ec64f703a52f3c48c70117bb02e578f1802 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 13 Jun 2019 17:25:04 +0200 Subject: [PATCH] Add pg_stats_ext view for extended statistics Regular per-column statistics are stored in pg_statistics catalog, which is however rather difficult to read, so we also have pg_stats view with a human-reablable version of the data. For extended statistic the catalog was fairly easy to read, so we did not have such human-readable view so far. Commit 9b6babfa2d however did split the catalog into two, which makes querying harder. Furthermore, we want to show the multi-column MCV list in a way similar to per-column stats (and not as a bytea value). This commit introduces pg_stats_ext view, joining the two catalogs and massaging the data to produce human-readable output similar to pg_stats. It also considers RLS and access privileges - the data is shown only when the user has access to all columns the extended statistic is defined on. Bumped CATVERSION due to adding new system view. Author: Dean Rasheed, with improvements by me Reviewed-by: Dean Rasheed, John Naylor Discussion: https://postgr.es/m/CAEZATCUhT9rt7Ui%3DVdx4N%3D%3DVV5XOK5dsXfnGgVOz_JhAicB%3DZA%40mail.gmail.com --- doc/src/sgml/catalogs.sgml | 192 +++++++++++++++++++++++++++ src/backend/catalog/system_views.sql | 41 ++++++ src/include/catalog/catversion.h | 2 +- src/test/regress/expected/rules.out | 29 ++++ 4 files changed, 263 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index ef4345524a..1300c7bbaa 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -6373,6 +6373,28 @@ SCRAM-SHA-256$<iteration count>:&l about those tables that are readable by the current user. + + pg_statistic should not be readable by the + public, since even statistical information about a table's contents + might be considered sensitive. (Example: minimum and maximum values + of a salary column might be quite interesting.) + pg_stats + is a publicly readable view on + pg_statistic that only exposes information + about those tables that are readable by the current user. + + + + Similarly, pg_statistic_ext_data should not be + readable by the public, since the contents might be considered sensitive. + (Example: most common combination of values in columns might be quite + interesting.) + pg_stats_ext + is a publicly readable view on pg_statistic_ext_data + (after joining with pg_statistic_ext) that only exposes + information about those tables and columns that are readable by the current user. + + <structname>pg_statistic</structname> Columns @@ -8343,6 +8365,11 @@ SCRAM-SHA-256$<iteration count>:&l planner statistics + + pg_stats_ext + extended planner statistics + + pg_tables tables @@ -10922,6 +10949,171 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx + + <structname>pg_stats_ext</structname> + + + pg_stats_ext + + + + The view pg_stats_ext provides access to + the information stored in the pg_statistic_ext + and pg_statistic_ext_data + catalogs. This view allows access only to rows of + pg_statistic_ext and pg_statistic_ext_data + that correspond to tables the user has permission to read, and therefore + it is safe to allow public read access to this view. + + + + pg_stats_ext is also designed to present the + information in a more readable format than the underlying catalog + — at the cost that its schema must be extended whenever new types + of extended statistics are added to pg_statistic_ext. + + +
+ <structname>pg_stats_ext</structname> Columns + + + + + Name + Type + References + Description + + + + + schemaname + name + pg_namespace.nspname + Name of schema containing table + + + + tablename + name + pg_class.relname + Name of table + + + + statistics_schemaname + name + pg_namespace.nspname + Name of schema containing extended statistic + + + + statistics_name + name + pg_statistic_ext.stxname + Name of extended statistics + + + + statistics_owner + oid + pg_authid.oid + Owner of the extended statistics + + + + attnames + name[] + pg_attribute.attname + Names of the column the extended statistics is defined on + + + + kinds + text[] + + Types of exdended statistics enabled for this record + + + + n_distinct + pg_ndistinct + + N-distinct counts for combinations of columns. If greater than + zero, the estimated number of distinct values in the combination. If + less than zero, the negative of the number of distinct values divided + by the number of rows. + (The negated form is used when ANALYZE believes that + the number of distinct values is likely to increase as the table grows; + the positive form is used when the column seems to have a fixed number + of possible values.) For example, -1 indicates a unique combination of + columns in which the number of distinct combinations is the same as the + number of rows. + + + + + dependencies + pg_dependencies + + Functional dependency statistics + + + + most_common_vals + anyarray + + + A list of the most common combinations in the columns. (Null if + no values seem to be more common than any others.) + + + + + most_common_val_nulls + anyarray + + + A list of NULL flags for the most common combinations of values. + (Null when most_common_vals is.) + + + + + most_common_freqs + real[] + + + A list of the frequencies of the most common combinations, + i.e., number of occurrences of each divided by total number of rows. + (Null when most_common_vals is.) + + + + + most_common_base_freqs + real[] + + + A list of the base frequencies of the most common combinations, + i.e., product of per-value frequencies. + (Null when most_common_vals is.) + + + + +
+ + + The maximum number of entries in the array fields can be controlled on a + column-by-column basis using the ALTER TABLE SET STATISTICS + command, or globally by setting the + run-time parameter. + + + + <structname>pg_tables</structname> diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 78a103cdb9..c889890118 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -253,6 +253,47 @@ CREATE VIEW pg_stats WITH (security_barrier) AS REVOKE ALL on pg_statistic FROM public; +CREATE VIEW pg_stats_ext WITH (security_barrier) AS + SELECT cn.nspname AS schemaname, + c.relname AS tablename, + sn.nspname AS statistics_schemaname, + s.stxname AS statistics_name, + pg_get_userbyid(s.stxowner) AS statistics_owner, + ( SELECT array_agg(a.attname ORDER BY a.attnum) + FROM unnest(s.stxkeys) k + JOIN pg_attribute a + ON (a.attrelid = s.stxrelid AND a.attnum = k) + ) AS attnames, + s.stxkind AS kinds, + sd.stxdndistinct AS n_distinct, + sd.stxddependencies AS dependencies, + m.most_common_vals, + m.most_common_val_nulls, + m.most_common_freqs, + m.most_common_base_freqs + FROM pg_statistic_ext s JOIN pg_class c ON (c.oid = s.stxrelid) + JOIN pg_statistic_ext_data sd ON (s.oid = sd.stxoid) + LEFT JOIN pg_namespace cn ON (cn.oid = c.relnamespace) + LEFT JOIN pg_namespace sn ON (sn.oid = s.stxnamespace) + LEFT JOIN LATERAL + ( SELECT array_agg(values) AS most_common_vals, + array_agg(nulls) AS most_common_val_nulls, + array_agg(frequency) AS most_common_freqs, + array_agg(base_frequency) AS most_common_base_freqs + FROM pg_mcv_list_items(sd.stxdmcv) + ) m ON sd.stxdmcv IS NOT NULL + WHERE NOT EXISTS + ( SELECT 1 + FROM unnest(stxkeys) k + JOIN pg_attribute a + ON (a.attrelid = s.stxrelid AND a.attnum = k) + WHERE NOT has_column_privilege(c.oid, a.attnum, 'select') ) + AND (c.relrowsecurity = false OR NOT row_security_active(c.oid)); + +REVOKE ALL on pg_statistic_ext FROM public; +GRANT SELECT (oid, stxrelid, stxname, stxnamespace, stxowner, stxkeys, stxkind) + ON pg_statistic_ext TO public; + CREATE VIEW pg_publication_tables AS SELECT P.pubname AS pubname, diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index e034506751..ed20a4faaf 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201906151 +#define CATALOG_VERSION_NO 201906152 #endif diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 7d365c48d1..210e9cd146 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2284,6 +2284,35 @@ pg_stats| SELECT n.nspname AS schemaname, JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid)))); +pg_stats_ext| SELECT cn.nspname AS schemaname, + c.relname AS tablename, + sn.nspname AS statistics_schemaname, + s.stxname AS statistics_name, + pg_get_userbyid(s.stxowner) AS statistics_owner, + ( SELECT array_agg(a.attname ORDER BY a.attnum) AS array_agg + FROM (unnest(s.stxkeys) k(k) + JOIN pg_attribute a ON (((a.attrelid = s.stxrelid) AND (a.attnum = k.k))))) AS attnames, + s.stxkind AS kinds, + sd.stxdndistinct AS n_distinct, + sd.stxddependencies AS dependencies, + m.most_common_vals, + m.most_common_val_nulls, + m.most_common_freqs, + m.most_common_base_freqs + FROM (((((pg_statistic_ext s + JOIN pg_class c ON ((c.oid = s.stxrelid))) + JOIN pg_statistic_ext_data sd ON ((s.oid = sd.stxoid))) + LEFT JOIN pg_namespace cn ON ((cn.oid = c.relnamespace))) + LEFT JOIN pg_namespace sn ON ((sn.oid = s.stxnamespace))) + LEFT JOIN LATERAL ( SELECT array_agg(pg_mcv_list_items."values") AS most_common_vals, + array_agg(pg_mcv_list_items.nulls) AS most_common_val_nulls, + array_agg(pg_mcv_list_items.frequency) AS most_common_freqs, + array_agg(pg_mcv_list_items.base_frequency) AS most_common_base_freqs + FROM pg_mcv_list_items(sd.stxdmcv) pg_mcv_list_items(index, "values", nulls, frequency, base_frequency)) m ON ((sd.stxdmcv IS NOT NULL))) + WHERE ((NOT (EXISTS ( SELECT 1 + FROM (unnest(s.stxkeys) k(k) + JOIN pg_attribute a ON (((a.attrelid = s.stxrelid) AND (a.attnum = k.k)))) + WHERE (NOT has_column_privilege(c.oid, a.attnum, 'select'::text))))) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid)))); pg_tables| SELECT n.nspname AS schemaname, c.relname AS tablename, pg_get_userbyid(c.relowner) AS tableowner,