pgstat: add pg_stat_force_next_flush(), use it to simplify tests.

In the stats collector days it was hard to write tests for the stats system,
because fundamentally delivery of stats messages over UDP was not
synchronous (nor guaranteed). Now we easily can force pending stats updates to
be flushed synchronously.

This moves stats.sql into a parallel group, there isn't a reason for it to run
in isolation anymore. And it may shake out some bugs.

Bumps catversion.

Author: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/20220303021600.hs34ghqcw6zcokdh@alap3.anarazel.de
This commit is contained in:
Andres Freund 2022-04-06 23:35:56 -07:00
parent 5e07d3d6bd
commit 0f96965c65
11 changed files with 103 additions and 354 deletions

View File

@ -7,50 +7,6 @@ SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_stats', '
(1 row)
CREATE TABLE stats_test(data text);
-- function to wait for counters to advance
CREATE FUNCTION wait_for_decode_stats(check_reset bool, check_spill_txns bool) RETURNS void AS $$
DECLARE
start_time timestamptz := clock_timestamp();
updated bool;
BEGIN
-- we don't want to wait forever; loop will exit after 30 seconds
FOR i IN 1 .. 300 LOOP
IF check_spill_txns THEN
-- check to see if all updates have been reset/updated
SELECT CASE WHEN check_reset THEN (spill_txns = 0)
ELSE (spill_txns > 0)
END
INTO updated
FROM pg_stat_replication_slots WHERE slot_name='regression_slot_stats';
ELSE
-- check to see if all updates have been reset/updated
SELECT CASE WHEN check_reset THEN (total_txns = 0)
ELSE (total_txns > 0)
END
INTO updated
FROM pg_stat_replication_slots WHERE slot_name='regression_slot_stats';
END IF;
exit WHEN updated;
-- wait a little
perform pg_sleep_for('100 milliseconds');
-- reset stats snapshot so we can test again
perform pg_stat_clear_snapshot();
END LOOP;
-- report time waited in postmaster log (where it won't change test output)
RAISE LOG 'wait_for_decode_stats delayed % seconds',
extract(epoch from clock_timestamp() - start_time);
END
$$ LANGUAGE plpgsql;
-- non-spilled xact
SET logical_decoding_work_mem to '64MB';
INSERT INTO stats_test values(1);
@ -60,9 +16,9 @@ SELECT count(*) FROM pg_logical_slot_get_changes('regression_slot_stats', NULL,
3
(1 row)
SELECT wait_for_decode_stats(false, false);
wait_for_decode_stats
-----------------------
SELECT pg_stat_force_next_flush();
pg_stat_force_next_flush
--------------------------
(1 row)
@ -73,19 +29,13 @@ SELECT slot_name, spill_txns = 0 AS spill_txns, spill_count = 0 AS spill_count,
(1 row)
RESET logical_decoding_work_mem;
-- reset the slot stats, and wait for stats collector's total txn to reset
-- reset the slot stats
SELECT pg_stat_reset_replication_slot('regression_slot_stats');
pg_stat_reset_replication_slot
--------------------------------
(1 row)
SELECT wait_for_decode_stats(true, false);
wait_for_decode_stats
-----------------------
(1 row)
SELECT slot_name, spill_txns, spill_count, total_txns, total_bytes FROM pg_stat_replication_slots;
slot_name | spill_txns | spill_count | total_txns | total_bytes
-----------------------+------------+-------------+------------+-------------
@ -102,12 +52,12 @@ SELECT count(*) FROM pg_logical_slot_peek_changes('regression_slot_stats', NULL,
5002
(1 row)
-- Check stats, wait for the stats collector to update. We can't test the
-- exact stats count as that can vary if any background transaction (say by
-- autovacuum) happens in parallel to the main transaction.
SELECT wait_for_decode_stats(false, true);
wait_for_decode_stats
-----------------------
-- Check stats. We can't test the exact stats count as that can vary if any
-- background transaction (say by autovacuum) happens in parallel to the main
-- transaction.
SELECT pg_stat_force_next_flush();
pg_stat_force_next_flush
--------------------------
(1 row)
@ -133,7 +83,6 @@ SELECT slot_name FROM pg_stat_replication_slots;
(1 row)
COMMIT;
DROP FUNCTION wait_for_decode_stats(bool, bool);
DROP TABLE stats_test;
SELECT pg_drop_replication_slot('regression_slot_stats');
pg_drop_replication_slot

View File

@ -5,62 +5,16 @@ SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_stats', '
CREATE TABLE stats_test(data text);
-- function to wait for counters to advance
CREATE FUNCTION wait_for_decode_stats(check_reset bool, check_spill_txns bool) RETURNS void AS $$
DECLARE
start_time timestamptz := clock_timestamp();
updated bool;
BEGIN
-- we don't want to wait forever; loop will exit after 30 seconds
FOR i IN 1 .. 300 LOOP
IF check_spill_txns THEN
-- check to see if all updates have been reset/updated
SELECT CASE WHEN check_reset THEN (spill_txns = 0)
ELSE (spill_txns > 0)
END
INTO updated
FROM pg_stat_replication_slots WHERE slot_name='regression_slot_stats';
ELSE
-- check to see if all updates have been reset/updated
SELECT CASE WHEN check_reset THEN (total_txns = 0)
ELSE (total_txns > 0)
END
INTO updated
FROM pg_stat_replication_slots WHERE slot_name='regression_slot_stats';
END IF;
exit WHEN updated;
-- wait a little
perform pg_sleep_for('100 milliseconds');
-- reset stats snapshot so we can test again
perform pg_stat_clear_snapshot();
END LOOP;
-- report time waited in postmaster log (where it won't change test output)
RAISE LOG 'wait_for_decode_stats delayed % seconds',
extract(epoch from clock_timestamp() - start_time);
END
$$ LANGUAGE plpgsql;
-- non-spilled xact
SET logical_decoding_work_mem to '64MB';
INSERT INTO stats_test values(1);
SELECT count(*) FROM pg_logical_slot_get_changes('regression_slot_stats', NULL, NULL, 'skip-empty-xacts', '1');
SELECT wait_for_decode_stats(false, false);
SELECT pg_stat_force_next_flush();
SELECT slot_name, spill_txns = 0 AS spill_txns, spill_count = 0 AS spill_count, total_txns > 0 AS total_txns, total_bytes > 0 AS total_bytes FROM pg_stat_replication_slots;
RESET logical_decoding_work_mem;
-- reset the slot stats, and wait for stats collector's total txn to reset
-- reset the slot stats
SELECT pg_stat_reset_replication_slot('regression_slot_stats');
SELECT wait_for_decode_stats(true, false);
SELECT slot_name, spill_txns, spill_count, total_txns, total_bytes FROM pg_stat_replication_slots;
-- spilling the xact
@ -69,10 +23,10 @@ INSERT INTO stats_test SELECT 'serialize-topbig--1:'||g.i FROM generate_series(1
COMMIT;
SELECT count(*) FROM pg_logical_slot_peek_changes('regression_slot_stats', NULL, NULL, 'skip-empty-xacts', '1');
-- Check stats, wait for the stats collector to update. We can't test the
-- exact stats count as that can vary if any background transaction (say by
-- autovacuum) happens in parallel to the main transaction.
SELECT wait_for_decode_stats(false, true);
-- Check stats. We can't test the exact stats count as that can vary if any
-- background transaction (say by autovacuum) happens in parallel to the main
-- transaction.
SELECT pg_stat_force_next_flush();
SELECT slot_name, spill_txns > 0 AS spill_txns, spill_count > 0 AS spill_count FROM pg_stat_replication_slots;
-- Ensure stats can be repeatedly accessed using the same stats snapshot. See
@ -82,6 +36,5 @@ SELECT slot_name FROM pg_stat_replication_slots;
SELECT slot_name FROM pg_stat_replication_slots;
COMMIT;
DROP FUNCTION wait_for_decode_stats(bool, bool);
DROP TABLE stats_test;
SELECT pg_drop_replication_slot('regression_slot_stats');

View File

@ -219,6 +219,12 @@ static MemoryContext pgStatPendingContext = NULL;
static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending);
/*
* Force the next stats flush to happen regardless of
* PGSTAT_MIN_INTERVAL. Useful in test scripts.
*/
static bool pgStatForceNextFlush = false;
/*
* For assertions that check pgstat is not used before initialization / after
* shutdown.
@ -560,6 +566,13 @@ pgstat_report_stat(bool force)
pgstat_assert_is_up();
Assert(!IsTransactionBlock());
/* "absorb" the forced flush even if there's nothing to flush */
if (pgStatForceNextFlush)
{
force = true;
pgStatForceNextFlush = false;
}
/* Don't expend a clock check if nothing to do */
if (dlist_is_empty(&pgStatPending) &&
!have_slrustats &&
@ -637,6 +650,16 @@ pgstat_report_stat(bool force)
return 0;
}
/*
* Force locally pending stats to be flushed during the next
* pgstat_report_stat() call. This is useful for writing tests.
*/
void
pgstat_force_next_flush(void)
{
pgStatForceNextFlush = true;
}
/*
* Only for use by pgstat_reset_counters()
*/

View File

@ -2067,6 +2067,16 @@ pg_stat_clear_snapshot(PG_FUNCTION_ARGS)
}
/* Force statistics to be reported at the next occasion */
Datum
pg_stat_force_next_flush(PG_FUNCTION_ARGS)
{
pgstat_force_next_flush();
PG_RETURN_VOID();
}
/* Reset all counters for the current database */
Datum
pg_stat_reset(PG_FUNCTION_ARGS)

View File

@ -5744,6 +5744,11 @@
proname => 'pg_stat_clear_snapshot', proisstrict => 'f', provolatile => 'v',
proparallel => 'r', prorettype => 'void', proargtypes => '',
prosrc => 'pg_stat_clear_snapshot' },
{ oid => '2137',
descr => 'statistics: force stats to be flushed after the next commit',
proname => 'pg_stat_force_next_flush', proisstrict => 'f', provolatile => 'v',
proparallel => 'r', prorettype => 'void', proargtypes => '',
prosrc => 'pg_stat_force_next_flush' },
{ oid => '2274',
descr => 'statistics: reset collected statistics for current database',
proname => 'pg_stat_reset', proisstrict => 'f', provolatile => 'v',

View File

@ -414,6 +414,7 @@ extern void pgstat_initialize(void);
/* Functions called from backends */
extern long pgstat_report_stat(bool force);
extern void pgstat_force_next_flush(void);
extern void pgstat_reset_counters(void);
extern void pgstat_reset(PgStat_Kind kind, Oid dboid, Oid objectid);

View File

@ -603,3 +603,25 @@ SELECT COUNT(*) FROM brin_hot_2 WHERE a = 2 AND b = 100;
1
(1 row)
-- test BRIN index doesn't block HOT update
CREATE TABLE brin_hot (
id integer PRIMARY KEY,
val integer NOT NULL
) WITH (autovacuum_enabled = off, fillfactor = 70);
INSERT INTO brin_hot SELECT *, 0 FROM generate_series(1, 235);
CREATE INDEX val_brin ON brin_hot using brin(val);
UPDATE brin_hot SET val = -3 WHERE id = 42;
-- ensure pending stats are flushed
SELECT pg_stat_force_next_flush();
pg_stat_force_next_flush
--------------------------
(1 row)
SELECT pg_stat_get_tuples_hot_updated('brin_hot'::regclass::oid);
pg_stat_get_tuples_hot_updated
--------------------------------
1
(1 row)
DROP TABLE brin_hot;

View File

@ -28,63 +28,6 @@ SELECT t.seq_scan, t.seq_tup_read, t.idx_scan, t.idx_tup_fetch,
pg_catalog.pg_statio_user_tables AS b
WHERE t.relname='tenk2' AND b.relname='tenk2';
COMMIT;
-- function to wait for counters to advance
create function wait_for_stats() returns void as $$
declare
start_time timestamptz := clock_timestamp();
updated1 bool;
updated2 bool;
updated3 bool;
updated4 bool;
begin
SET LOCAL stats_fetch_consistency = snapshot;
-- We don't want to wait forever. No timeout suffices if the OS drops our
-- stats traffic because an earlier test file left a full UDP buffer.
-- Hence, don't use PG_TEST_TIMEOUT_DEFAULT, which may be large for
-- can't-happen timeouts. Exit after 30 seconds.
for i in 1 .. 300 loop
-- With parallel query, the seqscan and indexscan on tenk2 might be done
-- in parallel worker processes, which will send their stats counters
-- asynchronously to what our own session does. So we must check for
-- those counts to be registered separately from the update counts.
-- check to see if seqscan has been sensed
SELECT (st.seq_scan >= pr.seq_scan + 1) INTO updated1
FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr
WHERE st.relname='tenk2' AND cl.relname='tenk2';
-- check to see if indexscan has been sensed
SELECT (st.idx_scan >= pr.idx_scan + 1) INTO updated2
FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr
WHERE st.relname='tenk2' AND cl.relname='tenk2';
-- check to see if all updates have been sensed
SELECT (n_tup_ins > 0) INTO updated3
FROM pg_stat_user_tables WHERE relname='trunc_stats_test4';
-- We must also check explicitly that pg_stat_get_snapshot_timestamp has
-- advanced, because that comes from the global stats file which might
-- be older than the per-DB stats file we got the other values from.
SELECT (pr.snap_ts < pg_stat_get_snapshot_timestamp()) INTO updated4
FROM prevstats AS pr;
exit when updated1 and updated2 and updated3 and updated4;
-- wait a little
perform pg_sleep_for('100 milliseconds');
-- reset stats snapshot so we can test again
perform pg_stat_clear_snapshot();
end loop;
-- report time waited in postmaster log (where it won't change test output)
raise log 'wait_for_stats delayed % seconds',
extract(epoch from clock_timestamp() - start_time);
end
$$ language plpgsql;
-- test effects of TRUNCATE on n_live_tup/n_dead_tup counters
CREATE TABLE trunc_stats_test(id serial);
CREATE TABLE trunc_stats_test1(id serial, stuff text);
@ -153,17 +96,10 @@ SELECT count(*) FROM tenk2 WHERE unique1 = 1;
(1 row)
RESET enable_bitmapscan;
-- We can't just call wait_for_stats() at this point, because we only
-- transmit stats when the session goes idle, and we probably didn't
-- transmit the last couple of counts yet thanks to the rate-limiting logic
-- in pgstat_report_stat(). But instead of waiting for the rate limiter's
-- timeout to elapse, let's just start a new session. The old one will
-- then send its stats before dying.
\c -
-- wait for stats collector to update
SELECT wait_for_stats();
wait_for_stats
----------------
-- ensure pending stats are flushed
SELECT pg_stat_force_next_flush();
pg_stat_force_next_flush
--------------------------
(1 row)
@ -212,57 +148,6 @@ FROM prevstats AS pr;
COMMIT;
DROP TABLE trunc_stats_test, trunc_stats_test1, trunc_stats_test2, trunc_stats_test3, trunc_stats_test4;
DROP TABLE prevstats;
-- test BRIN index doesn't block HOT update - we include this test here, as it
-- relies on statistics collector and so it may occasionally fail, especially
-- on slower systems
CREATE TABLE brin_hot (
id integer PRIMARY KEY,
val integer NOT NULL
) WITH (autovacuum_enabled = off, fillfactor = 70);
INSERT INTO brin_hot SELECT *, 0 FROM generate_series(1, 235);
CREATE INDEX val_brin ON brin_hot using brin(val);
CREATE FUNCTION wait_for_hot_stats() RETURNS void AS $$
DECLARE
start_time timestamptz := clock_timestamp();
updated bool;
BEGIN
-- we don't want to wait forever; loop will exit after 30 seconds
FOR i IN 1 .. 300 LOOP
SELECT (pg_stat_get_tuples_hot_updated('brin_hot'::regclass::oid) > 0) INTO updated;
EXIT WHEN updated;
-- wait a little
PERFORM pg_sleep_for('100 milliseconds');
-- reset stats snapshot so we can test again
PERFORM pg_stat_clear_snapshot();
END LOOP;
-- report time waited in postmaster log (where it won't change test output)
RAISE log 'wait_for_hot_stats delayed % seconds',
EXTRACT(epoch FROM clock_timestamp() - start_time);
END
$$ LANGUAGE plpgsql;
UPDATE brin_hot SET val = -3 WHERE id = 42;
-- We can't just call wait_for_hot_stats() at this point, because we only
-- transmit stats when the session goes idle, and we probably didn't
-- transmit the last couple of counts yet thanks to the rate-limiting logic
-- in pgstat_report_stat(). But instead of waiting for the rate limiter's
-- timeout to elapse, let's just start a new session. The old one will
-- then send its stats before dying.
\c -
SELECT wait_for_hot_stats();
wait_for_hot_stats
--------------------
(1 row)
SELECT pg_stat_get_tuples_hot_updated('brin_hot'::regclass::oid);
pg_stat_get_tuples_hot_updated
--------------------------------
1
(1 row)
DROP TABLE brin_hot;
DROP FUNCTION wait_for_hot_stats();
-- ensure that stats accessors handle NULL input correctly
SELECT pg_stat_get_replication_slot(NULL);
pg_stat_get_replication_slot

View File

@ -124,7 +124,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr
# ----------
# Another group of parallel tests
# ----------
test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize
test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats
# event_trigger cannot run concurrently with any test that runs DDL
# oidjoins is read-only, though, and should run late for best coverage
@ -132,6 +132,3 @@ test: event_trigger oidjoins
# this test also uses event triggers, so likewise run it by itself
test: fast_default
# run stats by itself because its delay may be insufficient under heavy load
test: stats

View File

@ -526,3 +526,22 @@ SET enable_seqscan = off;
EXPLAIN (COSTS OFF) SELECT * FROM brin_hot_2 WHERE a = 2 AND b = 100;
SELECT COUNT(*) FROM brin_hot_2 WHERE a = 2 AND b = 100;
-- test BRIN index doesn't block HOT update
CREATE TABLE brin_hot (
id integer PRIMARY KEY,
val integer NOT NULL
) WITH (autovacuum_enabled = off, fillfactor = 70);
INSERT INTO brin_hot SELECT *, 0 FROM generate_series(1, 235);
CREATE INDEX val_brin ON brin_hot using brin(val);
UPDATE brin_hot SET val = -3 WHERE id = 42;
-- ensure pending stats are flushed
SELECT pg_stat_force_next_flush();
SELECT pg_stat_get_tuples_hot_updated('brin_hot'::regclass::oid);
DROP TABLE brin_hot;

View File

@ -27,64 +27,6 @@ SELECT t.seq_scan, t.seq_tup_read, t.idx_scan, t.idx_tup_fetch,
WHERE t.relname='tenk2' AND b.relname='tenk2';
COMMIT;
-- function to wait for counters to advance
create function wait_for_stats() returns void as $$
declare
start_time timestamptz := clock_timestamp();
updated1 bool;
updated2 bool;
updated3 bool;
updated4 bool;
begin
SET LOCAL stats_fetch_consistency = snapshot;
-- We don't want to wait forever. No timeout suffices if the OS drops our
-- stats traffic because an earlier test file left a full UDP buffer.
-- Hence, don't use PG_TEST_TIMEOUT_DEFAULT, which may be large for
-- can't-happen timeouts. Exit after 30 seconds.
for i in 1 .. 300 loop
-- With parallel query, the seqscan and indexscan on tenk2 might be done
-- in parallel worker processes, which will send their stats counters
-- asynchronously to what our own session does. So we must check for
-- those counts to be registered separately from the update counts.
-- check to see if seqscan has been sensed
SELECT (st.seq_scan >= pr.seq_scan + 1) INTO updated1
FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr
WHERE st.relname='tenk2' AND cl.relname='tenk2';
-- check to see if indexscan has been sensed
SELECT (st.idx_scan >= pr.idx_scan + 1) INTO updated2
FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr
WHERE st.relname='tenk2' AND cl.relname='tenk2';
-- check to see if all updates have been sensed
SELECT (n_tup_ins > 0) INTO updated3
FROM pg_stat_user_tables WHERE relname='trunc_stats_test4';
-- We must also check explicitly that pg_stat_get_snapshot_timestamp has
-- advanced, because that comes from the global stats file which might
-- be older than the per-DB stats file we got the other values from.
SELECT (pr.snap_ts < pg_stat_get_snapshot_timestamp()) INTO updated4
FROM prevstats AS pr;
exit when updated1 and updated2 and updated3 and updated4;
-- wait a little
perform pg_sleep_for('100 milliseconds');
-- reset stats snapshot so we can test again
perform pg_stat_clear_snapshot();
end loop;
-- report time waited in postmaster log (where it won't change test output)
raise log 'wait_for_stats delayed % seconds',
extract(epoch from clock_timestamp() - start_time);
end
$$ language plpgsql;
-- test effects of TRUNCATE on n_live_tup/n_dead_tup counters
CREATE TABLE trunc_stats_test(id serial);
CREATE TABLE trunc_stats_test1(id serial, stuff text);
@ -151,16 +93,8 @@ SET enable_bitmapscan TO off;
SELECT count(*) FROM tenk2 WHERE unique1 = 1;
RESET enable_bitmapscan;
-- We can't just call wait_for_stats() at this point, because we only
-- transmit stats when the session goes idle, and we probably didn't
-- transmit the last couple of counts yet thanks to the rate-limiting logic
-- in pgstat_report_stat(). But instead of waiting for the rate limiter's
-- timeout to elapse, let's just start a new session. The old one will
-- then send its stats before dying.
\c -
-- wait for stats collector to update
SELECT wait_for_stats();
-- ensure pending stats are flushed
SELECT pg_stat_force_next_flush();
-- check effects
BEGIN;
@ -190,55 +124,6 @@ COMMIT;
DROP TABLE trunc_stats_test, trunc_stats_test1, trunc_stats_test2, trunc_stats_test3, trunc_stats_test4;
DROP TABLE prevstats;
-- test BRIN index doesn't block HOT update - we include this test here, as it
-- relies on statistics collector and so it may occasionally fail, especially
-- on slower systems
CREATE TABLE brin_hot (
id integer PRIMARY KEY,
val integer NOT NULL
) WITH (autovacuum_enabled = off, fillfactor = 70);
INSERT INTO brin_hot SELECT *, 0 FROM generate_series(1, 235);
CREATE INDEX val_brin ON brin_hot using brin(val);
CREATE FUNCTION wait_for_hot_stats() RETURNS void AS $$
DECLARE
start_time timestamptz := clock_timestamp();
updated bool;
BEGIN
-- we don't want to wait forever; loop will exit after 30 seconds
FOR i IN 1 .. 300 LOOP
SELECT (pg_stat_get_tuples_hot_updated('brin_hot'::regclass::oid) > 0) INTO updated;
EXIT WHEN updated;
-- wait a little
PERFORM pg_sleep_for('100 milliseconds');
-- reset stats snapshot so we can test again
PERFORM pg_stat_clear_snapshot();
END LOOP;
-- report time waited in postmaster log (where it won't change test output)
RAISE log 'wait_for_hot_stats delayed % seconds',
EXTRACT(epoch FROM clock_timestamp() - start_time);
END
$$ LANGUAGE plpgsql;
UPDATE brin_hot SET val = -3 WHERE id = 42;
-- We can't just call wait_for_hot_stats() at this point, because we only
-- transmit stats when the session goes idle, and we probably didn't
-- transmit the last couple of counts yet thanks to the rate-limiting logic
-- in pgstat_report_stat(). But instead of waiting for the rate limiter's
-- timeout to elapse, let's just start a new session. The old one will
-- then send its stats before dying.
\c -
SELECT wait_for_hot_stats();
SELECT pg_stat_get_tuples_hot_updated('brin_hot'::regclass::oid);
DROP TABLE brin_hot;
DROP FUNCTION wait_for_hot_stats();
-- ensure that stats accessors handle NULL input correctly
SELECT pg_stat_get_replication_slot(NULL);
SELECT pg_stat_get_subscription_stats(NULL);