diff --git a/contrib/Makefile b/contrib/Makefile index c90fe29222..8dc40f7de0 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -50,6 +50,7 @@ SUBDIRS = \ spi \ tablefunc \ tcn \ + test_decoding \ test_parser \ test_shm_mq \ tsearch2 \ diff --git a/contrib/test_decoding/.gitignore b/contrib/test_decoding/.gitignore new file mode 100644 index 0000000000..5dcb3ff972 --- /dev/null +++ b/contrib/test_decoding/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/contrib/test_decoding/Makefile b/contrib/test_decoding/Makefile new file mode 100644 index 0000000000..c193f73786 --- /dev/null +++ b/contrib/test_decoding/Makefile @@ -0,0 +1,69 @@ +# contrib/test_decoding/Makefile + +MODULES = test_decoding +OBJS = test_decoding.o + +# Note: because we don't tell the Makefile there are any regression tests, +# we have to clean those result files explicitly +EXTRA_CLEAN = -r $(pg_regress_clean_files) + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/test_decoding +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +# Disabled because these tests require "wal_level=logical", which +# typical installcheck users do not have (e.g. buildfarm clients). +installcheck:; + +# But it can nonetheless be very helpful to run tests on preexisting +# installation, allow to do so, but only if requested explicitly. +installcheck-force: regresscheck-install-force isolationcheck-install-force + +check: regresscheck isolationcheck + +submake-regress: + $(MAKE) -C $(top_builddir)/src/test/regress all + +submake-isolation: + $(MAKE) -C $(top_builddir)/src/test/isolation all + +submake-test_decoding: + $(MAKE) -C $(top_builddir)/contrib/test_decoding + +REGRESSCHECKS=ddl rewrite toast permissions decoding_in_xact binary + +regresscheck: all | submake-regress submake-test_decoding + $(pg_regress_check) \ + --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf \ + --temp-install=./tmp_check \ + --extra-install=contrib/test_decoding \ + $(REGRESSCHECKS) + +regresscheck-install-force: | submake-regress submake-test_decoding + $(pg_regress_installcheck) \ + --extra-install=contrib/test_decoding \ + $(REGRESSCHECKS) + +ISOLATIONCHECKS=mxact delayed_startup concurrent_ddl_dml + +isolationcheck: all | submake-isolation submake-test_decoding + $(pg_isolation_regress_check) \ + --temp-config $(top_srcdir)/contrib/test_decoding/logical.conf \ + --extra-install=contrib/test_decoding \ + $(ISOLATIONCHECKS) + +isolationcheck-install-force: all | submake-isolation submake-test_decoding + $(pg_isolation_regress_installcheck) \ + --extra-install=contrib/test_decoding \ + $(ISOLATIONCHECKS) + +PHONY: submake-test_decoding submake-regress check \ + regresscheck regresscheck-install-force \ + isolationcheck isolationcheck-install-force diff --git a/contrib/test_decoding/expected/binary.out b/contrib/test_decoding/expected/binary.out new file mode 100644 index 0000000000..3409d9d426 --- /dev/null +++ b/contrib/test_decoding/expected/binary.out @@ -0,0 +1,35 @@ +-- predictability +SET synchronous_commit = on; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +-- succeeds, textual plugin, textual consumer +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'force-binary', '0'); + data +------ +(0 rows) + +-- fails, binary plugin, textual consumer +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'force-binary', '1'); +ERROR: output plugin cannot produce text output +-- succeeds, textual plugin, binary consumer +SELECT data FROM pg_logical_slot_get_binary_changes('regression_slot', NULL, NULL, 'force-binary', '0'); + data +------ +(0 rows) + +-- succeeds, binary plugin, binary consumer +SELECT data FROM pg_logical_slot_get_binary_changes('regression_slot', NULL, NULL, 'force-binary', '1'); + data +------ +(0 rows) + +SELECT 'init' FROM pg_drop_replication_slot('regression_slot'); + ?column? +---------- + init +(1 row) + diff --git a/contrib/test_decoding/expected/concurrent_ddl_dml.out b/contrib/test_decoding/expected/concurrent_ddl_dml.out new file mode 100644 index 0000000000..cc9165655f --- /dev/null +++ b/contrib/test_decoding/expected/concurrent_ddl_dml.out @@ -0,0 +1,733 @@ +Parsed test spec with 2 sessions + +starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_float s1_insert_tbl2 s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_float: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE float; +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s1_commit: COMMIT; +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[double precision]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl1_float s1_insert_tbl2 s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl1_float: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE float; +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s1_commit: COMMIT; +step s2_alter_tbl1_float: <... completed> +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +BEGIN +table public.pg_temp: INSERT: val1[integer]:1 val2[double precision]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_char s1_insert_tbl2 s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_char: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying; +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s1_commit: COMMIT; +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[character varying]:'1' +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl1_char s1_insert_tbl2 s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl1_char: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s1_commit: COMMIT; +step s2_alter_tbl1_char: <... completed> +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +BEGIN +table public.pg_temp: INSERT: val1[integer]:1 val2[character varying]:'1' +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s2_alter_tbl1_float s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s2_alter_tbl1_float: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE float; +step s1_commit: COMMIT; +step s2_alter_tbl1_float: <... completed> +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +BEGIN +table public.pg_temp: INSERT: val1[integer]:1 val2[double precision]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s2_alter_tbl1_char s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s2_alter_tbl1_char: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; +step s1_commit: COMMIT; +step s2_alter_tbl1_char: <... completed> +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +BEGIN +table public.pg_temp: INSERT: val1[integer]:1 val2[character varying]:'1' +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_float s1_insert_tbl2 s2_alter_tbl1_float s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_float: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE float; +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s2_alter_tbl1_float: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE float; +step s1_commit: COMMIT; +step s2_alter_tbl1_float: <... completed> +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[double precision]:1 +COMMIT +BEGIN +table public.pg_temp: INSERT: val1[integer]:1 val2[double precision]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_char s1_insert_tbl2 s2_alter_tbl1_char s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_char: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying; +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s2_alter_tbl1_char: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; +step s1_commit: COMMIT; +step s2_alter_tbl1_char: <... completed> +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[character varying]:'1' +COMMIT +BEGIN +table public.pg_temp: INSERT: val1[integer]:1 val2[character varying]:'1' +COMMIT +?column? + +stop + +starting permutation: s1_init s2_alter_tbl2_char s1_begin s1_insert_tbl1 s2_alter_tbl2_text s1_insert_tbl2 s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s2_alter_tbl2_char: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying; +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_text: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE text; +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s1_commit: COMMIT; +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[text]:'1' +COMMIT +?column? + +stop + +starting permutation: s1_init s2_alter_tbl2_char s1_begin s1_insert_tbl1 s2_alter_tbl2_text s1_insert_tbl2 s2_alter_tbl1_char s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s2_alter_tbl2_char: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying; +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_text: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE text; +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s2_alter_tbl1_char: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; +step s1_commit: COMMIT; +step s2_alter_tbl1_char: <... completed> +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[text]:'1' +COMMIT +BEGIN +table public.pg_temp: INSERT: val1[integer]:1 val2[character varying]:'1' +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_boolean s1_insert_tbl2 s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_boolean: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE boolean; +ERROR: column "val2" cannot be cast automatically to type boolean +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s1_commit: COMMIT; +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_boolean s1_insert_tbl2 s2_alter_tbl1_boolean s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_boolean: ALTER TABLE tbl2 ALTER COLUMN val2 TYPE boolean; +ERROR: column "val2" cannot be cast automatically to type boolean +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s2_alter_tbl1_boolean: ALTER TABLE tbl1 ALTER COLUMN val2 TYPE boolean; +step s1_commit: COMMIT; +step s2_alter_tbl1_boolean: <... completed> +error in steps s1_commit s2_alter_tbl1_boolean: ERROR: column "val2" cannot be cast automatically to type boolean +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_add_int s1_insert_tbl2_3col s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s1_commit: COMMIT; +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s1_commit s1_begin s2_alter_tbl2_add_int s1_insert_tbl2_3col s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s1_commit: COMMIT; +step s1_begin: BEGIN; +step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s1_commit: COMMIT; +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_add_float s1_insert_tbl2_3col s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_add_float: ALTER TABLE tbl2 ADD COLUMN val3 FLOAT; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s1_commit: COMMIT; +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[double precision]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s1_commit s1_begin s2_alter_tbl2_add_float s1_insert_tbl2_3col s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s1_commit: COMMIT; +step s1_begin: BEGIN; +step s2_alter_tbl2_add_float: ALTER TABLE tbl2 ADD COLUMN val3 FLOAT; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s1_commit: COMMIT; +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[double precision]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s2_alter_tbl2_add_char s1_insert_tbl2_3col s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s1_commit: COMMIT; +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1' +COMMIT +?column? + +stop + +starting permutation: s1_init s1_begin s1_insert_tbl1 s1_insert_tbl2 s1_commit s1_begin s2_alter_tbl2_add_char s1_insert_tbl2_3col s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s1_commit: COMMIT; +step s1_begin: BEGIN; +step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s1_commit: COMMIT; +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1' +COMMIT +?column? + +stop + +starting permutation: s1_init s2_alter_tbl2_add_int s1_begin s1_insert_tbl2_3col s2_alter_tbl2_drop_3rd_col s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER; +step s1_begin: BEGIN; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3; +step s1_commit: COMMIT; +step s2_alter_tbl2_drop_3rd_col: <... completed> +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1 +COMMIT +BEGIN +COMMIT +?column? + +stop + +starting permutation: s1_init s2_alter_tbl2_add_int s1_begin s1_insert_tbl2_3col s2_alter_tbl2_drop_3rd_col s1_insert_tbl2 s1_commit s1_insert_tbl2 s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER; +step s1_begin: BEGIN; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3; +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s1_commit: COMMIT; +step s2_alter_tbl2_drop_3rd_col: <... completed> +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:null +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s2_alter_tbl2_add_int s1_begin s1_insert_tbl2_3col s2_alter_tbl2_drop_3rd_col s1_commit s2_get_changes s2_alter_tbl2_add_text s1_begin s1_insert_tbl2_3col s2_alter_tbl2_3rd_char s1_insert_tbl2_3col s1_commit s2_get_changes s2_alter_tbl2_3rd_int s1_insert_tbl2_3col s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s2_alter_tbl2_add_int: ALTER TABLE tbl2 ADD COLUMN val3 INTEGER; +step s1_begin: BEGIN; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3; +step s1_commit: COMMIT; +step s2_alter_tbl2_drop_3rd_col: <... completed> +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1 +COMMIT +BEGIN +COMMIT +step s2_alter_tbl2_add_text: ALTER TABLE tbl2 ADD COLUMN val3 TEXT; +step s1_begin: BEGIN; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s2_alter_tbl2_3rd_char: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE character varying; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s1_commit: COMMIT; +step s2_alter_tbl2_3rd_char: <... completed> +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1' +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1' +COMMIT +BEGIN +COMMIT +step s2_alter_tbl2_3rd_int: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE int USING val3::integer; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +table public.pg_temp: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:null +table public.pg_temp: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1 +table public.pg_temp: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1 +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[integer]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s2_alter_tbl2_add_char s1_begin s1_insert_tbl1 s1_insert_tbl2_3col s2_alter_tbl2_3rd_text s1_insert_tbl2_3col s1_commit s1_insert_tbl2_3col s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying; +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s2_alter_tbl2_3rd_text: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE text; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s1_commit: COMMIT; +step s2_alter_tbl2_3rd_text: <... completed> +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1' +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1' +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1' +COMMIT +?column? + +stop + +starting permutation: s1_init s2_alter_tbl2_add_text s1_begin s1_insert_tbl1 s1_insert_tbl2_3col s2_alter_tbl2_3rd_char s1_insert_tbl2_3col s1_commit s1_insert_tbl2_3col s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s2_alter_tbl2_add_text: ALTER TABLE tbl2 ADD COLUMN val3 TEXT; +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s2_alter_tbl2_3rd_char: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE character varying; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s1_commit: COMMIT; +step s2_alter_tbl2_3rd_char: <... completed> +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1' +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1' +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1' +COMMIT +?column? + +stop + +starting permutation: s1_init s2_alter_tbl2_add_char s1_begin s1_insert_tbl1 s2_alter_tbl2_3rd_text s1_insert_tbl2_3col s1_commit s2_alter_tbl2_drop_3rd_col s1_insert_tbl2 s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying; +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_3rd_text: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE text; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s1_commit: COMMIT; +step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3; +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[text]:'1' +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s2_alter_tbl2_add_text s1_begin s1_insert_tbl1 s2_alter_tbl2_3rd_char s1_insert_tbl2_3col s1_commit s2_alter_tbl2_drop_3rd_col s1_insert_tbl2 s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s2_alter_tbl2_add_text: ALTER TABLE tbl2 ADD COLUMN val3 TEXT; +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_3rd_char: ALTER TABLE tbl2 ALTER COLUMN val3 TYPE character varying; +step s1_insert_tbl2_3col: INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); +step s1_commit: COMMIT; +step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3; +step s1_insert_tbl2: INSERT INTO tbl2 (val1, val2) VALUES (1, 1); +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 val3[character varying]:'1' +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl2: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +?column? + +stop + +starting permutation: s1_init s2_alter_tbl2_add_char s1_begin s1_insert_tbl1 s2_alter_tbl2_drop_3rd_col s1_insert_tbl1 s1_commit s2_get_changes +step s1_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s2_alter_tbl2_add_char: ALTER TABLE tbl2 ADD COLUMN val3 character varying; +step s1_begin: BEGIN; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s2_alter_tbl2_drop_3rd_col: ALTER TABLE tbl2 DROP COLUMN val3; +step s1_insert_tbl1: INSERT INTO tbl1 (val1, val2) VALUES (1, 1); +step s1_commit: COMMIT; +step s2_get_changes: SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); +data + +BEGIN +COMMIT +BEGIN +COMMIT +BEGIN +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +table public.tbl1: INSERT: val1[integer]:1 val2[integer]:1 +COMMIT +?column? + +stop diff --git a/contrib/test_decoding/expected/ddl.out b/contrib/test_decoding/expected/ddl.out new file mode 100644 index 0000000000..986f504eff --- /dev/null +++ b/contrib/test_decoding/expected/ddl.out @@ -0,0 +1,651 @@ +-- predictability +SET synchronous_commit = on; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +-- fail because of an already existing slot +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +ERROR: replication slot "regression_slot" already exists +-- fail because of an invalid name +SELECT 'init' FROM pg_create_logical_replication_slot('Invalid Name', 'test_decoding'); +ERROR: replication slot name "Invalid Name" contains invalid character +HINT: Replication slot names may only contain letters, numbers and the underscore character. +-- fail twice because of an invalid parameter values +SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', 'frakbar'); +ERROR: could not parse value "frakbar" for parameter "include-xids" +CONTEXT: slot "regression_slot", output plugin "test_decoding", in the startup callback +SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'nonexistant-option', 'frakbar'); +ERROR: option "nonexistant-option" = "frakbar" is unknown +CONTEXT: slot "regression_slot", output plugin "test_decoding", in the startup callback +SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', 'frakbar'); +ERROR: could not parse value "frakbar" for parameter "include-xids" +CONTEXT: slot "regression_slot", output plugin "test_decoding", in the startup callback +-- succeed once +SELECT pg_drop_replication_slot('regression_slot'); + pg_drop_replication_slot +-------------------------- + +(1 row) + +-- fail +SELECT pg_drop_replication_slot('regression_slot'); +ERROR: replication slot "regression_slot" does not exist +-- check that we're detecting a streaming rep slot used for logical decoding +SELECT 'init' FROM pg_create_physical_replication_slot('repl'); + ?column? +---------- + init +(1 row) + +SELECT data FROM pg_logical_slot_get_changes('repl', NULL, NULL, 'include-xids', '0'); +ERROR: cannot use physical replication slot for logical decoding +SELECT pg_drop_replication_slot('repl'); + pg_drop_replication_slot +-------------------------- + +(1 row) + +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +/* check whether status function reports us, only reproduceable columns */ +SELECT slot_name, plugin, slot_type, active, + NOT catalog_xmin IS NULL AS catalog_xmin_set, + xmin IS NULl AS data_xmin_not_set, + pg_xlog_location_diff(restart_lsn, '0/01000000') > 0 AS some_wal +FROM pg_replication_slots; + slot_name | plugin | slot_type | active | catalog_xmin_set | data_xmin_not_set | some_wal +-----------------+---------------+-----------+--------+------------------+-------------------+---------- + regression_slot | test_decoding | logical | f | t | t | t +(1 row) + +/* + * Check that changes are handled correctly when interleaved with ddl + */ +CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120)); +BEGIN; +INSERT INTO replication_example(somedata, text) VALUES (1, 1); +INSERT INTO replication_example(somedata, text) VALUES (1, 2); +COMMIT; +ALTER TABLE replication_example ADD COLUMN bar int; +INSERT INTO replication_example(somedata, text, bar) VALUES (2, 1, 4); +BEGIN; +INSERT INTO replication_example(somedata, text, bar) VALUES (2, 2, 4); +INSERT INTO replication_example(somedata, text, bar) VALUES (2, 3, 4); +INSERT INTO replication_example(somedata, text, bar) VALUES (2, 4, NULL); +COMMIT; +ALTER TABLE replication_example DROP COLUMN bar; +INSERT INTO replication_example(somedata, text) VALUES (3, 1); +BEGIN; +INSERT INTO replication_example(somedata, text) VALUES (3, 2); +INSERT INTO replication_example(somedata, text) VALUES (3, 3); +COMMIT; +ALTER TABLE replication_example RENAME COLUMN text TO somenum; +INSERT INTO replication_example(somedata, somenum) VALUES (4, 1); +-- collect all changes +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +--------------------------------------------------------------------------------------------------------------------------- + BEGIN + COMMIT + BEGIN + table public.replication_example: INSERT: id[integer]:1 somedata[integer]:1 text[character varying]:'1' + table public.replication_example: INSERT: id[integer]:2 somedata[integer]:1 text[character varying]:'2' + COMMIT + BEGIN + COMMIT + BEGIN + table public.replication_example: INSERT: id[integer]:3 somedata[integer]:2 text[character varying]:'1' bar[integer]:4 + COMMIT + BEGIN + table public.replication_example: INSERT: id[integer]:4 somedata[integer]:2 text[character varying]:'2' bar[integer]:4 + table public.replication_example: INSERT: id[integer]:5 somedata[integer]:2 text[character varying]:'3' bar[integer]:4 + table public.replication_example: INSERT: id[integer]:6 somedata[integer]:2 text[character varying]:'4' bar[integer]:null + COMMIT + BEGIN + COMMIT + BEGIN + table public.replication_example: INSERT: id[integer]:7 somedata[integer]:3 text[character varying]:'1' + COMMIT + BEGIN + table public.replication_example: INSERT: id[integer]:8 somedata[integer]:3 text[character varying]:'2' + table public.replication_example: INSERT: id[integer]:9 somedata[integer]:3 text[character varying]:'3' + COMMIT + BEGIN + COMMIT + BEGIN + table public.replication_example: INSERT: id[integer]:10 somedata[integer]:4 somenum[character varying]:'1' + COMMIT +(30 rows) + +ALTER TABLE replication_example ALTER COLUMN somenum TYPE int4 USING (somenum::int4); +-- throw away changes, they contain oids +SELECT count(data) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + count +------- + 12 +(1 row) + +INSERT INTO replication_example(somedata, somenum) VALUES (5, 1); +BEGIN; +INSERT INTO replication_example(somedata, somenum) VALUES (6, 1); +ALTER TABLE replication_example ADD COLUMN zaphod1 int; +INSERT INTO replication_example(somedata, somenum, zaphod1) VALUES (6, 2, 1); +ALTER TABLE replication_example ADD COLUMN zaphod2 int; +INSERT INTO replication_example(somedata, somenum, zaphod2) VALUES (6, 3, 1); +INSERT INTO replication_example(somedata, somenum, zaphod1) VALUES (6, 4, 2); +COMMIT; +-- show changes +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +------------------------------------------------------------------------------------------------------------------------------------------ + BEGIN + table public.replication_example: INSERT: id[integer]:11 somedata[integer]:5 somenum[integer]:1 + COMMIT + BEGIN + table public.replication_example: INSERT: id[integer]:12 somedata[integer]:6 somenum[integer]:1 + table public.replication_example: INSERT: id[integer]:13 somedata[integer]:6 somenum[integer]:2 zaphod1[integer]:1 + table public.replication_example: INSERT: id[integer]:14 somedata[integer]:6 somenum[integer]:3 zaphod1[integer]:null zaphod2[integer]:1 + table public.replication_example: INSERT: id[integer]:15 somedata[integer]:6 somenum[integer]:4 zaphod1[integer]:2 zaphod2[integer]:null + COMMIT +(9 rows) + +-- hide changes bc of oid visible in full table rewrites +CREATE TABLE tr_unique(id2 serial unique NOT NULL, data int); +INSERT INTO tr_unique(data) VALUES(10); +ALTER TABLE tr_unique RENAME TO tr_pkey; +ALTER TABLE tr_pkey ADD COLUMN id serial primary key; +SELECT count(data) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + count +------- + 10 +(1 row) + +INSERT INTO tr_pkey(data) VALUES(1); +--show deletion with primary key +DELETE FROM tr_pkey; +/* display results */ +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +---------------------------------------------------------------------------- + BEGIN + table public.tr_pkey: INSERT: id2[integer]:2 data[integer]:1 id[integer]:2 + COMMIT + BEGIN + table public.tr_pkey: DELETE: id[integer]:1 + table public.tr_pkey: DELETE: id[integer]:2 + COMMIT +(7 rows) + +/* + * check that disk spooling works + */ +BEGIN; +CREATE TABLE tr_etoomuch (id serial primary key, data int); +INSERT INTO tr_etoomuch(data) SELECT g.i FROM generate_series(1, 10234) g(i); +DELETE FROM tr_etoomuch WHERE id < 5000; +UPDATE tr_etoomuch SET data = - data WHERE id > 5000; +COMMIT; +/* display results, but hide most of the output */ +SELECT count(*), min(data), max(data) +FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0') +GROUP BY substring(data, 1, 24) +ORDER BY 1; + count | min | max +-------+-------------------------------------------------+------------------------------------------------------------------------ + 1 | COMMIT | COMMIT + 1 | BEGIN | BEGIN + 20467 | table public.tr_etoomuch: DELETE: id[integer]:1 | table public.tr_etoomuch: UPDATE: id[integer]:9999 data[integer]:-9999 +(3 rows) + +/* + * check whether we decode subtransactions correctly in relation with each + * other + */ +CREATE TABLE tr_sub (id serial primary key, path text); +-- toplevel, subtxn, toplevel, subtxn, subtxn +BEGIN; +INSERT INTO tr_sub(path) VALUES ('1-top-#1'); +SAVEPOINT a; +INSERT INTO tr_sub(path) VALUES ('1-top-1-#1'); +INSERT INTO tr_sub(path) VALUES ('1-top-1-#2'); +RELEASE SAVEPOINT a; +SAVEPOINT b; +SAVEPOINT c; +INSERT INTO tr_sub(path) VALUES ('1-top-2-1-#1'); +INSERT INTO tr_sub(path) VALUES ('1-top-2-1-#2'); +RELEASE SAVEPOINT c; +INSERT INTO tr_sub(path) VALUES ('1-top-2-#1'); +RELEASE SAVEPOINT b; +COMMIT; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +---------------------------------------------------------------------- + BEGIN + COMMIT + BEGIN + table public.tr_sub: INSERT: id[integer]:1 path[text]:'1-top-#1' + table public.tr_sub: INSERT: id[integer]:2 path[text]:'1-top-1-#1' + table public.tr_sub: INSERT: id[integer]:3 path[text]:'1-top-1-#2' + table public.tr_sub: INSERT: id[integer]:4 path[text]:'1-top-2-1-#1' + table public.tr_sub: INSERT: id[integer]:5 path[text]:'1-top-2-1-#2' + table public.tr_sub: INSERT: id[integer]:6 path[text]:'1-top-2-#1' + COMMIT +(10 rows) + +-- check that we handle xlog assignments correctly +BEGIN; +-- nest 80 subtxns +SAVEPOINT subtop;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +-- assign xid by inserting +INSERT INTO tr_sub(path) VALUES ('2-top-1...--#1'); +INSERT INTO tr_sub(path) VALUES ('2-top-1...--#2'); +INSERT INTO tr_sub(path) VALUES ('2-top-1...--#3'); +RELEASE SAVEPOINT subtop; +INSERT INTO tr_sub(path) VALUES ('2-top-#1'); +COMMIT; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +------------------------------------------------------------------------ + BEGIN + table public.tr_sub: INSERT: id[integer]:7 path[text]:'2-top-1...--#1' + table public.tr_sub: INSERT: id[integer]:8 path[text]:'2-top-1...--#2' + table public.tr_sub: INSERT: id[integer]:9 path[text]:'2-top-1...--#3' + table public.tr_sub: INSERT: id[integer]:10 path[text]:'2-top-#1' + COMMIT +(6 rows) + +-- make sure rollbacked subtransactions aren't decoded +BEGIN; +INSERT INTO tr_sub(path) VALUES ('3-top-2-#1'); +SAVEPOINT a; +INSERT INTO tr_sub(path) VALUES ('3-top-2-1-#1'); +SAVEPOINT b; +INSERT INTO tr_sub(path) VALUES ('3-top-2-2-#1'); +ROLLBACK TO SAVEPOINT b; +INSERT INTO tr_sub(path) VALUES ('3-top-2-#2'); +COMMIT; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +----------------------------------------------------------------------- + BEGIN + table public.tr_sub: INSERT: id[integer]:11 path[text]:'3-top-2-#1' + table public.tr_sub: INSERT: id[integer]:12 path[text]:'3-top-2-1-#1' + table public.tr_sub: INSERT: id[integer]:14 path[text]:'3-top-2-#2' + COMMIT +(5 rows) + +-- test whether a known, but not yet logged toplevel xact, followed by a +-- subxact commit is handled correctly +BEGIN; +SELECT txid_current() != 0; -- so no fixed xid apears in the outfile + ?column? +---------- + t +(1 row) + +SAVEPOINT a; +INSERT INTO tr_sub(path) VALUES ('4-top-1-#1'); +RELEASE SAVEPOINT a; +COMMIT; +-- test whether a change in a subtransaction, in an unknown toplevel +-- xact is handled correctly. +BEGIN; +SAVEPOINT a; +INSERT INTO tr_sub(path) VALUES ('5-top-1-#1'); +COMMIT; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +--------------------------------------------------------------------- + BEGIN + table public.tr_sub: INSERT: id[integer]:15 path[text]:'4-top-1-#1' + COMMIT + BEGIN + table public.tr_sub: INSERT: id[integer]:16 path[text]:'5-top-1-#1' + COMMIT +(6 rows) + +/* + * Check whether treating a table as a catalog table works somewhat + */ +CREATE TABLE replication_metadata ( + id serial primary key, + relation name NOT NULL, + options text[] +) +WITH (user_catalog_table = true) +; +\d+ replication_metadata + Table "public.replication_metadata" + Column | Type | Modifiers | Storage | Stats target | Description +----------+---------+-------------------------------------------------------------------+----------+--------------+------------- + id | integer | not null default nextval('replication_metadata_id_seq'::regclass) | plain | | + relation | name | not null | plain | | + options | text[] | | extended | | +Indexes: + "replication_metadata_pkey" PRIMARY KEY, btree (id) +Has OIDs: no +Options: user_catalog_table=true + +INSERT INTO replication_metadata(relation, options) +VALUES ('foo', ARRAY['a', 'b']); +ALTER TABLE replication_metadata RESET (user_catalog_table); +\d+ replication_metadata + Table "public.replication_metadata" + Column | Type | Modifiers | Storage | Stats target | Description +----------+---------+-------------------------------------------------------------------+----------+--------------+------------- + id | integer | not null default nextval('replication_metadata_id_seq'::regclass) | plain | | + relation | name | not null | plain | | + options | text[] | | extended | | +Indexes: + "replication_metadata_pkey" PRIMARY KEY, btree (id) +Has OIDs: no + +INSERT INTO replication_metadata(relation, options) +VALUES ('bar', ARRAY['a', 'b']); +ALTER TABLE replication_metadata SET (user_catalog_table = true); +\d+ replication_metadata + Table "public.replication_metadata" + Column | Type | Modifiers | Storage | Stats target | Description +----------+---------+-------------------------------------------------------------------+----------+--------------+------------- + id | integer | not null default nextval('replication_metadata_id_seq'::regclass) | plain | | + relation | name | not null | plain | | + options | text[] | | extended | | +Indexes: + "replication_metadata_pkey" PRIMARY KEY, btree (id) +Has OIDs: no +Options: user_catalog_table=true + +INSERT INTO replication_metadata(relation, options) +VALUES ('blub', NULL); +-- make sure rewrites don't work +ALTER TABLE replication_metadata ADD COLUMN rewritemeornot int; +ALTER TABLE replication_metadata ALTER COLUMN rewritemeornot TYPE text; +ERROR: cannot rewrite table "replication_metadata" used as a catalog table +ALTER TABLE replication_metadata SET (user_catalog_table = false); +\d+ replication_metadata + Table "public.replication_metadata" + Column | Type | Modifiers | Storage | Stats target | Description +----------------+---------+-------------------------------------------------------------------+----------+--------------+------------- + id | integer | not null default nextval('replication_metadata_id_seq'::regclass) | plain | | + relation | name | not null | plain | | + options | text[] | | extended | | + rewritemeornot | integer | | plain | | +Indexes: + "replication_metadata_pkey" PRIMARY KEY, btree (id) +Has OIDs: no +Options: user_catalog_table=false + +INSERT INTO replication_metadata(relation, options) +VALUES ('zaphod', NULL); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +------------------------------------------------------------------------------------------------------------------------------------ + BEGIN + COMMIT + BEGIN + table public.replication_metadata: INSERT: id[integer]:1 relation[name]:'foo' options[text[]]:'{a,b}' + COMMIT + BEGIN + COMMIT + BEGIN + table public.replication_metadata: INSERT: id[integer]:2 relation[name]:'bar' options[text[]]:'{a,b}' + COMMIT + BEGIN + COMMIT + BEGIN + table public.replication_metadata: INSERT: id[integer]:3 relation[name]:'blub' options[text[]]:null + COMMIT + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + table public.replication_metadata: INSERT: id[integer]:4 relation[name]:'zaphod' options[text[]]:null rewritemeornot[integer]:null + COMMIT +(22 rows) + +/* + * check whether we handle updates/deletes correct with & without a pkey + */ +/* we should handle the case without a key at all more gracefully */ +CREATE TABLE table_without_key(id serial, data int); +INSERT INTO table_without_key(data) VALUES(1),(2); +DELETE FROM table_without_key WHERE data = 1; +-- won't log old keys +UPDATE table_without_key SET data = 3 WHERE data = 2; +UPDATE table_without_key SET id = -id; +UPDATE table_without_key SET id = -id; +-- should log the full old row now +ALTER TABLE table_without_key REPLICA IDENTITY FULL; +UPDATE table_without_key SET data = 3 WHERE data = 2; +UPDATE table_without_key SET id = -id; +UPDATE table_without_key SET id = -id; +DELETE FROM table_without_key WHERE data = 3; +CREATE TABLE table_with_pkey(id serial primary key, data int); +INSERT INTO table_with_pkey(data) VALUES(1), (2); +DELETE FROM table_with_pkey WHERE data = 1; +-- should log the old pkey +UPDATE table_with_pkey SET data = 3 WHERE data = 2; +UPDATE table_with_pkey SET id = -id; +UPDATE table_with_pkey SET id = -id; +-- check that we log nothing despite having a pkey +ALTER TABLE table_without_key REPLICA IDENTITY NOTHING; +UPDATE table_with_pkey SET id = -id; +-- check that we log everything despite having a pkey +ALTER TABLE table_without_key REPLICA IDENTITY FULL; +UPDATE table_with_pkey SET id = -id; +DELETE FROM table_with_pkey WHERE data = 3; +CREATE TABLE table_with_unique_not_null(id serial unique, data int); +ALTER TABLE table_with_unique_not_null ALTER COLUMN id SET NOT NULL; --already set +-- won't log anything, replica identity not setup +INSERT INTO table_with_unique_not_null(data) VALUES(1), (2); +DELETE FROM table_with_unique_not_null WHERE data = 1; +UPDATE table_with_unique_not_null SET data = 3 WHERE data = 2; +UPDATE table_with_unique_not_null SET id = -id; +UPDATE table_with_unique_not_null SET id = -id; +DELETE FROM table_with_unique_not_null WHERE data = 3; +-- should log old key +ALTER TABLE table_with_unique_not_null REPLICA IDENTITY USING INDEX table_with_unique_not_null_id_key; +INSERT INTO table_with_unique_not_null(data) VALUES(1), (2); +DELETE FROM table_with_unique_not_null WHERE data = 1; +UPDATE table_with_unique_not_null SET data = 3 WHERE data = 2; +UPDATE table_with_unique_not_null SET id = -id; +UPDATE table_with_unique_not_null SET id = -id; +DELETE FROM table_with_unique_not_null WHERE data = 3; +-- check toast support +BEGIN; +CREATE SEQUENCE toasttable_rand_seq START 79 INCREMENT 1499; -- portable "random" +CREATE TABLE toasttable( + id serial primary key, + toasted_col1 text, + rand1 float8 DEFAULT nextval('toasttable_rand_seq'), + toasted_col2 text, + rand2 float8 DEFAULT nextval('toasttable_rand_seq') + ); +COMMIT; +-- uncompressed external toast data +INSERT INTO toasttable(toasted_col1) SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i); +-- compressed external toast data +INSERT INTO toasttable(toasted_col2) SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i); +-- update of existing column +UPDATE toasttable + SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i)) +WHERE id = 1; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + BEGIN + COMMIT + BEGIN + table public.table_without_key: INSERT: id[integer]:1 data[integer]:1 + table public.table_without_key: INSERT: id[integer]:2 data[integer]:2 + COMMIT + BEGIN + table public.table_without_key: DELETE: (no-tuple-data) + COMMIT + BEGIN + table public.table_without_key: UPDATE: id[integer]:2 data[integer]:3 + COMMIT + BEGIN + table public.table_without_key: UPDATE: id[integer]:-2 data[integer]:3 + COMMIT + BEGIN + table public.table_without_key: UPDATE: id[integer]:2 data[integer]:3 + COMMIT + BEGIN + COMMIT + BEGIN + table public.table_without_key: UPDATE: old-key: id[integer]:2 data[integer]:3 new-tuple: id[integer]:-2 data[integer]:3 + COMMIT + BEGIN + table public.table_without_key: UPDATE: old-key: id[integer]:-2 data[integer]:3 new-tuple: id[integer]:2 data[integer]:3 + COMMIT + BEGIN + table public.table_without_key: DELETE: id[integer]:2 data[integer]:3 + COMMIT + BEGIN + COMMIT + BEGIN + table public.table_with_pkey: INSERT: id[integer]:1 data[integer]:1 + table public.table_with_pkey: INSERT: id[integer]:2 data[integer]:2 + COMMIT + BEGIN + table public.table_with_pkey: DELETE: id[integer]:1 + COMMIT + BEGIN + table public.table_with_pkey: UPDATE: id[integer]:2 data[integer]:3 + COMMIT + BEGIN + table public.table_with_pkey: UPDATE: old-key: id[integer]:2 new-tuple: id[integer]:-2 data[integer]:3 + COMMIT + BEGIN + table public.table_with_pkey: UPDATE: old-key: id[integer]:-2 new-tuple: id[integer]:2 data[integer]:3 + COMMIT + BEGIN + COMMIT + BEGIN + table public.table_with_pkey: UPDATE: old-key: id[integer]:2 new-tuple: id[integer]:-2 data[integer]:3 + COMMIT + BEGIN + COMMIT + BEGIN + table public.table_with_pkey: UPDATE: old-key: id[integer]:-2 new-tuple: id[integer]:2 data[integer]:3 + COMMIT + BEGIN + table public.table_with_pkey: DELETE: id[integer]:2 + COMMIT + BEGIN + COMMIT + BEGIN + table public.table_with_unique_not_null: INSERT: id[integer]:1 data[integer]:1 + table public.table_with_unique_not_null: INSERT: id[integer]:2 data[integer]:2 + COMMIT + BEGIN + table public.table_with_unique_not_null: DELETE: (no-tuple-data) + COMMIT + BEGIN + table public.table_with_unique_not_null: UPDATE: id[integer]:2 data[integer]:3 + COMMIT + BEGIN + table public.table_with_unique_not_null: UPDATE: id[integer]:-2 data[integer]:3 + COMMIT + BEGIN + table public.table_with_unique_not_null: UPDATE: id[integer]:2 data[integer]:3 + COMMIT + BEGIN + table public.table_with_unique_not_null: DELETE: (no-tuple-data) + COMMIT + BEGIN + COMMIT + BEGIN + table public.table_with_unique_not_null: INSERT: id[integer]:3 data[integer]:1 + table public.table_with_unique_not_null: INSERT: id[integer]:4 data[integer]:2 + COMMIT + BEGIN + table public.table_with_unique_not_null: DELETE: id[integer]:3 + COMMIT + BEGIN + table public.table_with_unique_not_null: UPDATE: id[integer]:4 data[integer]:3 + COMMIT + BEGIN + table public.table_with_unique_not_null: UPDATE: old-key: id[integer]:4 new-tuple: id[integer]:-4 data[integer]:3 + COMMIT + BEGIN + table public.table_with_unique_not_null: UPDATE: old-key: id[integer]:-4 new-tuple: id[integer]:4 data[integer]:3 + COMMIT + BEGIN + table public.table_with_unique_not_null: DELETE: id[integer]:4 + COMMIT + BEGIN + COMMIT + BEGIN + table public.toasttable: INSERT: id[integer]:1 toasted_col1[text]:'12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000' rand1[double precision]:79 toasted_col2[text]:null rand2[double precision]:1578 + COMMIT + BEGIN + table public.toasttable: INSERT: id[integer]:2 toasted_col1[text]:null rand1[double precision]:3077 toasted_col2[text]:'0001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500000100020003000400050006000700080009001000110012001300140015001600170018001900200021002200230024002500260027002800290030003100320033003400350036003700380039004000410042004300440045004600470048004900500051005200530054005500560057005800590060006100620063006400650066006700680069007000710072007300740075007600770078007900800081008200830084008500860087008800890090009100920093009400950096009700980099010001010102010301040105010601070108010901100111011201130114011501160117011801190120012101220123012401250126012701280129013001310132013301340135013601370138013901400141014201430144014501460147014801490150015101520153015401550156015701580159016001610162016301640165016601670168016901700171017201730174017501760177017801790180018101820183018401850186018701880189019001910192019301940195019601970198019902000201020202030204020502060207020802090210021102120213021402150216021702180219022002210222022302240225022602270228022902300231023202330234023502360237023802390240024102420243024402450246024702480249025002510252025302540255025602570258025902600261026202630264026502660267026802690270027102720273027402750276027702780279028002810282028302840285028602870288028902900291029202930294029502960297029802990300030103020303030403050306030703080309031003110312031303140315031603170318031903200321032203230324032503260327032803290330033103320333033403350336033703380339034003410342034303440345034603470348034903500351035203530354035503560357035803590360036103620363036403650366036703680369037003710372037303740375037603770378037903800381038203830384038503860387038803890390039103920393039403950396039703980399040004010402040304040405040604070408040904100411041204130414041504160417041804190420042104220423042404250426042704280429043004310432043304340435043604370438043904400441044204430444044504460447044804490450045104520453045404550456045704580459046004610462046304640465046604670468046904700471047204730474047504760477047804790480048104820483048404850486048704880489049004910492049304940495049604970498049905000001000200030004000500060007000800090010001100120013001400150016001700180019002000210022002300240025002600270028002900300031003200330034003500360037003800390040004100420043004400450046004700480049005000510052005300540055005600570058005900600061006200630064006500660067006800690070007100720073007400750076007700780079008000810082008300840085008600870088008900900091009200930094009500960097009800990100010101020103010401050106010701080109011001110112011301140115011601170118011901200121012201230124012501260127012801290130013101320133013401350136013701380139014001410142014301440145014601470148014901500151015201530154015501560157015801590160016101620163016401650166016701680169017001710172017301740175017601770178017901800181018201830184018501860187018801890190019101920193019401950196019701980199020002010202020302040205020602070208020902100211021202130214021502160217021802190220022102220223022402250226022702280229023002310232023302340235023602370238023902400241024202430244024502460247024802490250025102520253025402550256025702580259026002610262026302640265026602670268026902700271027202730274027502760277027802790280028102820283028402850286028702880289029002910292029302940295029602970298029903000301030203030304030503060307030803090310031103120313031403150316031703180319032003210322032303240325032603270328032903300331033203330334033503360337033803390340034103420343034403450346034703480349035003510352035303540355035603570358035903600361036203630364036503660367036803690370037103720373037403750376037703780379038003810382038303840385038603870388038903900391039203930394039503960397039803990400040104020403040404050406040704080409041004110412041304140415041604170418041904200421042204230424042504260427042804290430043104320433043404350436043704380439044004410442044304440445044604470448044904500451045204530454045504560457045804590460046104620463046404650466046704680469047004710472047304740475047604770478047904800481048204830484048504860487048804890490049104920493049404950496049704980499050000010002000300040005000600070008000900100011001200130014001500160017001800190020002100220023002400250026002700280029003000310032003300340035003600370038003900400041004200430044004500460047004800490050005100520053005400550056005700580059006000610062006300640065006600670068006900700071007200730074007500760077007800790080008100820083008400850086008700880089009000910092009300940095009600970098009901000101010201030104010501060107010801090110011101120113011401150116011701180119012001210122012301240125012601270128012901300131013201330134013501360137013801390140014101420143014401450146014701480149015001510152015301540155015601570158015901600161016201630164016501660167016801690170017101720173017401750176017701780179018001810182018301840185018601870188018901900191019201930194019501960197019801990200020102020203020402050206020702080209021002110212021302140215021602170218021902200221022202230224022502260227022802290230023102320233023402350236023702380239024002410242024302440245024602470248024902500251025202530254025502560257025802590260026102620263026402650266026702680269027002710272027302740275027602770278027902800281028202830284028502860287028802890290029102920293029402950296029702980299030003010302030303040305030603070308030903100311031203130314031503160317031803190320032103220323032403250326032703280329033003310332033303340335033603370338033903400341034203430344034503460347034803490350035103520353035403550356035703580359036003610362036303640365036603670368036903700371037203730374037503760377037803790380038103820383038403850386038703880389039003910392039303940395039603970398039904000401040204030404040504060407040804090410041104120413041404150416041704180419042004210422042304240425042604270428042904300431043204330434043504360437043804390440044104420443044404450446044704480449045004510452045304540455045604570458045904600461046204630464046504660467046804690470047104720473047404750476047704780479048004810482048304840485048604870488048904900491049204930494049504960497049804990500' rand2[double precision]:4576 + COMMIT + BEGIN + table public.toasttable: UPDATE: id[integer]:1 toasted_col1[text]:'12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000' rand1[double precision]:79 toasted_col2[text]:null rand2[double precision]:1578 + COMMIT +(113 rows) + +INSERT INTO toasttable(toasted_col1) SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i); +-- update of second column, first column unchanged +UPDATE toasttable + SET toasted_col2 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i)) +WHERE id = 1; +-- make sure we decode correctly even if the toast table is gone +DROP TABLE toasttable; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + BEGIN + table public.toasttable: INSERT: id[integer]:3 toasted_col1[text]:'12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000' rand1[double precision]:6075 toasted_col2[text]:null rand2[double precision]:7574 + COMMIT + BEGIN + table public.toasttable: UPDATE: id[integer]:1 toasted_col1[text]:unchanged-toast-datum rand1[double precision]:79 toasted_col2[text]:'12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000' rand2[double precision]:1578 + COMMIT + BEGIN + COMMIT +(8 rows) + +-- done, free logical replication slot +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +------ +(0 rows) + +SELECT pg_drop_replication_slot('regression_slot'); + pg_drop_replication_slot +-------------------------- + +(1 row) + +/* check that we aren't visible anymore now */ +SELECT * FROM pg_stat_replication; + pid | usesysid | usename | application_name | client_addr | client_hostname | client_port | backend_start | backend_xmin | state | sent_location | write_location | flush_location | replay_location | sync_priority | sync_state +-----+----------+---------+------------------+-------------+-----------------+-------------+---------------+--------------+-------+---------------+----------------+----------------+-----------------+---------------+------------ +(0 rows) + diff --git a/contrib/test_decoding/expected/decoding_in_xact.out b/contrib/test_decoding/expected/decoding_in_xact.out new file mode 100644 index 0000000000..d15b0b542b --- /dev/null +++ b/contrib/test_decoding/expected/decoding_in_xact.out @@ -0,0 +1,89 @@ +-- predictability +SET synchronous_commit = on; +-- fail because we're creating a slot while in an xact with xid +BEGIN; +SELECT txid_current() = 0; + ?column? +---------- + f +(1 row) + +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +ERROR: cannot create logical replication slot in transaction that has performed writes +ROLLBACK; +-- fail because we're creating a slot while in an subxact whose topxact has a xid +BEGIN; +SELECT txid_current() = 0; + ?column? +---------- + f +(1 row) + +SAVEPOINT barf; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +ERROR: cannot create logical replication slot in transaction that has performed writes +ROLLBACK TO SAVEPOINT barf; +ROLLBACK; +-- succeed, outside tx. +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +SELECT 'stop' FROM pg_drop_replication_slot('regression_slot'); + ?column? +---------- + stop +(1 row) + +-- succeed, in tx without xid. +BEGIN; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +COMMIT; +CREATE TABLE nobarf(id serial primary key, data text); +INSERT INTO nobarf(data) VALUES('1'); +-- decoding works in transaction with xid +BEGIN; +SELECT txid_current() = 0; + ?column? +---------- + f +(1 row) + +-- don't show yet, haven't committed +INSERT INTO nobarf(data) VALUES('2'); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +----------------------------------------------------------- + BEGIN + COMMIT + BEGIN + table public.nobarf: INSERT: id[integer]:1 data[text]:'1' + COMMIT +(5 rows) + +COMMIT; +INSERT INTO nobarf(data) VALUES('3'); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +----------------------------------------------------------- + BEGIN + table public.nobarf: INSERT: id[integer]:2 data[text]:'2' + COMMIT + BEGIN + table public.nobarf: INSERT: id[integer]:3 data[text]:'3' + COMMIT +(6 rows) + +SELECT 'stop' FROM pg_drop_replication_slot('regression_slot'); + ?column? +---------- + stop +(1 row) + diff --git a/contrib/test_decoding/expected/delayed_startup.out b/contrib/test_decoding/expected/delayed_startup.out new file mode 100644 index 0000000000..db8c525ac4 --- /dev/null +++ b/contrib/test_decoding/expected/delayed_startup.out @@ -0,0 +1,38 @@ +Parsed test spec with 2 sessions + +starting permutation: s1b s1w s2init s1c s2start s1b s1w s1c s2start s1b s1w s2start s1c s2start +step s1b: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s1w: INSERT INTO do_write DEFAULT VALUES; +step s2init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +step s1c: COMMIT; +step s2init: <... completed> +?column? + +init +step s2start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false'); +data + +step s1b: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s1w: INSERT INTO do_write DEFAULT VALUES; +step s1c: COMMIT; +step s2start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false'); +data + +BEGIN +table public.do_write: INSERT: id[integer]:2 +COMMIT +step s1b: BEGIN ISOLATION LEVEL SERIALIZABLE; +step s1w: INSERT INTO do_write DEFAULT VALUES; +step s2start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false'); +data + +step s1c: COMMIT; +step s2start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false'); +data + +BEGIN +table public.do_write: INSERT: id[integer]:3 +COMMIT +?column? + +stop diff --git a/contrib/test_decoding/expected/mxact.out b/contrib/test_decoding/expected/mxact.out new file mode 100644 index 0000000000..f0d96cc67d --- /dev/null +++ b/contrib/test_decoding/expected/mxact.out @@ -0,0 +1,66 @@ +Parsed test spec with 3 sessions + +starting permutation: s0init s0start s1begin s1sharepgclass s2begin s2sharepgclass s0w s0start s2commit s1commit +step s0init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s0start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false'); +data + +step s1begin: BEGIN; +step s1sharepgclass: SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR SHARE) s; +?column? + +t +step s2begin: BEGIN; +step s2sharepgclass: SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR SHARE) s; +?column? + +t +step s0w: INSERT INTO do_write DEFAULT VALUES; +step s0start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false'); +data + +BEGIN +table public.do_write: INSERT: id[integer]:1 +COMMIT +step s2commit: COMMIT; +step s1commit: COMMIT; +?column? + +stop + +starting permutation: s0init s0start s1begin s1keysharepgclass s2begin s2keysharepgclass s0alter s0w s0start s2commit s1commit +step s0init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); +?column? + +init +step s0start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false'); +data + +step s1begin: BEGIN; +step s1keysharepgclass: SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR KEY SHARE) s; +?column? + +t +step s2begin: BEGIN; +step s2keysharepgclass: SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR KEY SHARE) s; +?column? + +t +step s0alter: ALTER TABLE do_write ADD column ts timestamptz; +step s0w: INSERT INTO do_write DEFAULT VALUES; +step s0start: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false'); +data + +BEGIN +COMMIT +BEGIN +table public.do_write: INSERT: id[integer]:1 ts[timestamp with time zone]:null +COMMIT +step s2commit: COMMIT; +step s1commit: COMMIT; +?column? + +stop diff --git a/contrib/test_decoding/expected/permissions.out b/contrib/test_decoding/expected/permissions.out new file mode 100644 index 0000000000..85b7f5d625 --- /dev/null +++ b/contrib/test_decoding/expected/permissions.out @@ -0,0 +1,130 @@ +-- predictability +SET synchronous_commit = on; +-- setup +CREATE ROLE lr_normal; +CREATE ROLE lr_superuser SUPERUSER; +CREATE ROLE lr_replication REPLICATION; +CREATE TABLE lr_test(data text); +-- superuser can control replication +SET ROLE lr_superuser; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +INSERT INTO lr_test VALUES('lr_superuser_init'); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +-------------------------------------------------------------- + BEGIN + table public.lr_test: INSERT: data[text]:'lr_superuser_init' + COMMIT +(3 rows) + +SELECT pg_drop_replication_slot('regression_slot'); + pg_drop_replication_slot +-------------------------- + +(1 row) + +RESET ROLE; +-- replication user can control replication +SET ROLE lr_replication; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +INSERT INTO lr_test VALUES('lr_superuser_init'); +ERROR: permission denied for relation lr_test +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +------ +(0 rows) + +SELECT pg_drop_replication_slot('regression_slot'); + pg_drop_replication_slot +-------------------------- + +(1 row) + +RESET ROLE; +-- plain user *can't* can control replication +SET ROLE lr_normal; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +ERROR: must be superuser or replication role to use replication slots +INSERT INTO lr_test VALUES('lr_superuser_init'); +ERROR: permission denied for relation lr_test +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); +ERROR: must be superuser or replication role to use replication slots +SELECT pg_drop_replication_slot('regression_slot'); +ERROR: must be superuser or replication role to use replication slots +RESET ROLE; +-- replication users can drop superuser created slots +SET ROLE lr_superuser; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +RESET ROLE; +SET ROLE lr_replication; +SELECT pg_drop_replication_slot('regression_slot'); + pg_drop_replication_slot +-------------------------- + +(1 row) + +RESET ROLE; +-- normal users can't drop existing slots +SET ROLE lr_superuser; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +RESET ROLE; +SET ROLE lr_normal; +SELECT pg_drop_replication_slot('regression_slot'); +ERROR: must be superuser or replication role to use replication slots +RESET ROLE; +-- all users can see existing slots +SET ROLE lr_superuser; +SELECT slot_name, plugin FROM pg_replication_slots; + slot_name | plugin +-----------------+--------------- + regression_slot | test_decoding +(1 row) + +RESET ROLE; +SET ROLE lr_replication; +SELECT slot_name, plugin FROM pg_replication_slots; + slot_name | plugin +-----------------+--------------- + regression_slot | test_decoding +(1 row) + +RESET ROLE; +SET ROLE lr_normal; +SELECT slot_name, plugin FROM pg_replication_slots; + slot_name | plugin +-----------------+--------------- + regression_slot | test_decoding +(1 row) + +RESET ROLE; +-- cleanup +SELECT pg_drop_replication_slot('regression_slot'); + pg_drop_replication_slot +-------------------------- + +(1 row) + +DROP ROLE lr_normal; +DROP ROLE lr_superuser; +DROP ROLE lr_replication; +DROP TABLE lr_test; diff --git a/contrib/test_decoding/expected/rewrite.out b/contrib/test_decoding/expected/rewrite.out new file mode 100644 index 0000000000..ec23ab9024 --- /dev/null +++ b/contrib/test_decoding/expected/rewrite.out @@ -0,0 +1,107 @@ +-- predictability +SET synchronous_commit = on; +DROP TABLE IF EXISTS replication_example; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120)); +INSERT INTO replication_example(somedata) VALUES (1); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +---------------------------------------------------------------------------------------------------------- + BEGIN + COMMIT + BEGIN + table public.replication_example: INSERT: id[integer]:1 somedata[integer]:1 text[character varying]:null + COMMIT +(5 rows) + +BEGIN; +INSERT INTO replication_example(somedata) VALUES (2); +ALTER TABLE replication_example ADD COLUMN testcolumn1 int; +INSERT INTO replication_example(somedata, testcolumn1) VALUES (3, 1); +COMMIT; +BEGIN; +INSERT INTO replication_example(somedata) VALUES (3); +ALTER TABLE replication_example ADD COLUMN testcolumn2 int; +INSERT INTO replication_example(somedata, testcolumn1, testcolumn2) VALUES (4, 2, 1); +COMMIT; +VACUUM FULL pg_am; +VACUUM FULL pg_amop; +VACUUM FULL pg_proc; +VACUUM FULL pg_opclass; +VACUUM FULL pg_type; +VACUUM FULL pg_index; +VACUUM FULL pg_database; +-- repeated rewrites that fail +BEGIN; +CLUSTER pg_class USING pg_class_oid_index; +CLUSTER pg_class USING pg_class_oid_index; +ROLLBACK; +-- repeated rewrites that succeed +BEGIN; +CLUSTER pg_class USING pg_class_oid_index; +CLUSTER pg_class USING pg_class_oid_index; +CLUSTER pg_class USING pg_class_oid_index; +COMMIT; + -- repeated rewrites in different transactions +VACUUM FULL pg_class; +VACUUM FULL pg_class; +INSERT INTO replication_example(somedata, testcolumn1) VALUES (5, 3); +BEGIN; +INSERT INTO replication_example(somedata, testcolumn1) VALUES (6, 4); +ALTER TABLE replication_example ADD COLUMN testcolumn3 int; +INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (7, 5, 1); +COMMIT; +-- make old files go away +CHECKPOINT; +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + data +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + BEGIN + table public.replication_example: INSERT: id[integer]:2 somedata[integer]:2 text[character varying]:null + table public.replication_example: INSERT: id[integer]:3 somedata[integer]:3 text[character varying]:null testcolumn1[integer]:1 + COMMIT + BEGIN + table public.replication_example: INSERT: id[integer]:4 somedata[integer]:3 text[character varying]:null testcolumn1[integer]:null + table public.replication_example: INSERT: id[integer]:5 somedata[integer]:4 text[character varying]:null testcolumn1[integer]:2 testcolumn2[integer]:1 + COMMIT + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + table public.replication_example: INSERT: id[integer]:6 somedata[integer]:5 text[character varying]:null testcolumn1[integer]:3 testcolumn2[integer]:null + COMMIT + BEGIN + table public.replication_example: INSERT: id[integer]:7 somedata[integer]:6 text[character varying]:null testcolumn1[integer]:4 testcolumn2[integer]:null + table public.replication_example: INSERT: id[integer]:8 somedata[integer]:7 text[character varying]:null testcolumn1[integer]:5 testcolumn2[integer]:null testcolumn3[integer]:1 + COMMIT +(35 rows) + +SELECT pg_drop_replication_slot('regression_slot'); + pg_drop_replication_slot +-------------------------- + +(1 row) + +DROP TABLE IF EXISTS replication_example; diff --git a/contrib/test_decoding/expected/toast.out b/contrib/test_decoding/expected/toast.out new file mode 100644 index 0000000000..6adef83f02 --- /dev/null +++ b/contrib/test_decoding/expected/toast.out @@ -0,0 +1,90 @@ +-- predictability +SET synchronous_commit = on; +DROP TABLE IF EXISTS xpto; +NOTICE: table "xpto" does not exist, skipping +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + ?column? +---------- + init +(1 row) + +CREATE SEQUENCE xpto_rand_seq START 79 INCREMENT 1499; -- portable "random" +CREATE TABLE xpto ( + id serial primary key, + toasted_col1 text, + rand1 float8 DEFAULT nextval('xpto_rand_seq'), + toasted_col2 text, + rand2 float8 DEFAULT nextval('xpto_rand_seq') +); +-- uncompressed external toast data +INSERT INTO xpto (toasted_col1, toasted_col2) SELECT string_agg(g.i::text, ''), string_agg((g.i*2)::text, '') FROM generate_series(1, 2000) g(i); +-- compressed external toast data +INSERT INTO xpto (toasted_col2) SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i); +-- update of existing column +UPDATE xpto SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i)) WHERE id = 1; +UPDATE xpto SET rand1 = 123.456 WHERE id = 1; +DELETE FROM xpto WHERE id = 1; +DROP TABLE IF EXISTS toasted_key; +NOTICE: table "toasted_key" does not exist, skipping +CREATE TABLE toasted_key ( + id serial, + toasted_key text PRIMARY KEY, + toasted_col1 text, + toasted_col2 text +); +ALTER TABLE toasted_key ALTER COLUMN toasted_key SET STORAGE EXTERNAL; +ALTER TABLE toasted_key ALTER COLUMN toasted_col1 SET STORAGE EXTERNAL; +INSERT INTO toasted_key(toasted_key, toasted_col1) VALUES(repeat('1234567890', 200), repeat('9876543210', 200)); +-- test update of a toasted key without changing it +UPDATE toasted_key SET toasted_col2 = toasted_col1; +-- test update of a toasted key, changing it +UPDATE toasted_key SET toasted_key = toasted_key || '1'; +DELETE FROM toasted_key; +SELECT substr(data, 1, 200) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + substr +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + table public.xpto: INSERT: id[integer]:1 toasted_col1[text]:'1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 + COMMIT + BEGIN + table public.xpto: INSERT: id[integer]:2 toasted_col1[text]:null rand1[double precision]:3077 toasted_col2[text]:'00010002000300040005000600070008000900100011001200130014001500160017001800190020002100 + COMMIT + BEGIN + table public.xpto: UPDATE: id[integer]:1 toasted_col1[text]:'1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 + COMMIT + BEGIN + table public.xpto: UPDATE: id[integer]:1 toasted_col1[text]:unchanged-toast-datum rand1[double precision]:123.456 toasted_col2[text]:unchanged-toast-datum rand2[double precision]:1578 + COMMIT + BEGIN + table public.xpto: DELETE: id[integer]:1 + COMMIT + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + COMMIT + BEGIN + table public.toasted_key: INSERT: id[integer]:1 toasted_key[text]:'1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123 + COMMIT + BEGIN + table public.toasted_key: UPDATE: id[integer]:1 toasted_key[text]:unchanged-toast-datum toasted_col1[text]:unchanged-toast-datum toasted_col2[text]:'987654321098765432109876543210987654321098765432109 + COMMIT + BEGIN + table public.toasted_key: UPDATE: old-key: toasted_key[text]:'123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678 + COMMIT + BEGIN + table public.toasted_key: DELETE: toasted_key[text]:'123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 + COMMIT +(37 rows) + +SELECT pg_drop_replication_slot('regression_slot'); + pg_drop_replication_slot +-------------------------- + +(1 row) + diff --git a/contrib/test_decoding/logical.conf b/contrib/test_decoding/logical.conf new file mode 100644 index 0000000000..367f706651 --- /dev/null +++ b/contrib/test_decoding/logical.conf @@ -0,0 +1,2 @@ +wal_level = logical +max_replication_slots = 4 diff --git a/contrib/test_decoding/specs/concurrent_ddl_dml.spec b/contrib/test_decoding/specs/concurrent_ddl_dml.spec new file mode 100644 index 0000000000..7c8a7c7977 --- /dev/null +++ b/contrib/test_decoding/specs/concurrent_ddl_dml.spec @@ -0,0 +1,94 @@ +setup +{ + DROP TABLE IF EXISTS tbl1; + DROP TABLE IF EXISTS tbl2; + CREATE TABLE tbl1(val1 integer, val2 integer); + CREATE TABLE tbl2(val1 integer, val2 integer); +} + +teardown +{ + DROP TABLE tbl1; + DROP TABLE tbl2; + SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot'); +} + +session "s1" +step "s1_init" { SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); } +step "s1_begin" { BEGIN; } +step "s1_insert_tbl1" { INSERT INTO tbl1 (val1, val2) VALUES (1, 1); } +step "s1_insert_tbl1_3col" { INSERT INTO tbl1 (val1, val2, val3) VALUES (1, 1, 1); } +step "s1_insert_tbl2" { INSERT INTO tbl2 (val1, val2) VALUES (1, 1); } +step "s1_insert_tbl2_3col" { INSERT INTO tbl2 (val1, val2, val3) VALUES (1, 1, 1); } +step "s1_commit" { COMMIT; } + +session "s2" +step "s2_alter_tbl1_float" { ALTER TABLE tbl1 ALTER COLUMN val2 TYPE float; } +step "s2_alter_tbl1_char" { ALTER TABLE tbl1 ALTER COLUMN val2 TYPE character varying; } +step "s2_alter_tbl1_text" { ALTER TABLE tbl1 ALTER COLUMN val2 TYPE text; } +step "s2_alter_tbl1_boolean" { ALTER TABLE tbl1 ALTER COLUMN val2 TYPE boolean; } + +step "s2_alter_tbl1_add_int" { ALTER TABLE tbl1 ADD COLUMN val3 INTEGER; } +step "s2_alter_tbl1_add_float" { ALTER TABLE tbl1 ADD COLUMN val3 FLOAT; } +step "s2_alter_tbl1_add_char" { ALTER TABLE tbl1 ADD COLUMN val3 character varying; } +step "s2_alter_tbl1_add_boolean" { ALTER TABLE tbl1 ADD COLUMN val3 BOOLEAN; } +step "s2_alter_tbl1_add_text" { ALTER TABLE tbl1 ADD COLUMN val3 TEXT; } + +step "s2_alter_tbl2_float" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE float; } +step "s2_alter_tbl2_char" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE character varying; } +step "s2_alter_tbl2_text" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE text; } +step "s2_alter_tbl2_boolean" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE boolean; } +step "s2_alter_tbl2_text" { ALTER TABLE tbl2 ALTER COLUMN val2 TYPE boolean; } + +step "s2_alter_tbl2_add_int" { ALTER TABLE tbl2 ADD COLUMN val3 INTEGER; } +step "s2_alter_tbl2_add_float" { ALTER TABLE tbl2 ADD COLUMN val3 FLOAT; } +step "s2_alter_tbl2_add_char" { ALTER TABLE tbl2 ADD COLUMN val3 character varying; } +step "s2_alter_tbl2_add_boolean" { ALTER TABLE tbl2 ADD COLUMN val3 BOOLEAN; } +step "s2_alter_tbl2_add_text" { ALTER TABLE tbl2 ADD COLUMN val3 TEXT; } +step "s2_alter_tbl2_drop_3rd_col" { ALTER TABLE tbl2 DROP COLUMN val3; } +step "s2_alter_tbl2_3rd_char" { ALTER TABLE tbl2 ALTER COLUMN val3 TYPE character varying; } +step "s2_alter_tbl2_3rd_text" { ALTER TABLE tbl2 ALTER COLUMN val3 TYPE text; } +step "s2_alter_tbl2_3rd_int" { ALTER TABLE tbl2 ALTER COLUMN val3 TYPE int USING val3::integer; } + +step "s2_get_changes" { SELECT regexp_replace(data, 'temp_\d+', 'temp') AS data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', '0'); } + + + +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_float" "s1_insert_tbl2" "s1_commit" "s2_get_changes" +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl1_float" "s1_insert_tbl2" "s1_commit" "s2_get_changes" +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_char" "s1_insert_tbl2" "s1_commit" "s2_get_changes" +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl1_char" "s1_insert_tbl2" "s1_commit" "s2_get_changes" + +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s2_alter_tbl1_float" "s1_commit" "s2_get_changes" +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s2_alter_tbl1_char" "s1_commit" "s2_get_changes" + +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_float" "s1_insert_tbl2" "s2_alter_tbl1_float" "s1_commit" "s2_get_changes" +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_char" "s1_insert_tbl2" "s2_alter_tbl1_char" "s1_commit" "s2_get_changes" + +permutation "s1_init" "s2_alter_tbl2_char" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_text" "s1_insert_tbl2" "s1_commit" "s2_get_changes" +permutation "s1_init" "s2_alter_tbl2_char" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_text" "s1_insert_tbl2" "s2_alter_tbl1_char" "s1_commit" "s2_get_changes" + +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_boolean" "s1_insert_tbl2" "s1_commit" "s2_get_changes" +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_boolean" "s1_insert_tbl2" "s2_alter_tbl1_boolean" "s1_commit" "s2_get_changes" + +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_add_int" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes" +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s1_commit" "s1_begin" "s2_alter_tbl2_add_int" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes" + +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_add_float" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes" +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s1_commit" "s1_begin" "s2_alter_tbl2_add_float" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes" + +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_add_char" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes" +permutation "s1_init" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2" "s1_commit" "s1_begin" "s2_alter_tbl2_add_char" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes" + +permutation "s1_init" "s2_alter_tbl2_add_int" "s1_begin" "s1_insert_tbl2_3col" "s2_alter_tbl2_drop_3rd_col" "s1_commit" "s2_get_changes" +permutation "s1_init" "s2_alter_tbl2_add_int" "s1_begin" "s1_insert_tbl2_3col" "s2_alter_tbl2_drop_3rd_col" "s1_insert_tbl2" "s1_commit" "s1_insert_tbl2" "s2_get_changes" + +permutation "s1_init" "s2_alter_tbl2_add_int" "s1_begin" "s1_insert_tbl2_3col" "s2_alter_tbl2_drop_3rd_col" "s1_commit" "s2_get_changes" "s2_alter_tbl2_add_text" "s1_begin" "s1_insert_tbl2_3col" "s2_alter_tbl2_3rd_char" "s1_insert_tbl2_3col" "s1_commit" "s2_get_changes" "s2_alter_tbl2_3rd_int" "s1_insert_tbl2_3col" "s2_get_changes" + +permutation "s1_init" "s2_alter_tbl2_add_char" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2_3col" "s2_alter_tbl2_3rd_text" "s1_insert_tbl2_3col" "s1_commit" "s1_insert_tbl2_3col" "s2_get_changes" +permutation "s1_init" "s2_alter_tbl2_add_text" "s1_begin" "s1_insert_tbl1" "s1_insert_tbl2_3col" "s2_alter_tbl2_3rd_char" "s1_insert_tbl2_3col" "s1_commit" "s1_insert_tbl2_3col" "s2_get_changes" + +permutation "s1_init" "s2_alter_tbl2_add_char" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_3rd_text" "s1_insert_tbl2_3col" "s1_commit" "s2_alter_tbl2_drop_3rd_col" "s1_insert_tbl2" "s2_get_changes" +permutation "s1_init" "s2_alter_tbl2_add_text" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_3rd_char" "s1_insert_tbl2_3col" "s1_commit" "s2_alter_tbl2_drop_3rd_col" "s1_insert_tbl2" "s2_get_changes" + +permutation "s1_init" "s2_alter_tbl2_add_char" "s1_begin" "s1_insert_tbl1" "s2_alter_tbl2_drop_3rd_col" "s1_insert_tbl1" "s1_commit" "s2_get_changes" diff --git a/contrib/test_decoding/specs/delayed_startup.spec b/contrib/test_decoding/specs/delayed_startup.spec new file mode 100644 index 0000000000..b7fe8148ce --- /dev/null +++ b/contrib/test_decoding/specs/delayed_startup.spec @@ -0,0 +1,24 @@ +setup +{ + DROP TABLE IF EXISTS do_write; + CREATE TABLE do_write(id serial primary key); +} + +teardown +{ + DROP TABLE do_write; + SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot'); +} + +session "s1" +setup { SET synchronous_commit=on; } +step "s1b" { BEGIN ISOLATION LEVEL SERIALIZABLE; } +step "s1w" { INSERT INTO do_write DEFAULT VALUES; } +step "s1c" { COMMIT; } +session "s2" +setup { SET synchronous_commit=on; } +step "s2init" {SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');} +step "s2start" {SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');} + + +permutation "s1b" "s1w" "s2init" "s1c" "s2start" "s1b" "s1w" "s1c" "s2start" "s1b" "s1w" "s2start" "s1c" "s2start" diff --git a/contrib/test_decoding/specs/mxact.spec b/contrib/test_decoding/specs/mxact.spec new file mode 100644 index 0000000000..ea5b1aa2d6 --- /dev/null +++ b/contrib/test_decoding/specs/mxact.spec @@ -0,0 +1,38 @@ +setup +{ + DROP TABLE IF EXISTS do_write; + CREATE TABLE do_write(id serial primary key); +} + +teardown +{ + DROP TABLE IF EXISTS do_write; + SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot'); +} + +session "s0" +setup { SET synchronous_commit=on; } +step "s0init" {SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');} +step "s0start" {SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'include-xids', 'false');} +step "s0alter" {ALTER TABLE do_write ADD column ts timestamptz; } +step "s0w" { INSERT INTO do_write DEFAULT VALUES; } + +session "s1" +setup { SET synchronous_commit=on; } +step "s1begin" {BEGIN;} +step "s1sharepgclass" { SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR SHARE) s; } +step "s1keysharepgclass" { SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR KEY SHARE) s; } +step "s1commit" {COMMIT;} + +session "s2" +setup { SET synchronous_commit=on; } +step "s2begin" {BEGIN;} +step "s2sharepgclass" { SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR SHARE) s; } +step "s2keysharepgclass" { SELECT count(*) > 1 FROM (SELECT * FROM pg_class FOR KEY SHARE) s; } +step "s2commit" {COMMIT;} + +# test that we're handling an update-only mxact xmax correctly +permutation "s0init" "s0start" "s1begin" "s1sharepgclass" "s2begin" "s2sharepgclass" "s0w" "s0start" "s2commit" "s1commit" + +# test that we're handling an update-only mxact xmax correctly +permutation "s0init" "s0start" "s1begin" "s1keysharepgclass" "s2begin" "s2keysharepgclass" "s0alter" "s0w" "s0start" "s2commit" "s1commit" diff --git a/contrib/test_decoding/sql/binary.sql b/contrib/test_decoding/sql/binary.sql new file mode 100644 index 0000000000..619f00b3bc --- /dev/null +++ b/contrib/test_decoding/sql/binary.sql @@ -0,0 +1,14 @@ +-- predictability +SET synchronous_commit = on; + +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +-- succeeds, textual plugin, textual consumer +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'force-binary', '0'); +-- fails, binary plugin, textual consumer +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'force-binary', '1'); +-- succeeds, textual plugin, binary consumer +SELECT data FROM pg_logical_slot_get_binary_changes('regression_slot', NULL, NULL, 'force-binary', '0'); +-- succeeds, binary plugin, binary consumer +SELECT data FROM pg_logical_slot_get_binary_changes('regression_slot', NULL, NULL, 'force-binary', '1'); + +SELECT 'init' FROM pg_drop_replication_slot('regression_slot'); diff --git a/contrib/test_decoding/sql/ddl.sql b/contrib/test_decoding/sql/ddl.sql new file mode 100644 index 0000000000..b4807e991b --- /dev/null +++ b/contrib/test_decoding/sql/ddl.sql @@ -0,0 +1,337 @@ +-- predictability +SET synchronous_commit = on; + +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +-- fail because of an already existing slot +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +-- fail because of an invalid name +SELECT 'init' FROM pg_create_logical_replication_slot('Invalid Name', 'test_decoding'); + +-- fail twice because of an invalid parameter values +SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', 'frakbar'); +SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'nonexistant-option', 'frakbar'); +SELECT 'init' FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', 'frakbar'); + +-- succeed once +SELECT pg_drop_replication_slot('regression_slot'); +-- fail +SELECT pg_drop_replication_slot('regression_slot'); + +-- check that we're detecting a streaming rep slot used for logical decoding +SELECT 'init' FROM pg_create_physical_replication_slot('repl'); +SELECT data FROM pg_logical_slot_get_changes('repl', NULL, NULL, 'include-xids', '0'); +SELECT pg_drop_replication_slot('repl'); + + +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + +/* check whether status function reports us, only reproduceable columns */ +SELECT slot_name, plugin, slot_type, active, + NOT catalog_xmin IS NULL AS catalog_xmin_set, + xmin IS NULl AS data_xmin_not_set, + pg_xlog_location_diff(restart_lsn, '0/01000000') > 0 AS some_wal +FROM pg_replication_slots; + +/* + * Check that changes are handled correctly when interleaved with ddl + */ +CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120)); +BEGIN; +INSERT INTO replication_example(somedata, text) VALUES (1, 1); +INSERT INTO replication_example(somedata, text) VALUES (1, 2); +COMMIT; + +ALTER TABLE replication_example ADD COLUMN bar int; + +INSERT INTO replication_example(somedata, text, bar) VALUES (2, 1, 4); + +BEGIN; +INSERT INTO replication_example(somedata, text, bar) VALUES (2, 2, 4); +INSERT INTO replication_example(somedata, text, bar) VALUES (2, 3, 4); +INSERT INTO replication_example(somedata, text, bar) VALUES (2, 4, NULL); +COMMIT; + +ALTER TABLE replication_example DROP COLUMN bar; +INSERT INTO replication_example(somedata, text) VALUES (3, 1); + +BEGIN; +INSERT INTO replication_example(somedata, text) VALUES (3, 2); +INSERT INTO replication_example(somedata, text) VALUES (3, 3); +COMMIT; + +ALTER TABLE replication_example RENAME COLUMN text TO somenum; + +INSERT INTO replication_example(somedata, somenum) VALUES (4, 1); + +-- collect all changes +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +ALTER TABLE replication_example ALTER COLUMN somenum TYPE int4 USING (somenum::int4); +-- throw away changes, they contain oids +SELECT count(data) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +INSERT INTO replication_example(somedata, somenum) VALUES (5, 1); + +BEGIN; +INSERT INTO replication_example(somedata, somenum) VALUES (6, 1); +ALTER TABLE replication_example ADD COLUMN zaphod1 int; +INSERT INTO replication_example(somedata, somenum, zaphod1) VALUES (6, 2, 1); +ALTER TABLE replication_example ADD COLUMN zaphod2 int; +INSERT INTO replication_example(somedata, somenum, zaphod2) VALUES (6, 3, 1); +INSERT INTO replication_example(somedata, somenum, zaphod1) VALUES (6, 4, 2); +COMMIT; + +-- show changes +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +-- hide changes bc of oid visible in full table rewrites +CREATE TABLE tr_unique(id2 serial unique NOT NULL, data int); +INSERT INTO tr_unique(data) VALUES(10); +ALTER TABLE tr_unique RENAME TO tr_pkey; +ALTER TABLE tr_pkey ADD COLUMN id serial primary key; +SELECT count(data) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +INSERT INTO tr_pkey(data) VALUES(1); +--show deletion with primary key +DELETE FROM tr_pkey; + +/* display results */ +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +/* + * check that disk spooling works + */ +BEGIN; +CREATE TABLE tr_etoomuch (id serial primary key, data int); +INSERT INTO tr_etoomuch(data) SELECT g.i FROM generate_series(1, 10234) g(i); +DELETE FROM tr_etoomuch WHERE id < 5000; +UPDATE tr_etoomuch SET data = - data WHERE id > 5000; +COMMIT; + +/* display results, but hide most of the output */ +SELECT count(*), min(data), max(data) +FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0') +GROUP BY substring(data, 1, 24) +ORDER BY 1; + +/* + * check whether we decode subtransactions correctly in relation with each + * other + */ +CREATE TABLE tr_sub (id serial primary key, path text); + +-- toplevel, subtxn, toplevel, subtxn, subtxn +BEGIN; +INSERT INTO tr_sub(path) VALUES ('1-top-#1'); + +SAVEPOINT a; +INSERT INTO tr_sub(path) VALUES ('1-top-1-#1'); +INSERT INTO tr_sub(path) VALUES ('1-top-1-#2'); +RELEASE SAVEPOINT a; + +SAVEPOINT b; +SAVEPOINT c; +INSERT INTO tr_sub(path) VALUES ('1-top-2-1-#1'); +INSERT INTO tr_sub(path) VALUES ('1-top-2-1-#2'); +RELEASE SAVEPOINT c; +INSERT INTO tr_sub(path) VALUES ('1-top-2-#1'); +RELEASE SAVEPOINT b; +COMMIT; + +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +-- check that we handle xlog assignments correctly +BEGIN; +-- nest 80 subtxns +SAVEPOINT subtop;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a;SAVEPOINT a; +-- assign xid by inserting +INSERT INTO tr_sub(path) VALUES ('2-top-1...--#1'); +INSERT INTO tr_sub(path) VALUES ('2-top-1...--#2'); +INSERT INTO tr_sub(path) VALUES ('2-top-1...--#3'); +RELEASE SAVEPOINT subtop; +INSERT INTO tr_sub(path) VALUES ('2-top-#1'); +COMMIT; + +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +-- make sure rollbacked subtransactions aren't decoded +BEGIN; +INSERT INTO tr_sub(path) VALUES ('3-top-2-#1'); +SAVEPOINT a; +INSERT INTO tr_sub(path) VALUES ('3-top-2-1-#1'); +SAVEPOINT b; +INSERT INTO tr_sub(path) VALUES ('3-top-2-2-#1'); +ROLLBACK TO SAVEPOINT b; +INSERT INTO tr_sub(path) VALUES ('3-top-2-#2'); +COMMIT; + +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +-- test whether a known, but not yet logged toplevel xact, followed by a +-- subxact commit is handled correctly +BEGIN; +SELECT txid_current() != 0; -- so no fixed xid apears in the outfile +SAVEPOINT a; +INSERT INTO tr_sub(path) VALUES ('4-top-1-#1'); +RELEASE SAVEPOINT a; +COMMIT; + +-- test whether a change in a subtransaction, in an unknown toplevel +-- xact is handled correctly. +BEGIN; +SAVEPOINT a; +INSERT INTO tr_sub(path) VALUES ('5-top-1-#1'); +COMMIT; + + +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + + +/* + * Check whether treating a table as a catalog table works somewhat + */ +CREATE TABLE replication_metadata ( + id serial primary key, + relation name NOT NULL, + options text[] +) +WITH (user_catalog_table = true) +; +\d+ replication_metadata + +INSERT INTO replication_metadata(relation, options) +VALUES ('foo', ARRAY['a', 'b']); + +ALTER TABLE replication_metadata RESET (user_catalog_table); +\d+ replication_metadata + +INSERT INTO replication_metadata(relation, options) +VALUES ('bar', ARRAY['a', 'b']); + +ALTER TABLE replication_metadata SET (user_catalog_table = true); +\d+ replication_metadata + +INSERT INTO replication_metadata(relation, options) +VALUES ('blub', NULL); + +-- make sure rewrites don't work +ALTER TABLE replication_metadata ADD COLUMN rewritemeornot int; +ALTER TABLE replication_metadata ALTER COLUMN rewritemeornot TYPE text; + +ALTER TABLE replication_metadata SET (user_catalog_table = false); +\d+ replication_metadata + +INSERT INTO replication_metadata(relation, options) +VALUES ('zaphod', NULL); + +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +/* + * check whether we handle updates/deletes correct with & without a pkey + */ + +/* we should handle the case without a key at all more gracefully */ +CREATE TABLE table_without_key(id serial, data int); +INSERT INTO table_without_key(data) VALUES(1),(2); +DELETE FROM table_without_key WHERE data = 1; +-- won't log old keys +UPDATE table_without_key SET data = 3 WHERE data = 2; +UPDATE table_without_key SET id = -id; +UPDATE table_without_key SET id = -id; +-- should log the full old row now +ALTER TABLE table_without_key REPLICA IDENTITY FULL; +UPDATE table_without_key SET data = 3 WHERE data = 2; +UPDATE table_without_key SET id = -id; +UPDATE table_without_key SET id = -id; +DELETE FROM table_without_key WHERE data = 3; + +CREATE TABLE table_with_pkey(id serial primary key, data int); +INSERT INTO table_with_pkey(data) VALUES(1), (2); +DELETE FROM table_with_pkey WHERE data = 1; +-- should log the old pkey +UPDATE table_with_pkey SET data = 3 WHERE data = 2; +UPDATE table_with_pkey SET id = -id; +UPDATE table_with_pkey SET id = -id; +-- check that we log nothing despite having a pkey +ALTER TABLE table_without_key REPLICA IDENTITY NOTHING; +UPDATE table_with_pkey SET id = -id; +-- check that we log everything despite having a pkey +ALTER TABLE table_without_key REPLICA IDENTITY FULL; +UPDATE table_with_pkey SET id = -id; +DELETE FROM table_with_pkey WHERE data = 3; + +CREATE TABLE table_with_unique_not_null(id serial unique, data int); +ALTER TABLE table_with_unique_not_null ALTER COLUMN id SET NOT NULL; --already set +-- won't log anything, replica identity not setup +INSERT INTO table_with_unique_not_null(data) VALUES(1), (2); +DELETE FROM table_with_unique_not_null WHERE data = 1; +UPDATE table_with_unique_not_null SET data = 3 WHERE data = 2; +UPDATE table_with_unique_not_null SET id = -id; +UPDATE table_with_unique_not_null SET id = -id; +DELETE FROM table_with_unique_not_null WHERE data = 3; +-- should log old key +ALTER TABLE table_with_unique_not_null REPLICA IDENTITY USING INDEX table_with_unique_not_null_id_key; +INSERT INTO table_with_unique_not_null(data) VALUES(1), (2); +DELETE FROM table_with_unique_not_null WHERE data = 1; +UPDATE table_with_unique_not_null SET data = 3 WHERE data = 2; +UPDATE table_with_unique_not_null SET id = -id; +UPDATE table_with_unique_not_null SET id = -id; +DELETE FROM table_with_unique_not_null WHERE data = 3; + +-- check toast support +BEGIN; +CREATE SEQUENCE toasttable_rand_seq START 79 INCREMENT 1499; -- portable "random" +CREATE TABLE toasttable( + id serial primary key, + toasted_col1 text, + rand1 float8 DEFAULT nextval('toasttable_rand_seq'), + toasted_col2 text, + rand2 float8 DEFAULT nextval('toasttable_rand_seq') + ); +COMMIT; +-- uncompressed external toast data +INSERT INTO toasttable(toasted_col1) SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i); + +-- compressed external toast data +INSERT INTO toasttable(toasted_col2) SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i); + +-- update of existing column +UPDATE toasttable + SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i)) +WHERE id = 1; + +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +INSERT INTO toasttable(toasted_col1) SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i); + +-- update of second column, first column unchanged +UPDATE toasttable + SET toasted_col2 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i)) +WHERE id = 1; + +-- make sure we decode correctly even if the toast table is gone +DROP TABLE toasttable; + +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +-- done, free logical replication slot +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); +SELECT pg_drop_replication_slot('regression_slot'); + +/* check that we aren't visible anymore now */ +SELECT * FROM pg_stat_replication; diff --git a/contrib/test_decoding/sql/decoding_in_xact.sql b/contrib/test_decoding/sql/decoding_in_xact.sql new file mode 100644 index 0000000000..2771afee7a --- /dev/null +++ b/contrib/test_decoding/sql/decoding_in_xact.sql @@ -0,0 +1,41 @@ +-- predictability +SET synchronous_commit = on; + +-- fail because we're creating a slot while in an xact with xid +BEGIN; +SELECT txid_current() = 0; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +ROLLBACK; + +-- fail because we're creating a slot while in an subxact whose topxact has a xid +BEGIN; +SELECT txid_current() = 0; +SAVEPOINT barf; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +ROLLBACK TO SAVEPOINT barf; +ROLLBACK; + +-- succeed, outside tx. +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +SELECT 'stop' FROM pg_drop_replication_slot('regression_slot'); + +-- succeed, in tx without xid. +BEGIN; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +COMMIT; + +CREATE TABLE nobarf(id serial primary key, data text); +INSERT INTO nobarf(data) VALUES('1'); + +-- decoding works in transaction with xid +BEGIN; +SELECT txid_current() = 0; +-- don't show yet, haven't committed +INSERT INTO nobarf(data) VALUES('2'); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); +COMMIT; + +INSERT INTO nobarf(data) VALUES('3'); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +SELECT 'stop' FROM pg_drop_replication_slot('regression_slot'); diff --git a/contrib/test_decoding/sql/permissions.sql b/contrib/test_decoding/sql/permissions.sql new file mode 100644 index 0000000000..39d70b56b0 --- /dev/null +++ b/contrib/test_decoding/sql/permissions.sql @@ -0,0 +1,69 @@ +-- predictability +SET synchronous_commit = on; + +-- setup +CREATE ROLE lr_normal; +CREATE ROLE lr_superuser SUPERUSER; +CREATE ROLE lr_replication REPLICATION; +CREATE TABLE lr_test(data text); + +-- superuser can control replication +SET ROLE lr_superuser; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +INSERT INTO lr_test VALUES('lr_superuser_init'); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); +SELECT pg_drop_replication_slot('regression_slot'); +RESET ROLE; + +-- replication user can control replication +SET ROLE lr_replication; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +INSERT INTO lr_test VALUES('lr_superuser_init'); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); +SELECT pg_drop_replication_slot('regression_slot'); +RESET ROLE; + +-- plain user *can't* can control replication +SET ROLE lr_normal; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +INSERT INTO lr_test VALUES('lr_superuser_init'); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); +SELECT pg_drop_replication_slot('regression_slot'); +RESET ROLE; + +-- replication users can drop superuser created slots +SET ROLE lr_superuser; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +RESET ROLE; +SET ROLE lr_replication; +SELECT pg_drop_replication_slot('regression_slot'); +RESET ROLE; + +-- normal users can't drop existing slots +SET ROLE lr_superuser; +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +RESET ROLE; +SET ROLE lr_normal; +SELECT pg_drop_replication_slot('regression_slot'); +RESET ROLE; + +-- all users can see existing slots +SET ROLE lr_superuser; +SELECT slot_name, plugin FROM pg_replication_slots; +RESET ROLE; + +SET ROLE lr_replication; +SELECT slot_name, plugin FROM pg_replication_slots; +RESET ROLE; + +SET ROLE lr_normal; +SELECT slot_name, plugin FROM pg_replication_slots; +RESET ROLE; + +-- cleanup +SELECT pg_drop_replication_slot('regression_slot'); + +DROP ROLE lr_normal; +DROP ROLE lr_superuser; +DROP ROLE lr_replication; +DROP TABLE lr_test; diff --git a/contrib/test_decoding/sql/rewrite.sql b/contrib/test_decoding/sql/rewrite.sql new file mode 100644 index 0000000000..9a3dcbf857 --- /dev/null +++ b/contrib/test_decoding/sql/rewrite.sql @@ -0,0 +1,62 @@ +-- predictability +SET synchronous_commit = on; + +DROP TABLE IF EXISTS replication_example; + +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); +CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120)); +INSERT INTO replication_example(somedata) VALUES (1); +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); + +BEGIN; +INSERT INTO replication_example(somedata) VALUES (2); +ALTER TABLE replication_example ADD COLUMN testcolumn1 int; +INSERT INTO replication_example(somedata, testcolumn1) VALUES (3, 1); +COMMIT; + +BEGIN; +INSERT INTO replication_example(somedata) VALUES (3); +ALTER TABLE replication_example ADD COLUMN testcolumn2 int; +INSERT INTO replication_example(somedata, testcolumn1, testcolumn2) VALUES (4, 2, 1); +COMMIT; + +VACUUM FULL pg_am; +VACUUM FULL pg_amop; +VACUUM FULL pg_proc; +VACUUM FULL pg_opclass; +VACUUM FULL pg_type; +VACUUM FULL pg_index; +VACUUM FULL pg_database; + +-- repeated rewrites that fail +BEGIN; +CLUSTER pg_class USING pg_class_oid_index; +CLUSTER pg_class USING pg_class_oid_index; +ROLLBACK; + +-- repeated rewrites that succeed +BEGIN; +CLUSTER pg_class USING pg_class_oid_index; +CLUSTER pg_class USING pg_class_oid_index; +CLUSTER pg_class USING pg_class_oid_index; +COMMIT; + + -- repeated rewrites in different transactions +VACUUM FULL pg_class; +VACUUM FULL pg_class; + +INSERT INTO replication_example(somedata, testcolumn1) VALUES (5, 3); + +BEGIN; +INSERT INTO replication_example(somedata, testcolumn1) VALUES (6, 4); +ALTER TABLE replication_example ADD COLUMN testcolumn3 int; +INSERT INTO replication_example(somedata, testcolumn1, testcolumn3) VALUES (7, 5, 1); +COMMIT; + +-- make old files go away +CHECKPOINT; + +SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); +SELECT pg_drop_replication_slot('regression_slot'); + +DROP TABLE IF EXISTS replication_example; diff --git a/contrib/test_decoding/sql/toast.sql b/contrib/test_decoding/sql/toast.sql new file mode 100644 index 0000000000..943db9d2ee --- /dev/null +++ b/contrib/test_decoding/sql/toast.sql @@ -0,0 +1,51 @@ +-- predictability +SET synchronous_commit = on; + +DROP TABLE IF EXISTS xpto; + +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'test_decoding'); + +CREATE SEQUENCE xpto_rand_seq START 79 INCREMENT 1499; -- portable "random" +CREATE TABLE xpto ( + id serial primary key, + toasted_col1 text, + rand1 float8 DEFAULT nextval('xpto_rand_seq'), + toasted_col2 text, + rand2 float8 DEFAULT nextval('xpto_rand_seq') +); + +-- uncompressed external toast data +INSERT INTO xpto (toasted_col1, toasted_col2) SELECT string_agg(g.i::text, ''), string_agg((g.i*2)::text, '') FROM generate_series(1, 2000) g(i); + +-- compressed external toast data +INSERT INTO xpto (toasted_col2) SELECT repeat(string_agg(to_char(g.i, 'FM0000'), ''), 50) FROM generate_series(1, 500) g(i); + +-- update of existing column +UPDATE xpto SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i)) WHERE id = 1; + +UPDATE xpto SET rand1 = 123.456 WHERE id = 1; + +DELETE FROM xpto WHERE id = 1; + +DROP TABLE IF EXISTS toasted_key; +CREATE TABLE toasted_key ( + id serial, + toasted_key text PRIMARY KEY, + toasted_col1 text, + toasted_col2 text +); + +ALTER TABLE toasted_key ALTER COLUMN toasted_key SET STORAGE EXTERNAL; +ALTER TABLE toasted_key ALTER COLUMN toasted_col1 SET STORAGE EXTERNAL; + +INSERT INTO toasted_key(toasted_key, toasted_col1) VALUES(repeat('1234567890', 200), repeat('9876543210', 200)); + +-- test update of a toasted key without changing it +UPDATE toasted_key SET toasted_col2 = toasted_col1; +-- test update of a toasted key, changing it +UPDATE toasted_key SET toasted_key = toasted_key || '1'; + +DELETE FROM toasted_key; + +SELECT substr(data, 1, 200) FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0'); +SELECT pg_drop_replication_slot('regression_slot'); diff --git a/contrib/test_decoding/test_decoding.c b/contrib/test_decoding/test_decoding.c new file mode 100644 index 0000000000..ea463fb2e7 --- /dev/null +++ b/contrib/test_decoding/test_decoding.c @@ -0,0 +1,404 @@ +/*------------------------------------------------------------------------- + * + * test_decoding.c + * example logical decoding output plugin + * + * Copyright (c) 2012-2014, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/test_decoding/test_decoding.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/sysattr.h" + +#include "catalog/pg_class.h" +#include "catalog/pg_type.h" + +#include "nodes/parsenodes.h" + +#include "replication/output_plugin.h" +#include "replication/logical.h" + +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/relcache.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + + +PG_MODULE_MAGIC; + +extern void _PG_init(void); +extern void _PG_output_plugin_init(OutputPluginCallbacks *cb); + +typedef struct +{ + MemoryContext context; + bool include_xids; + bool include_timestamp; +} TestDecodingData; + +/* These must be available to pg_dlsym() */ +static void pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt, + bool is_init); +static void pg_decode_shutdown(LogicalDecodingContext *ctx); +static void pg_decode_begin_txn(LogicalDecodingContext *ctx, + ReorderBufferTXN *txn); +static void pg_decode_commit_txn(LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, XLogRecPtr commit_lsn); +static void pg_decode_change(LogicalDecodingContext *ctx, + ReorderBufferTXN *txn, Relation rel, + ReorderBufferChange *change); + +void +_PG_init(void) +{ + /* other plugins can perform things here */ +} + +/* specify output plugin callbacks */ +void +_PG_output_plugin_init(OutputPluginCallbacks *cb) +{ + AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit); + + cb->startup_cb = pg_decode_startup; + cb->begin_cb = pg_decode_begin_txn; + cb->change_cb = pg_decode_change; + cb->commit_cb = pg_decode_commit_txn; + cb->shutdown_cb = pg_decode_shutdown; +} + + +/* initialize this plugin */ +static void +pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt, + bool is_init) +{ + ListCell *option; + TestDecodingData *data; + + data = palloc(sizeof(TestDecodingData)); + data->context = AllocSetContextCreate(ctx->context, + "text conversion context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + data->include_xids = true; + data->include_timestamp = false; + + ctx->output_plugin_private = data; + + opt->output_type = OUTPUT_PLUGIN_TEXTUAL_OUTPUT; + + foreach(option, ctx->output_plugin_options) + { + DefElem *elem = lfirst(option); + + Assert(elem->arg == NULL || IsA(elem->arg, String)); + + if (strcmp(elem->defname, "include-xids") == 0) + { + /* if option does not provide a value, it means its value is true */ + if (elem->arg == NULL) + data->include_xids = true; + else if (!parse_bool(strVal(elem->arg), &data->include_xids)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse value \"%s\" for parameter \"%s\"", + strVal(elem->arg), elem->defname))); + } + else if (strcmp(elem->defname, "include-timestamp") == 0) + { + if (elem->arg == NULL) + data->include_timestamp = true; + else if (!parse_bool(strVal(elem->arg), &data->include_timestamp)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse value \"%s\" for parameter \"%s\"", + strVal(elem->arg), elem->defname))); + } + else if (strcmp(elem->defname, "force-binary") == 0) + { + bool force_binary; + + if (elem->arg == NULL) + continue; + else if (!parse_bool(strVal(elem->arg), &force_binary)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse value \"%s\" for parameter \"%s\"", + strVal(elem->arg), elem->defname))); + + if (force_binary) + opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("option \"%s\" = \"%s\" is unknown", + elem->defname, + elem->arg ? strVal(elem->arg) : "(null)"))); + } + } +} + +/* cleanup this plugin's resources */ +static void +pg_decode_shutdown(LogicalDecodingContext *ctx) +{ + TestDecodingData *data = ctx->output_plugin_private; + + /* cleanup our own resources via memory context reset */ + MemoryContextDelete(data->context); +} + +/* BEGIN callback */ +static void +pg_decode_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn) +{ + TestDecodingData *data = ctx->output_plugin_private; + + OutputPluginPrepareWrite(ctx, true); + if (data->include_xids) + appendStringInfo(ctx->out, "BEGIN %u", txn->xid); + else + appendStringInfoString(ctx->out, "BEGIN"); + OutputPluginWrite(ctx, true); +} + +/* COMMIT callback */ +static void +pg_decode_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, + XLogRecPtr commit_lsn) +{ + TestDecodingData *data = ctx->output_plugin_private; + + OutputPluginPrepareWrite(ctx, true); + if (data->include_xids) + appendStringInfo(ctx->out, "COMMIT %u", txn->xid); + else + appendStringInfoString(ctx->out, "COMMIT"); + + if (data->include_timestamp) + appendStringInfo(ctx->out, " (at %s)", + timestamptz_to_str(txn->commit_time)); + + OutputPluginWrite(ctx, true); +} + +/* + * Print literal `outputstr' already represented as string of type `typid' + * into stringbuf `s'. + * + * Some builtin types aren't quoted, the rest is quoted. Escaping is done as + * if standard_conforming_strings were enabled. + */ +static void +print_literal(StringInfo s, Oid typid, char *outputstr) +{ + const char *valptr; + + switch (typid) + { + case INT2OID: + case INT4OID: + case INT8OID: + case OIDOID: + case FLOAT4OID: + case FLOAT8OID: + case NUMERICOID: + /* NB: We don't care about Inf, NaN et al. */ + appendStringInfoString(s, outputstr); + break; + + case BITOID: + case VARBITOID: + appendStringInfo(s, "B'%s'", outputstr); + break; + + case BOOLOID: + if (strcmp(outputstr, "t") == 0) + appendStringInfoString(s, "true"); + else + appendStringInfoString(s, "false"); + break; + + default: + appendStringInfoChar(s, '\''); + for (valptr = outputstr; *valptr; valptr++) + { + char ch = *valptr; + + if (SQL_STR_DOUBLE(ch, false)) + appendStringInfoChar(s, ch); + appendStringInfoChar(s, ch); + } + appendStringInfoChar(s, '\''); + break; + } +} + +/* print the tuple 'tuple' into the StringInfo s */ +static void +tuple_to_stringinfo(StringInfo s, TupleDesc tupdesc, HeapTuple tuple, bool skip_nulls) +{ + int natt; + Oid oid; + + /* print oid of tuple, it's not included in the TupleDesc */ + if ((oid = HeapTupleHeaderGetOid(tuple->t_data)) != InvalidOid) + { + appendStringInfo(s, " oid[oid]:%u", oid); + } + + /* print all columns individually */ + for (natt = 0; natt < tupdesc->natts; natt++) + { + Form_pg_attribute attr; /* the attribute itself */ + Oid typid; /* type of current attribute */ + Oid typoutput; /* output function */ + bool typisvarlena; + Datum origval; /* possibly toasted Datum */ + bool isnull; /* column is null? */ + + attr = tupdesc->attrs[natt]; + + /* + * don't print dropped columns, we can't be sure everything is + * available for them + */ + if (attr->attisdropped) + continue; + + /* + * Don't print system columns, oid will already have been printed if + * present. + */ + if (attr->attnum < 0) + continue; + + typid = attr->atttypid; + + /* get Datum from tuple */ + origval = fastgetattr(tuple, natt + 1, tupdesc, &isnull); + + if (isnull && skip_nulls) + continue; + + /* print attribute name */ + appendStringInfoChar(s, ' '); + appendStringInfoString(s, quote_identifier(NameStr(attr->attname))); + + /* print attribute type */ + appendStringInfoChar(s, '['); + appendStringInfoString(s, format_type_be(typid)); + appendStringInfoChar(s, ']'); + + /* query output function */ + getTypeOutputInfo(typid, + &typoutput, &typisvarlena); + + /* print separator */ + appendStringInfoChar(s, ':'); + + /* print data */ + if (isnull) + appendStringInfoString(s, "null"); + else if (typisvarlena && VARATT_IS_EXTERNAL_ONDISK(origval)) + appendStringInfoString(s, "unchanged-toast-datum"); + else if (!typisvarlena) + print_literal(s, typid, + OidOutputFunctionCall(typoutput, origval)); + else + { + Datum val; /* definitely detoasted Datum */ + val = PointerGetDatum(PG_DETOAST_DATUM(origval)); + print_literal(s, typid, OidOutputFunctionCall(typoutput, val)); + } + } +} + +/* + * callback for individual changed tuples + */ +static void +pg_decode_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change) +{ + TestDecodingData *data; + Form_pg_class class_form; + TupleDesc tupdesc; + MemoryContext old; + + data = ctx->output_plugin_private; + class_form = RelationGetForm(relation); + tupdesc = RelationGetDescr(relation); + + /* Avoid leaking memory by using and resetting our own context */ + old = MemoryContextSwitchTo(data->context); + + OutputPluginPrepareWrite(ctx, true); + + appendStringInfoString(ctx->out, "table "); + appendStringInfoString(ctx->out, + quote_qualified_identifier( + get_namespace_name( + get_rel_namespace(RelationGetRelid(relation))), + NameStr(class_form->relname))); + appendStringInfoString(ctx->out, ":"); + + switch (change->action) + { + case REORDER_BUFFER_CHANGE_INSERT: + appendStringInfoString(ctx->out, " INSERT:"); + if (change->tp.newtuple == NULL) + appendStringInfoString(ctx->out, " (no-tuple-data)"); + else + tuple_to_stringinfo(ctx->out, tupdesc, + &change->tp.newtuple->tuple, + false); + break; + case REORDER_BUFFER_CHANGE_UPDATE: + appendStringInfoString(ctx->out, " UPDATE:"); + if (change->tp.oldtuple != NULL) + { + appendStringInfoString(ctx->out, " old-key:"); + tuple_to_stringinfo(ctx->out, tupdesc, + &change->tp.oldtuple->tuple, + true); + appendStringInfoString(ctx->out, " new-tuple:"); + } + + if (change->tp.newtuple == NULL) + appendStringInfoString(ctx->out, " (no-tuple-data)"); + else + tuple_to_stringinfo(ctx->out, tupdesc, + &change->tp.newtuple->tuple, + false); + break; + case REORDER_BUFFER_CHANGE_DELETE: + appendStringInfoString(ctx->out, " DELETE:"); + + /* if there was no PK, we only know that a delete happened */ + if (change->tp.oldtuple == NULL) + appendStringInfoString(ctx->out, " (no-tuple-data)"); + /* In DELETE, only the replica identity is present; display that */ + else + tuple_to_stringinfo(ctx->out, tupdesc, + &change->tp.oldtuple->tuple, + true); + break; + } + + MemoryContextSwitchTo(old); + MemoryContextReset(data->context); + + OutputPluginWrite(ctx, true); +} diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index 336ba0c564..ec68f10b65 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -140,6 +140,7 @@ CREATE EXTENSION module_name FROM unpackaged; &sslinfo; &tablefunc; &tcn; + &test-decoding; &test-parser; &test-shm-mq; &tsearch2; diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 09de4bd051..0e863ee064 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -143,6 +143,7 @@ + diff --git a/doc/src/sgml/test-decoding.sgml b/doc/src/sgml/test-decoding.sgml new file mode 100644 index 0000000000..0fe3d0a45a --- /dev/null +++ b/doc/src/sgml/test-decoding.sgml @@ -0,0 +1,42 @@ + + + + test_decoding + + + test_decoding + + + + test_decoding is an example of a logical decoding + output plugin. It doesn't do anything especially useful, but can serve as + a starting point for developing your own decoder. + + + + test_decoding receives WAL through the logical decoding + mechanism and decodes it into text representations of the operations + performed. + + + + Typical output from this plugin, used over the SQL logical decoding + interface, might be: + + +postgres=# SELECT * FROM pg_logical_slot_get_changes('test_slot', 'now', 'include-xids', '0'); + location | xid | data +-----------+-----+-------------------------------------------------- + 0/16D30F8 | 691 | BEGIN + 0/16D32A0 | 691 | table public.data: INSERT: id[int4]:2 data[text]:'arg' + 0/16D32A0 | 691 | table public.data: INSERT: id[int4]:3 data[text]:'demo' + 0/16D32A0 | 691 | COMMIT + 0/16D32D8 | 692 | BEGIN + 0/16D3398 | 692 | table public.data: DELETE: id[int4]:2 + 0/16D3398 | 692 | table public.data: DELETE: id[int4]:3 + 0/16D3398 | 692 | COMMIT +(8 rows) + + + + diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 209d1bdf4d..cdddf492f4 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -468,6 +468,8 @@ pg_regress_installcheck = $(top_builddir)/src/test/regress/pg_regress --inputdir pg_regress_clean_files = results/ regression.diffs regression.out tmp_check/ log/ +pg_isolation_regress_check = $(top_builddir)/src/test/isolation/pg_isolation_regress --inputdir=$(srcdir) --temp-install=./tmp_check --top-builddir=$(top_builddir) $(pg_regress_locale_flags) +pg_isolation_regress_installcheck = $(top_builddir)/src/test/isolation/pg_isolation_regress --inputdir=$(srcdir) --top-builddir=$(top_builddir) $(pg_regress_locale_flags) ########################################################################## # diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index de4befa93f..71ec74015c 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -347,8 +347,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) /* * Prune and repair fragmentation for the whole page, if possible. */ - Assert(TransactionIdIsValid(RecentGlobalXmin)); - heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin); + heap_page_prune_opt(scan->rs_rd, buffer); /* * We must hold share lock on the buffer content while examining tuple @@ -1750,10 +1749,22 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, */ if (!skip) { + /* + * For the benefit of logical decoding, have t_self point at the + * element of the HOT chain we're currently investigating instead + * of the root tuple of the HOT chain. This is important because + * the *Satisfies routine for historical mvcc snapshots needs the + * correct tid to decide about the visibility in some cases. + */ + ItemPointerSet(&(heapTuple->t_self), BufferGetBlockNumber(buffer), offnum); + /* If it's visible per the snapshot, we must return it */ valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer); CheckForSerializableConflictOut(valid, relation, heapTuple, buffer, snapshot); + /* reset to original, non-redirected, tid */ + heapTuple->t_self = *tid; + if (valid) { ItemPointerSetOffsetNumber(tid, offnum); @@ -8207,6 +8218,9 @@ heap2_redo(XLogRecPtr lsn, XLogRecord *record) * decoding. */ break; + case XLOG_HEAP2_REWRITE: + heap_xlog_logical_rewrite(lsn, record); + break; default: elog(PANIC, "heap2_redo: unknown op code %u", info); } diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 27cbac8525..3c69e1bada 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -18,13 +18,14 @@ #include "access/heapam_xlog.h" #include "access/transam.h" #include "access/htup_details.h" +#include "catalog/catalog.h" #include "miscadmin.h" #include "pgstat.h" #include "storage/bufmgr.h" +#include "utils/snapmgr.h" #include "utils/rel.h" #include "utils/tqual.h" - /* Working data for heap_page_prune and subroutines */ typedef struct { @@ -70,10 +71,34 @@ static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum); * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum). */ void -heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin) +heap_page_prune_opt(Relation relation, Buffer buffer) { Page page = BufferGetPage(buffer); Size minfree; + TransactionId OldestXmin; + + /* + * We can't write WAL in recovery mode, so there's no point trying to + * clean the page. The master will likely issue a cleaning WAL record soon + * anyway, so this is no particular loss. + */ + if (RecoveryInProgress()) + return; + + /* + * Use the appropriate xmin horizon for this relation. If it's a proper + * catalog relation or a user defined, additional, catalog relation, we + * need to use the horizon that includes slots, otherwise the data-only + * horizon can be used. Note that the toast relation of user defined + * relations are *not* considered catalog relations. + */ + if (IsCatalogRelation(relation) || + RelationIsAccessibleInLogicalDecoding(relation)) + OldestXmin = RecentGlobalXmin; + else + OldestXmin = RecentGlobalDataXmin; + + Assert(TransactionIdIsValid(OldestXmin)); /* * Let's see if we really need pruning. @@ -84,14 +109,6 @@ heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin) if (!PageIsPrunable(page, OldestXmin)) return; - /* - * We can't write WAL in recovery mode, so there's no point trying to - * clean the page. The master will likely issue a cleaning WAL record soon - * anyway, so this is no particular loss. - */ - if (RecoveryInProgress()) - return; - /* * We prune when a previous UPDATE failed to find enough space on the page * for a new tuple version, or when free space falls below the relation's diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index c34ab9865f..239c7dad0c 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -102,17 +102,34 @@ */ #include "postgres.h" +#include +#include + +#include "miscadmin.h" + #include "access/heapam.h" #include "access/heapam_xlog.h" #include "access/rewriteheap.h" #include "access/transam.h" #include "access/tuptoaster.h" +#include "access/xact.h" + +#include "catalog/catalog.h" + +#include "lib/ilist.h" + +#include "replication/logical.h" +#include "replication/slot.h" + #include "storage/bufmgr.h" +#include "storage/fd.h" #include "storage/smgr.h" + #include "utils/memutils.h" #include "utils/rel.h" #include "utils/tqual.h" +#include "storage/procarray.h" /* * State associated with a rewrite operation. This is opaque to the user @@ -120,21 +137,28 @@ */ typedef struct RewriteStateData { + Relation rs_old_rel; /* source heap */ Relation rs_new_rel; /* destination heap */ Page rs_buffer; /* page currently being built */ BlockNumber rs_blockno; /* block where page will go */ bool rs_buffer_valid; /* T if any tuples in buffer */ bool rs_use_wal; /* must we WAL-log inserts? */ + bool rs_logical_rewrite; /* do we need to do logical rewriting */ TransactionId rs_oldest_xmin; /* oldest xmin used by caller to * determine tuple visibility */ TransactionId rs_freeze_xid;/* Xid that will be used as freeze cutoff * point */ + TransactionId rs_logical_xmin; /* Xid that will be used as cutoff + * point for logical rewrites */ MultiXactId rs_cutoff_multi;/* MultiXactId that will be used as cutoff * point for multixacts */ MemoryContext rs_cxt; /* for hash tables and entries and tuples in * them */ + XLogRecPtr rs_begin_lsn; /* XLogInsertLsn when starting the rewrite */ HTAB *rs_unresolved_tups; /* unmatched A tuples */ HTAB *rs_old_new_tid_map; /* unmatched B tuples */ + HTAB *rs_logical_mappings; /* logical remapping files */ + uint32 rs_num_rewrite_mappings; /* # in memory mappings */ } RewriteStateData; /* @@ -169,14 +193,45 @@ typedef struct typedef OldToNewMappingData *OldToNewMapping; +/* + * In-Memory data for a xid that might need logical remapping entries + * to be logged. + */ +typedef struct RewriteMappingFile +{ + TransactionId xid; /* xid that might need to see the row */ + int vfd; /* fd of mappings file */ + off_t off; /* how far have we written yet */ + uint32 num_mappings; /* number of in-memory mappings */ + dlist_head mappings; /* list of in-memory mappings */ + char path[MAXPGPATH]; /* path, for error messages */ +} RewriteMappingFile; + +/* + * A single In-Memeory logical rewrite mapping, hanging of + * RewriteMappingFile->mappings. + */ +typedef struct RewriteMappingDataEntry +{ + LogicalRewriteMappingData map; /* map between old and new location of + * the tuple */ + dlist_node node; +} RewriteMappingDataEntry; + /* prototypes for internal functions */ static void raw_heap_insert(RewriteState state, HeapTuple tup); +/* internal logical remapping prototypes */ +static void logical_begin_heap_rewrite(RewriteState state); +static void logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid, HeapTuple new_tuple); +static void logical_end_heap_rewrite(RewriteState state); + /* * Begin a rewrite of a table * + * old_heap old, locked heap relation tuples will be read from * new_heap new, locked heap relation to insert tuples to * oldest_xmin xid used by the caller to determine which tuples are dead * freeze_xid xid before which tuples will be frozen @@ -187,7 +242,7 @@ static void raw_heap_insert(RewriteState state, HeapTuple tup); * to be used in subsequent calls to the other functions. */ RewriteState -begin_heap_rewrite(Relation new_heap, TransactionId oldest_xmin, +begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xmin, TransactionId freeze_xid, MultiXactId cutoff_multi, bool use_wal) { @@ -210,6 +265,7 @@ begin_heap_rewrite(Relation new_heap, TransactionId oldest_xmin, /* Create and fill in the state struct */ state = palloc0(sizeof(RewriteStateData)); + state->rs_old_rel = old_heap; state->rs_new_rel = new_heap; state->rs_buffer = (Page) palloc(BLCKSZ); /* new_heap needn't be empty, just locked */ @@ -244,6 +300,8 @@ begin_heap_rewrite(Relation new_heap, TransactionId oldest_xmin, MemoryContextSwitchTo(old_cxt); + logical_begin_heap_rewrite(state); + return state; } @@ -301,6 +359,8 @@ end_heap_rewrite(RewriteState state) if (RelationNeedsWAL(state->rs_new_rel)) heap_sync(state->rs_new_rel); + logical_end_heap_rewrite(state); + /* Deleting the context frees everything */ MemoryContextDelete(state->rs_cxt); } @@ -429,6 +489,8 @@ rewrite_heap_tuple(RewriteState state, raw_heap_insert(state, new_tuple); new_tid = new_tuple->t_self; + logical_rewrite_heap_tuple(state, old_tid, new_tuple); + /* * If the tuple is the updated version of a row, and the prior version * wouldn't be DEAD yet, then we need to either resolve the prior @@ -678,3 +740,545 @@ raw_heap_insert(RewriteState state, HeapTuple tup) if (heaptup != tup) heap_freetuple(heaptup); } + +/* ------------------------------------------------------------------------ + * Logical rewrite support + * + * When doing logical decoding - which relies on using cmin/cmax of catalog + * tuples, via xl_heap_new_cid records - heap rewrites have to log enough + * information to allow the decoding backend to updates its internal mapping + * of (relfilenode,ctid) => (cmin, cmax) to be correct for the rewritten heap. + * + * For that, every time we find a tuple that's been modified in a catalog + * relation within the xmin horizon of any decoding slot, we log a mapping + * from the old to the new location. + * + * To deal with rewrites that abort the filename of a mapping file contains + * the xid of the transaction performing the rewrite, which then can be + * checked before being read in. + * + * For efficiency we don't immediately spill every single map mapping for a + * row to disk but only do so in batches when we've collected several of them + * in memory or when end_heap_rewrite() has been called. + * + * Crash-Safety: This module diverts from the usual patterns of doing WAL + * since it cannot rely on checkpoint flushing out all buffers and thus + * waiting for exlusive locks on buffers. Usually the XLogInsert() covering + * buffer modifications is performed while the buffer(s) that are being + * modified are exlusively locked guaranteeing that both the WAL record and + * the modified heap are on either side of the checkpoint. But since the + * mapping files we log aren't in shared_buffers that interlock doesn't work. + * + * Instead we simply write the mapping files out to disk, *before* the + * XLogInsert() is performed. That guarantees that either the XLogInsert() is + * inserted after the checkpoint's redo pointer or that the checkpoint (via + * LogicalRewriteHeapCheckpoint()) has flushed the (partial) mapping file to + * disk. That leaves the tail end that has not yet been flushed open to + * corruption, which is solved by including the current offset in the + * xl_heap_rewrite_mapping records and truncating the mapping file to it + * during replay. Every time a rewrite is finished all generated mapping files + * are synced to disk. + * + * Note that if we were only concerned about crash safety we wouldn't have to + * deal with WAL logging at all - an fsync() at the end of a rewrite would be + * sufficient for crash safety. Any mapping that hasn't been safely flushed to + * disk has to be by an aborted (explicitly or via a crash) transaction and is + * ignored by virtue of the xid in it's name being subject to a + * TransactionDidCommit() check. But we want to support having standbys via + * physical replication, both for availability and to to do logical decoding + * there. + * ------------------------------------------------------------------------ + */ + +/* + * Do preparations for logging logical mappings during a rewrite if + * necessary. If we detect that we don't need to log anything we'll prevent + * any further action by the various logical rewrite functions. + */ +static void +logical_begin_heap_rewrite(RewriteState state) +{ + HASHCTL hash_ctl; + TransactionId logical_xmin; + + /* + * We only need to persist these mappings if the rewritten table can be + * accessed during logical decoding, if not, we can skip doing any + * additional work. + */ + state->rs_logical_rewrite = + RelationIsAccessibleInLogicalDecoding(state->rs_old_rel); + + if (!state->rs_logical_rewrite) + return; + + Assert(ReplicationSlotCtl != NULL); + + ProcArrayGetReplicationSlotXmin(NULL, &logical_xmin); + + /* + * If there are no logical slots in progress we don't need to do anything, + * there cannot be any remappings for relevant rows yet. The relation's + * lock protects us against races. + */ + if (logical_xmin == InvalidTransactionId) + { + state->rs_logical_rewrite = false; + return; + } + + state->rs_logical_xmin = logical_xmin; + state->rs_begin_lsn = GetXLogInsertRecPtr(); + state->rs_num_rewrite_mappings = 0; + + memset(&hash_ctl, 0, sizeof(hash_ctl)); + hash_ctl.keysize = sizeof(TransactionId); + hash_ctl.entrysize = sizeof(RewriteMappingFile); + hash_ctl.hcxt = state->rs_cxt; + hash_ctl.hash = tag_hash; + + state->rs_logical_mappings = + hash_create("Logical rewrite mapping", + 128, /* arbitrary initial size */ + &hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); +} + +/* + * Flush all logical in-memory mappings to disk, but don't fsync them yet. + */ +static void +logical_heap_rewrite_flush_mappings(RewriteState state) +{ + HASH_SEQ_STATUS seq_status; + RewriteMappingFile *src; + dlist_mutable_iter iter; + + Assert(state->rs_logical_rewrite); + + /* no logical rewrite in progress, no need to iterate over mappings */ + if (state->rs_num_rewrite_mappings == 0) + return; + + elog(DEBUG1, "flushing %u logical rewrite mapping entries", + state->rs_num_rewrite_mappings); + + hash_seq_init(&seq_status, state->rs_logical_mappings); + while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL) + { + XLogRecData rdata[2]; + char *waldata; + char *waldata_start; + xl_heap_rewrite_mapping xlrec; + Oid dboid; + uint32 len; + int written; + + /* this file hasn't got any new mappings */ + if (src->num_mappings == 0) + continue; + + if (state->rs_old_rel->rd_rel->relisshared) + dboid = InvalidOid; + else + dboid = MyDatabaseId; + + xlrec.num_mappings = src->num_mappings; + xlrec.mapped_rel = RelationGetRelid(state->rs_old_rel); + xlrec.mapped_xid = src->xid; + xlrec.mapped_db = dboid; + xlrec.offset = src->off; + xlrec.start_lsn = state->rs_begin_lsn; + + rdata[0].data = (char *) (&xlrec); + rdata[0].len = sizeof(xlrec); + rdata[0].buffer = InvalidBuffer; + rdata[0].next = &(rdata[1]); + + /* write all mappings consecutively */ + len = src->num_mappings * sizeof(LogicalRewriteMappingData); + waldata = palloc(len); + waldata_start = waldata; + + /* + * collect data we need to write out, but don't modify ondisk data yet + */ + dlist_foreach_modify(iter, &src->mappings) + { + RewriteMappingDataEntry *pmap; + + pmap = dlist_container(RewriteMappingDataEntry, node, iter.cur); + + memcpy(waldata, &pmap->map, sizeof(pmap->map)); + waldata += sizeof(pmap->map); + + /* remove from the list and free */ + dlist_delete(&pmap->node); + pfree(pmap); + + /* update bookkeeping */ + state->rs_num_rewrite_mappings--; + src->num_mappings--; + } + + /* + * Note that we deviate from the usual WAL coding practices here, + * check the above "Logical rewrite support" comment for reasoning. + */ + written = FileWrite(src->vfd, waldata_start, len); + if (written != len) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\", wrote %d of %d: %m", src->path, + written, len))); + src->off += len; + + Assert(src->num_mappings == 0); + + rdata[1].data = waldata_start; + rdata[1].len = len; + rdata[1].buffer = InvalidBuffer; + rdata[1].next = NULL; + + /* write xlog record */ + XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE, rdata); + + } + Assert(state->rs_num_rewrite_mappings == 0); +} + +/* + * Logical remapping part of end_heap_rewrite(). + */ +static void +logical_end_heap_rewrite(RewriteState state) +{ + HASH_SEQ_STATUS seq_status; + RewriteMappingFile *src; + + /* done, no logical rewrite in progress */ + if (!state->rs_logical_rewrite) + return; + + /* writeout remaining in-memory entries */ + if (state->rs_num_rewrite_mappings > 0 ) + logical_heap_rewrite_flush_mappings(state); + + /* Iterate over all mappings we have written and fsync the files. */ + hash_seq_init(&seq_status, state->rs_logical_mappings); + while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL) + { + if(FileSync(src->vfd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", src->path))); + FileClose(src->vfd); + } + /* memory context cleanup will deal with the rest */ +} + +/* + * Log a single (old->new) mapping for 'xid'. + */ +static void +logical_rewrite_log_mapping(RewriteState state, TransactionId xid, + LogicalRewriteMappingData *map) +{ + RewriteMappingFile *src; + RewriteMappingDataEntry *pmap; + Oid relid; + bool found; + + relid = RelationGetRelid(state->rs_old_rel); + + /* look for existing mappings for this 'mapped' xid */ + src = hash_search(state->rs_logical_mappings, &xid, + HASH_ENTER, &found); + + /* + * We haven't yet had the need to map anything for this xid, create + * per-xid data structures. + */ + if (!found) + { + char path[MAXPGPATH]; + Oid dboid; + + if (state->rs_old_rel->rd_rel->relisshared) + dboid = InvalidOid; + else + dboid = MyDatabaseId; + + snprintf(path, MAXPGPATH, + "pg_llog/mappings/" LOGICAL_REWRITE_FORMAT, + dboid, relid, + (uint32) (state->rs_begin_lsn >> 32), + (uint32) state->rs_begin_lsn, + xid, GetCurrentTransactionId()); + + dlist_init(&src->mappings); + src->num_mappings = 0; + src->off = 0; + memcpy(src->path, path, sizeof(path)); + src->vfd = PathNameOpenFile(path, + O_CREAT | O_EXCL | O_WRONLY | PG_BINARY, + S_IRUSR | S_IWUSR); + if (src->vfd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", path))); + } + + pmap = MemoryContextAlloc(state->rs_cxt, + sizeof(RewriteMappingDataEntry)); + memcpy(&pmap->map, map, sizeof(LogicalRewriteMappingData)); + dlist_push_tail(&src->mappings, &pmap->node); + src->num_mappings++; + state->rs_num_rewrite_mappings++; + + /* + * Write out buffer every time we've too many in-memory entries across all + * mapping files. + */ + if (state->rs_num_rewrite_mappings >= 1000 /* arbitrary number */) + logical_heap_rewrite_flush_mappings(state); +} + +/* + * Perform logical remapping for a tuple that's mapped from old_tid to + * new_tuple->t_self by rewrite_heap_tuple() iff necessary for the tuple. + */ +static void +logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid, + HeapTuple new_tuple) +{ + ItemPointerData new_tid = new_tuple->t_self; + TransactionId cutoff = state->rs_logical_xmin; + TransactionId xmin; + TransactionId xmax; + bool do_log_xmin = false; + bool do_log_xmax = false; + LogicalRewriteMappingData map; + + /* no logical rewrite in progress, we don't need to log anything */ + if (!state->rs_logical_rewrite) + return; + + xmin = HeapTupleHeaderGetXmin(new_tuple->t_data); + /* use *GetUpdateXid to correctly deal with multixacts */ + xmax = HeapTupleHeaderGetUpdateXid(new_tuple->t_data); + + /* + * Log the mapping iff the tuple has been created recently. + */ + if (TransactionIdIsNormal(xmin) && !TransactionIdPrecedes(xmin, cutoff)) + do_log_xmin = true; + + if (!TransactionIdIsNormal(xmax)) + { + /* + * no xmax is set, can't have any permanent ones, so this check is + * sufficient + */ + } + else if (HEAP_XMAX_IS_LOCKED_ONLY(new_tuple->t_data->t_infomask)) + { + /* only locked, we don't care */ + } + else if (!TransactionIdPrecedes(xmax, cutoff)) + { + /* tuple has been deleted recently, log */ + do_log_xmax = true; + } + + /* if neither needs to be logged, we're done */ + if (!do_log_xmin && !do_log_xmax) + return; + + /* fill out mapping information */ + map.old_node = state->rs_old_rel->rd_node; + map.old_tid = old_tid; + map.new_node = state->rs_new_rel->rd_node; + map.new_tid = new_tid; + + /* --- + * Now persist the mapping for the individual xids that are affected. We + * need to log for both xmin and xmax if they aren't the same transaction + * since the mapping files are per "affected" xid. + * We don't muster all that much effort detecting whether xmin and xmax + * are actually the same transaction, we just check whether the xid is the + * same disregarding subtransactions. Logging too much is relatively + * harmless and we could never do the check fully since subtransaction + * data is thrown away during restarts. + * --- + */ + if (do_log_xmin) + logical_rewrite_log_mapping(state, xmin, &map); + /* separately log mapping for xmax unless it'd be redundant */ + if (do_log_xmax && !TransactionIdEquals(xmin, xmax)) + logical_rewrite_log_mapping(state, xmax, &map); +} + +/* + * Replay XLOG_HEAP2_REWRITE records + */ +void +heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r) +{ + char path[MAXPGPATH]; + int fd; + xl_heap_rewrite_mapping *xlrec; + uint32 len; + char *data; + + xlrec = (xl_heap_rewrite_mapping *) XLogRecGetData(r); + + snprintf(path, MAXPGPATH, + "pg_llog/mappings/" LOGICAL_REWRITE_FORMAT, + xlrec->mapped_db, xlrec->mapped_rel, + (uint32) (xlrec->start_lsn >> 32), + (uint32) xlrec->start_lsn, + xlrec->mapped_xid, r->xl_xid); + + fd = OpenTransientFile(path, + O_CREAT | O_WRONLY | PG_BINARY, + S_IRUSR | S_IWUSR); + if (fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", path))); + /* + * Truncate all data that's not guaranteed to have been safely fsynced (by + * previous record or by the last checkpoint). + */ + if (ftruncate(fd, xlrec->offset) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not truncate file \"%s\" to %u: %m", + path, (uint32) xlrec->offset))); + + /* now seek to the position we want to write our data to */ + if (lseek(fd, xlrec->offset, SEEK_SET) != xlrec->offset) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not seek to the end of file \"%s\": %m", + path))); + + data = XLogRecGetData(r) + sizeof(*xlrec); + + len = xlrec->num_mappings * sizeof(LogicalRewriteMappingData); + + /* write out tail end of mapping file (again) */ + if (write(fd, data, len) != len) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", path))); + /* + * Now fsync all previously written data. We could improve things and only + * do this for the last write to a file, but the required bookkeeping + * doesn't seem worth the trouble. + */ + if (pg_fsync(fd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", path))); + + CloseTransientFile(fd); +} + +/* --- + * Perform a checkpoint for logical rewrite mappings + * + * This serves two tasks: + * 1) Remove all mappings not needed anymore based on the logical restart LSN + * 2) Flush all remaining mappings to disk, so that replay after a checkpoint + * only has to deal with the parts of a mapping that have been written out + * after the checkpoint started. + * --- + */ +void +CheckPointLogicalRewriteHeap(void) +{ + XLogRecPtr cutoff; + XLogRecPtr redo; + DIR *mappings_dir; + struct dirent *mapping_de; + char path[MAXPGPATH]; + + /* + * We start of with a minimum of the last redo pointer. No new decoding + * slot will start before that, so that's a safe upper bound for removal. + */ + redo = GetRedoRecPtr(); + + /* now check for the restart ptrs from existing slots */ + cutoff = ReplicationSlotsComputeLogicalRestartLSN(); + + /* don't start earlier than the restart lsn */ + if (cutoff != InvalidXLogRecPtr && redo < cutoff) + cutoff = redo; + + mappings_dir = AllocateDir("pg_llog/mappings"); + while ((mapping_de = ReadDir(mappings_dir, "pg_llog/mappings")) != NULL) + { + struct stat statbuf; + Oid dboid; + Oid relid; + XLogRecPtr lsn; + TransactionId rewrite_xid; + TransactionId create_xid; + uint32 hi, lo; + + if (strcmp(mapping_de->d_name, ".") == 0 || + strcmp(mapping_de->d_name, "..") == 0) + continue; + + snprintf(path, MAXPGPATH, "pg_llog/mappings/%s", mapping_de->d_name); + if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode)) + continue; + + /* Skip over files that cannot be ours. */ + if (strncmp(mapping_de->d_name, "map-", 4) != 0) + continue; + + if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, + &dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6) + elog(ERROR,"could not parse filename \"%s\"", mapping_de->d_name); + + lsn = ((uint64) hi) << 32 | lo; + + if (lsn < cutoff || cutoff == InvalidXLogRecPtr) + { + elog(DEBUG1, "removing logical rewrite file \"%s\"", path); + if (unlink(path) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not unlink file \"%s\": %m", path))); + } + else + { + int fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0); + + /* + * The file cannot vanish due to concurrency since this function + * is the only one removing logical mappings and it's run while + * CheckpointLock is held exclusively. + */ + if (fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", path))); + /* + * We could try to avoid fsyncing files that either haven't + * changed or have only been created since the checkpoint's start, + * but it's currently not deemed worth the effort. + */ + else if (pg_fsync(fd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", path))); + CloseTransientFile(fd); + } + } + FreeDir(mappings_dir); +} diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 97c9f238a7..9a821d3e1c 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -44,32 +44,6 @@ #undef TOAST_DEBUG -/* - * Testing whether an externally-stored value is compressed now requires - * comparing extsize (the actual length of the external data) to rawsize - * (the original uncompressed datum's size). The latter includes VARHDRSZ - * overhead, the former doesn't. We never use compression unless it actually - * saves space, so we expect either equality or less-than. - */ -#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \ - ((toast_pointer).va_extsize < (toast_pointer).va_rawsize - VARHDRSZ) - -/* - * Macro to fetch the possibly-unaligned contents of an EXTERNAL datum - * into a local "struct varatt_external" toast pointer. This should be - * just a memcpy, but some versions of gcc seem to produce broken code - * that assumes the datum contents are aligned. Introducing an explicit - * intermediate "varattrib_1b_e *" variable seems to fix it. - */ -#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr) \ -do { \ - varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \ - Assert(VARATT_IS_EXTERNAL(attre)); \ - Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \ - memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \ -} while (0) - - static void toast_delete_datum(Relation rel, Datum value); static Datum toast_save_datum(Relation rel, Datum value, struct varlena * oldexternal, int options); diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 1aba2f04cc..a4b5f3d698 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -67,7 +67,10 @@ #include "access/relscan.h" #include "access/transam.h" +#include "access/xlog.h" + #include "catalog/index.h" +#include "catalog/catalog.h" #include "pgstat.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" @@ -520,8 +523,7 @@ index_fetch_heap(IndexScanDesc scan) * Prune page, but only if we weren't already on this page */ if (prev_buf != scan->xs_cbuf) - heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf, - RecentGlobalXmin); + heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf); } /* Obtain share-lock on the buffer so we can examine visibility */ diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c index 89ba09a206..c8a61669dd 100644 --- a/src/backend/access/rmgrdesc/heapdesc.c +++ b/src/backend/access/rmgrdesc/heapdesc.c @@ -149,6 +149,10 @@ heap2_desc(StringInfo buf, uint8 xl_info, char *rec) xlrec->node.relNode, xlrec->block, xlrec->cutoff_xid, xlrec->ntuples); } + else if (info == XLOG_HEAP2_REWRITE) + { + appendStringInfoString(buf, "heap rewrite:"); + } else if (info == XLOG_HEAP2_CLEANUP_INFO) { xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec; diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 0487be17df..b20d9732e7 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -1074,8 +1074,16 @@ RecordTransactionCommit(void) /* * Do we need the long commit record? If not, use the compact format. + * + * For now always use the non-compact version if wal_level=logical, so + * we can hide commits from other databases. TODO: In the future we + * should merge compact and non-compact commits and use a flags + * variable to determine if it contains subxacts, relations or + * invalidation messages, that's more extensible and degrades more + * gracefully. Till then, it's just 20 bytes of overhead. */ - if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit) + if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit || + XLogLogicalInfoActive()) { XLogRecData rdata[4]; int lastrdata = 0; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index ad46eb0ceb..53a20b1e60 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -23,6 +23,7 @@ #include "access/clog.h" #include "access/multixact.h" +#include "access/rewriteheap.h" #include "access/subtrans.h" #include "access/timeline.h" #include "access/transam.h" @@ -39,7 +40,9 @@ #include "pgstat.h" #include "postmaster/bgwriter.h" #include "postmaster/startup.h" +#include "replication/logical.h" #include "replication/slot.h" +#include "replication/snapbuild.h" #include "replication/walreceiver.h" #include "replication/walsender.h" #include "storage/barrier.h" @@ -4015,6 +4018,27 @@ CheckXLogRemoved(XLogSegNo segno, TimeLineID tli) } } +/* + * Return the last WAL segment removed, or 0 if no segment has been removed + * since startup. + * + * NB: the result can be out of date arbitrarily fast, the caller has to deal + * with that. + */ +XLogSegNo +XLogGetLastRemovedSegno(void) +{ + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + XLogSegNo lastRemovedSegNo; + + SpinLockAcquire(&xlogctl->info_lck); + lastRemovedSegNo = xlogctl->lastRemovedSegNo; + SpinLockRelease(&xlogctl->info_lck); + + return lastRemovedSegNo; +} + /* * Update the last removed segno pointer in shared memory, to reflect * that the given XLOG file has been removed. @@ -6558,6 +6582,12 @@ StartupXLOG(void) */ StartupReplicationSlots(checkPoint.redo); + /* + * Startup logical state, needs to be setup now so we have proper data + * during crash recovery. + */ + StartupReorderBuffer(); + /* * Startup MultiXact. We need to do this early for two reasons: one * is that we might try to access multixacts when we do tuple freezing, @@ -8589,7 +8619,7 @@ CreateCheckPoint(int flags) * StartupSUBTRANS hasn't been called yet. */ if (!RecoveryInProgress()) - TruncateSUBTRANS(GetOldestXmin(true, false)); + TruncateSUBTRANS(GetOldestXmin(NULL, false)); /* Real work is done, but log and update stats before releasing lock. */ LogCheckpointEnd(false); @@ -8674,6 +8704,8 @@ CheckPointGuts(XLogRecPtr checkPointRedo, int flags) CheckPointPredicate(); CheckPointRelationMap(); CheckPointReplicationSlots(); + CheckPointSnapBuild(); + CheckPointLogicalRewriteHeap(); CheckPointBuffers(flags); /* performs all required fsyncs */ /* We deliberately delay 2PC checkpointing as long as possible */ CheckPointTwoPhase(checkPointRedo); @@ -8965,7 +8997,7 @@ CreateRestartPoint(int flags) * this because StartupSUBTRANS hasn't been called yet. */ if (EnableHotStandby) - TruncateSUBTRANS(GetOldestXmin(true, false)); + TruncateSUBTRANS(GetOldestXmin(NULL, false)); /* Real work is done, but log and update before releasing lock. */ LogCheckpointEnd(true); diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index cebca95ac8..877d7678f7 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -2156,7 +2156,7 @@ IndexBuildHeapScan(Relation heapRelation, { snapshot = SnapshotAny; /* okay to ignore lazy VACUUMs here */ - OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true); + OldestXmin = GetOldestXmin(heapRelation, true); } scan = heap_beginscan_strat(heapRelation, /* relation */ diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 04dfbb0ee5..0500a73e1b 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -619,11 +619,13 @@ CREATE VIEW pg_stat_replication AS CREATE VIEW pg_replication_slots AS SELECT L.slot_name, + L.plugin, L.slot_type, L.datoid, D.datname AS database, L.active, L.xmin, + L.catalog_xmin, L.restart_lsn FROM pg_get_replication_slots() AS L LEFT JOIN pg_database D ON (L.datoid = D.oid); @@ -822,3 +824,35 @@ CREATE OR REPLACE FUNCTION CREATE OR REPLACE FUNCTION json_populate_recordset(base anyelement, from_json json, use_json_as_text boolean DEFAULT false) RETURNS SETOF anyelement LANGUAGE internal STABLE ROWS 100 AS 'json_populate_recordset'; + +CREATE OR REPLACE FUNCTION pg_logical_slot_get_changes( + IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}', + OUT location pg_lsn, OUT xid xid, OUT data text) +RETURNS SETOF RECORD +LANGUAGE INTERNAL +VOLATILE ROWS 1000 COST 1000 +AS 'pg_logical_slot_get_changes'; + +CREATE OR REPLACE FUNCTION pg_logical_slot_peek_changes( + IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}', + OUT location pg_lsn, OUT xid xid, OUT data text) +RETURNS SETOF RECORD +LANGUAGE INTERNAL +VOLATILE ROWS 1000 COST 1000 +AS 'pg_logical_slot_peek_changes'; + +CREATE OR REPLACE FUNCTION pg_logical_slot_get_binary_changes( + IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}', + OUT location pg_lsn, OUT xid xid, OUT data bytea) +RETURNS SETOF RECORD +LANGUAGE INTERNAL +VOLATILE ROWS 1000 COST 1000 +AS 'pg_logical_slot_get_binary_changes'; + +CREATE OR REPLACE FUNCTION pg_logical_slot_peek_binary_changes( + IN slotname name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}', + OUT location pg_lsn, OUT xid xid, OUT data bytea) +RETURNS SETOF RECORD +LANGUAGE INTERNAL +VOLATILE ROWS 1000 COST 1000 +AS 'pg_logical_slot_peek_binary_changes'; diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index e7fcb55868..a04adeaac7 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -22,6 +22,7 @@ #include "access/tuptoaster.h" #include "access/visibilitymap.h" #include "access/xact.h" +#include "catalog/catalog.h" #include "catalog/index.h" #include "catalog/indexing.h" #include "catalog/pg_collation.h" @@ -1081,7 +1082,7 @@ acquire_sample_rows(Relation onerel, int elevel, totalblocks = RelationGetNumberOfBlocks(onerel); /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */ - OldestXmin = GetOldestXmin(onerel->rd_rel->relisshared, true); + OldestXmin = GetOldestXmin(onerel, true); /* Prepare for sampling block numbers */ BlockSampler_Init(&bs, totalblocks, targrows); diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 8b18e4acb7..b6b40e724e 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -850,7 +850,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, * Since we're going to rewrite the whole table anyway, there's no reason * not to be aggressive about this. */ - vacuum_set_xid_limits(0, 0, 0, 0, OldHeap->rd_rel->relisshared, + vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff, NULL); @@ -869,7 +869,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, is_system_catalog = IsSystemRelation(OldHeap); /* Initialize the rewrite operation */ - rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid, + rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid, MultiXactCutoff, use_wal); /* diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 5d540aa3a0..4996a2e7cd 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -45,6 +45,7 @@ #include "miscadmin.h" #include "pgstat.h" #include "postmaster/bgwriter.h" +#include "replication/slot.h" #include "storage/copydir.h" #include "storage/fd.h" #include "storage/lmgr.h" @@ -750,6 +751,7 @@ dropdb(const char *dbname, bool missing_ok) HeapTuple tup; int notherbackends; int npreparedxacts; + int nslots, nslots_active; /* * Look up the target database's OID, and get exclusive lock on it. We @@ -806,6 +808,19 @@ dropdb(const char *dbname, bool missing_ok) (errcode(ERRCODE_OBJECT_IN_USE), errmsg("cannot drop the currently open database"))); + /* + * Check whether there are, possibly unconnected, logical slots that refer + * to the to-be-dropped database. The database lock we are holding + * prevents the creation of new slots using the database. + */ + if (ReplicationSlotsCountDBSlots(db_id, &nslots, &nslots_active)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("database \"%s\" is used by a logical decoding slot", + dbname), + errdetail("There are %d slot(s), %d of them active", + nslots, nslots_active))); + /* * Check for other backends in the target database. (Because we hold the * database lock, no new ones can start after this.) diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 5ae7763534..ded1841dc6 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -398,11 +398,11 @@ get_rel_oids(Oid relid, const RangeVar *vacrel) * not interested. */ void -vacuum_set_xid_limits(int freeze_min_age, +vacuum_set_xid_limits(Relation rel, + int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, - bool sharedRel, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, @@ -425,7 +425,7 @@ vacuum_set_xid_limits(int freeze_min_age, * working on a particular table at any time, and that each vacuum is * always an independent transaction. */ - *oldestXmin = GetOldestXmin(sharedRel, true); + *oldestXmin = GetOldestXmin(rel, true); Assert(TransactionIdIsNormal(*oldestXmin)); @@ -795,7 +795,7 @@ vac_update_datfrozenxid(void) * committed pg_class entries for new tables; see AddNewRelationTuple(). * So we cannot produce a wrong minimum by starting with this. */ - newFrozenXid = GetOldestXmin(true, true); + newFrozenXid = GetOldestXmin(NULL, true); /* * Similarly, initialize the MultiXact "min" with the value that would be diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index d77892ee7f..d5db917d97 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -44,6 +44,7 @@ #include "access/multixact.h" #include "access/transam.h" #include "access/visibilitymap.h" +#include "catalog/catalog.h" #include "catalog/storage.h" #include "commands/dbcommands.h" #include "commands/vacuum.h" @@ -204,10 +205,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, vac_strategy = bstrategy; - vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age, + vacuum_set_xid_limits(onerel, + vacstmt->freeze_min_age, vacstmt->freeze_table_age, vacstmt->multixact_freeze_min_age, vacstmt->multixact_freeze_table_age, - onerel->rd_rel->relisshared, &OldestXmin, &FreezeLimit, &xidFullScanLimit, &MultiXactCutoff, &mxactFullScanLimit); diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 1a8d4e5143..7d8a3f2c24 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -336,8 +336,7 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres) /* * Prune and repair fragmentation for the whole page, if possible. */ - Assert(TransactionIdIsValid(RecentGlobalXmin)); - heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin); + heap_page_prune_opt(scan->rs_rd, buffer); /* * We must hold share lock on the buffer content while examining tuple diff --git a/src/backend/replication/Makefile b/src/backend/replication/Makefile index 7941cb8d5e..6f17b08a6a 100644 --- a/src/backend/replication/Makefile +++ b/src/backend/replication/Makefile @@ -17,6 +17,8 @@ override CPPFLAGS := -I$(srcdir) $(CPPFLAGS) OBJS = walsender.o walreceiverfuncs.o walreceiver.o basebackup.o \ repl_gram.o slot.o slotfuncs.o syncrep.o +SUBDIRS = logical + include $(top_srcdir)/src/backend/common.mk # repl_scanner is compiled as part of repl_gram diff --git a/src/backend/replication/logical/Makefile b/src/backend/replication/logical/Makefile new file mode 100644 index 0000000000..310a45c5c0 --- /dev/null +++ b/src/backend/replication/logical/Makefile @@ -0,0 +1,19 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for src/backend/replication/logical +# +# IDENTIFICATION +# src/backend/replication/logical/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/replication/logical +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +override CPPFLAGS := -I$(srcdir) $(CPPFLAGS) + +OBJS = decode.o logical.o logicalfuncs.o reorderbuffer.o snapbuild.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c new file mode 100644 index 0000000000..e8949aab32 --- /dev/null +++ b/src/backend/replication/logical/decode.c @@ -0,0 +1,826 @@ +/* ------------------------------------------------------------------------- + * + * decode.c + * This module decodes WAL records read using xlogreader.h's APIs for the + * purpose of logical decoding by passing information to the + * reorderbuffer module (containing the actual changes) and to the + * snapbuild module to build a fitting catalog snapshot (to be able to + * properly decode the changes in the reorderbuffer). + * + * NOTE: + * This basically tries to handle all low level xlog stuff for + * reorderbuffer.c and snapbuild.c. There's some minor leakage where a + * specific record's struct is used to pass data along, but those just + * happen to contain the right amount of data in a convenient + * format. There isn't and shouldn't be much intelligence about the + * contents of records in here except turning them into a more usable + * format. + * + * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/replication/logical/decode.c + * + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "access/heapam_xlog.h" +#include "access/transam.h" +#include "access/xact.h" +#include "access/xlog_internal.h" +#include "access/xlogreader.h" + +#include "catalog/pg_control.h" + +#include "replication/decode.h" +#include "replication/logical.h" +#include "replication/reorderbuffer.h" +#include "replication/snapbuild.h" + +#include "storage/standby.h" + +typedef struct XLogRecordBuffer +{ + XLogRecPtr origptr; + XLogRecPtr endptr; + XLogRecord record; + char *record_data; +} XLogRecordBuffer; + +/* RMGR Handlers */ +static void DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); +static void DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); +static void DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); +static void DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); +static void DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); + +/* individual record(group)'s handlers */ +static void DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); +static void DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); +static void DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); +static void DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf); +static void DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, + TransactionId xid, Oid dboid, + TimestampTz commit_time, + int nsubxacts, TransactionId *sub_xids, + int ninval_msgs, SharedInvalidationMessage *msg); +static void DecodeAbort(LogicalDecodingContext *ctx, XLogRecPtr lsn, + TransactionId xid, TransactionId *sub_xids, int nsubxacts); + +/* common function to decode tuples */ +static void DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tup); + +/* + * Take every XLogReadRecord()ed record and perform the actions required to + * decode it using the output plugin already setup in the logical decoding + * context. + */ +void +LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record) +{ + XLogRecordBuffer buf; + + buf.origptr = ctx->reader->ReadRecPtr; + buf.endptr = ctx->reader->EndRecPtr; + buf.record = *record; + buf.record_data = XLogRecGetData(record); + + /* cast so we get a warning when new rmgrs are added */ + switch ((RmgrIds) buf.record.xl_rmid) + { + /* + * Rmgrs we care about for logical decoding. Add new rmgrs in + * rmgrlist.h's order. + */ + case RM_XLOG_ID: + DecodeXLogOp(ctx, &buf); + break; + + case RM_XACT_ID: + DecodeXactOp(ctx, &buf); + break; + + case RM_STANDBY_ID: + DecodeStandbyOp(ctx, &buf); + break; + + case RM_HEAP2_ID: + DecodeHeap2Op(ctx, &buf); + break; + + case RM_HEAP_ID: + DecodeHeapOp(ctx, &buf); + break; + + /* + * Rmgrs irrelevant for logical decoding; they describe stuff not + * represented in logical decoding. Add new rmgrs in rmgrlist.h's + * order. + */ + case RM_SMGR_ID: + case RM_CLOG_ID: + case RM_DBASE_ID: + case RM_TBLSPC_ID: + case RM_MULTIXACT_ID: + case RM_RELMAP_ID: + case RM_BTREE_ID: + case RM_HASH_ID: + case RM_GIN_ID: + case RM_GIST_ID: + case RM_SEQ_ID: + case RM_SPGIST_ID: + break; + case RM_NEXT_ID: + elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) buf.record.xl_rmid); + } +} + +/* + * Handle rmgr XLOG_ID records for DecodeRecordIntoReorderBuffer(). + */ +static void +DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) +{ + SnapBuild *builder = ctx->snapshot_builder; + uint8 info = buf->record.xl_info & ~XLR_INFO_MASK; + + switch (info) + { + /* this is also used in END_OF_RECOVERY checkpoints */ + case XLOG_CHECKPOINT_SHUTDOWN: + case XLOG_END_OF_RECOVERY: + SnapBuildSerializationPoint(builder, buf->origptr); + + break; + case XLOG_CHECKPOINT_ONLINE: + /* + * a RUNNING_XACTS record will have been logged near to this, we + * can restart from there. + */ + break; + case XLOG_NOOP: + case XLOG_NEXTOID: + case XLOG_SWITCH: + case XLOG_BACKUP_END: + case XLOG_PARAMETER_CHANGE: + case XLOG_RESTORE_POINT: + case XLOG_FPW_CHANGE: + case XLOG_FPI: + break; + default: + elog(ERROR, "unexpected RM_XLOG_ID record type: %u", info); + } +} + +/* + * Handle rmgr XACT_ID records for DecodeRecordIntoReorderBuffer(). + */ +static void +DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) +{ + SnapBuild *builder = ctx->snapshot_builder; + ReorderBuffer *reorder = ctx->reorder; + XLogRecord *r = &buf->record; + uint8 info = r->xl_info & ~XLR_INFO_MASK; + + /* no point in doing anything yet, data could not be decoded anyway */ + if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT) + return; + + switch (info) + { + case XLOG_XACT_COMMIT: + { + xl_xact_commit *xlrec; + TransactionId *subxacts = NULL; + SharedInvalidationMessage *invals = NULL; + + xlrec = (xl_xact_commit *) buf->record_data; + + subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); + invals = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]); + + DecodeCommit(ctx, buf, r->xl_xid, xlrec->dbId, + xlrec->xact_time, + xlrec->nsubxacts, subxacts, + xlrec->nmsgs, invals); + + break; + } + case XLOG_XACT_COMMIT_PREPARED: + { + xl_xact_commit_prepared *prec; + xl_xact_commit *xlrec; + TransactionId *subxacts; + SharedInvalidationMessage *invals = NULL; + + /* Prepared commits contain a normal commit record... */ + prec = (xl_xact_commit_prepared *) buf->record_data; + xlrec = &prec->crec; + + subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); + invals = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]); + + DecodeCommit(ctx, buf, r->xl_xid, xlrec->dbId, + xlrec->xact_time, + xlrec->nsubxacts, subxacts, + xlrec->nmsgs, invals); + + break; + } + case XLOG_XACT_COMMIT_COMPACT: + { + xl_xact_commit_compact *xlrec; + + xlrec = (xl_xact_commit_compact *) buf->record_data; + + DecodeCommit(ctx, buf, r->xl_xid, InvalidOid, + xlrec->xact_time, + xlrec->nsubxacts, xlrec->subxacts, + 0, NULL); + break; + } + case XLOG_XACT_ABORT: + { + xl_xact_abort *xlrec; + TransactionId *sub_xids; + + xlrec = (xl_xact_abort *) buf->record_data; + + sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); + + DecodeAbort(ctx, buf->origptr, r->xl_xid, + sub_xids, xlrec->nsubxacts); + break; + } + case XLOG_XACT_ABORT_PREPARED: + { + xl_xact_abort_prepared *prec; + xl_xact_abort *xlrec; + TransactionId *sub_xids; + + /* prepared abort contain a normal commit abort... */ + prec = (xl_xact_abort_prepared *) buf->record_data; + xlrec = &prec->arec; + + sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]); + + /* r->xl_xid is committed in a separate record */ + DecodeAbort(ctx, buf->origptr, prec->xid, + sub_xids, xlrec->nsubxacts); + break; + } + + case XLOG_XACT_ASSIGNMENT: + { + xl_xact_assignment *xlrec; + int i; + TransactionId *sub_xid; + + xlrec = (xl_xact_assignment *) buf->record_data; + + sub_xid = &xlrec->xsub[0]; + + for (i = 0; i < xlrec->nsubxacts; i++) + { + ReorderBufferAssignChild(reorder, xlrec->xtop, + *(sub_xid++), buf->origptr); + } + break; + } + case XLOG_XACT_PREPARE: + /* + * Currently decoding ignores PREPARE TRANSACTION and will just + * decode the transaction when the COMMIT PREPARED is sent or + * throw away the transaction's contents when a ROLLBACK PREPARED + * is received. In the future we could add code to expose prepared + * transactions in the changestream allowing for a kind of + * distributed 2PC. + */ + break; + default: + elog(ERROR, "unexpected RM_XACT_ID record type: %u", info); + } +} + +/* + * Handle rmgr STANDBY_ID records for DecodeRecordIntoReorderBuffer(). + */ +static void +DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) +{ + SnapBuild *builder = ctx->snapshot_builder; + XLogRecord *r = &buf->record; + uint8 info = r->xl_info & ~XLR_INFO_MASK; + + switch (info) + { + case XLOG_RUNNING_XACTS: + { + xl_running_xacts *running = (xl_running_xacts *) buf->record_data; + SnapBuildProcessRunningXacts(builder, buf->origptr, running); + /* + * Abort all transactions that we keep track of, that are + * older than the record's oldestRunningXid. This is the most + * convenient spot for doing so since, in contrast to shutdown + * or end-of-recovery checkpoints, we have information about + * all running transactions which includes prepared ones, + * while shutdown checkpoints just know that no non-prepared + * transactions are in progress. + */ + ReorderBufferAbortOld(ctx->reorder, running->oldestRunningXid); + } + break; + case XLOG_STANDBY_LOCK: + break; + default: + elog(ERROR, "unexpected RM_STANDBY_ID record type: %u", info); + } +} + +/* + * Handle rmgr HEAP2_ID records for DecodeRecordIntoReorderBuffer(). + */ +static void +DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) +{ + uint8 info = buf->record.xl_info & XLOG_HEAP_OPMASK; + TransactionId xid = buf->record.xl_xid; + SnapBuild *builder = ctx->snapshot_builder; + + /* no point in doing anything yet */ + if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT) + return; + + switch (info) + { + case XLOG_HEAP2_MULTI_INSERT: + if (SnapBuildProcessChange(builder, xid, buf->origptr)) + DecodeMultiInsert(ctx, buf); + break; + case XLOG_HEAP2_NEW_CID: + { + xl_heap_new_cid *xlrec; + xlrec = (xl_heap_new_cid *) buf->record_data; + SnapBuildProcessNewCid(builder, xid, buf->origptr, xlrec); + + break; + } + case XLOG_HEAP2_REWRITE: + /* + * Although these records only exist to serve the needs of logical + * decoding, all the work happens as part of crash or archive + * recovery, so we don't need to do anything here. + */ + break; + /* + * Everything else here is just low level physical stuff we're + * not interested in. + */ + case XLOG_HEAP2_FREEZE_PAGE: + case XLOG_HEAP2_CLEAN: + case XLOG_HEAP2_CLEANUP_INFO: + case XLOG_HEAP2_VISIBLE: + case XLOG_HEAP2_LOCK_UPDATED: + break; + default: + elog(ERROR, "unexpected RM_HEAP2_ID record type: %u", info); + } +} + +/* + * Handle rmgr HEAP_ID records for DecodeRecordIntoReorderBuffer(). + */ +static void +DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) +{ + uint8 info = buf->record.xl_info & XLOG_HEAP_OPMASK; + TransactionId xid = buf->record.xl_xid; + SnapBuild *builder = ctx->snapshot_builder; + + /* no point in doing anything yet */ + if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT) + return; + + switch (info) + { + case XLOG_HEAP_INSERT: + if (SnapBuildProcessChange(builder, xid, buf->origptr)) + DecodeInsert(ctx, buf); + break; + + /* + * Treat HOT update as normal updates. There is no useful + * information in the fact that we could make it a HOT update + * locally and the WAL layout is compatible. + */ + case XLOG_HEAP_HOT_UPDATE: + case XLOG_HEAP_UPDATE: + if (SnapBuildProcessChange(builder, xid, buf->origptr)) + DecodeUpdate(ctx, buf); + break; + + case XLOG_HEAP_DELETE: + if (SnapBuildProcessChange(builder, xid, buf->origptr)) + DecodeDelete(ctx, buf); + break; + + case XLOG_HEAP_NEWPAGE: + /* + * This is only used in places like indexams and CLUSTER which + * don't contain changes relevant for logical replication. + */ + break; + + case XLOG_HEAP_INPLACE: + /* + * Inplace updates are only ever performed on catalog tuples and + * can, per definition, not change tuple visibility. Since we + * don't decode catalog tuples, we're not interested in the + * record's contents. + * + * In-place updates can be used either by XID-bearing transactions + * (e.g. in CREATE INDEX CONCURRENTLY) or by XID-less + * transactions (e.g. VACUUM). In the former case, the commit + * record will include cache invalidations, so we mark the + * transaction as catalog modifying here. Currently that's + * redundant because the commit will do that as well, but once we + * support decoding in-progress relations, this will be important. + */ + if (!TransactionIdIsValid(xid)) + break; + + SnapBuildProcessChange(builder, xid, buf->origptr); + ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr); + break; + + case XLOG_HEAP_LOCK: + /* we don't care about row level locks for now */ + break; + + default: + elog(ERROR, "unexpected RM_HEAP_ID record type: %u", info); + break; + } +} + +/* + * Consolidated commit record handling between the different form of commit + * records. + */ +static void +DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf, + TransactionId xid, Oid dboid, + TimestampTz commit_time, + int nsubxacts, TransactionId *sub_xids, + int ninval_msgs, SharedInvalidationMessage *msgs) +{ + int i; + + /* + * Process invalidation messages, even if we're not interested in the + * transaction's contents, since the various caches need to always be + * consistent. + */ + if (ninval_msgs > 0) + { + ReorderBufferAddInvalidations(ctx->reorder, xid, buf->origptr, + ninval_msgs, msgs); + ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr); + } + + SnapBuildCommitTxn(ctx->snapshot_builder, buf->origptr, xid, + nsubxacts, sub_xids); + + /* ---- + * Check whether we are interested in this specific transaction, and tell + * the the reorderbuffer to forget the content of the (sub-)transactions + * if not. + * + * There basically two reasons we might not be interested in this + * transaction: + * 1) We might not be interested in decoding transactions up to this + * LSN. This can happen because we previously decoded it and now just + * are restarting or if we haven't assembled a consistent snapshot yet. + * 2) The transaction happened in another database. + * + * We can't just use ReorderBufferAbort() here, because we need to execute + * the transaction's invalidations. This currently won't be needed if + * we're just skipping over the transaction because currently we only do + * so during startup, to get to the first transaction the client needs. As + * we have reset the catalog caches before starting to read WAL, and we + * haven't yet touched any catalogs, there can't be anything to invalidate. + * But if we're "forgetting" this commit because it's it happened in + * another database, the invalidations might be important, because they + * could be for shared catalogs and we might have loaded data into the + * relevant syscaches. + * --- + */ + if (SnapBuildXactNeedsSkip(ctx->snapshot_builder, buf->origptr) || + (dboid != InvalidOid && dboid != ctx->slot->data.database)) + { + for (i = 0; i < nsubxacts; i++) + { + ReorderBufferForget(ctx->reorder, *sub_xids, buf->origptr); + sub_xids++; + } + ReorderBufferForget(ctx->reorder, xid, buf->origptr); + + return; + } + + /* tell the reorderbuffer about the surviving subtransactions */ + for (i = 0; i < nsubxacts; i++) + { + ReorderBufferCommitChild(ctx->reorder, xid, *sub_xids, + buf->origptr, buf->endptr); + sub_xids++; + } + + /* replay actions of all transaction + subtransactions in order */ + ReorderBufferCommit(ctx->reorder, xid, buf->origptr, buf->endptr, + commit_time); +} + +/* + * Get the data from the various forms of abort records and pass it on to + * snapbuild.c and reorderbuffer.c + */ +static void +DecodeAbort(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, + TransactionId *sub_xids, int nsubxacts) +{ + int i; + + SnapBuildAbortTxn(ctx->snapshot_builder, lsn, xid, nsubxacts, sub_xids); + + for (i = 0; i < nsubxacts; i++) + { + ReorderBufferAbort(ctx->reorder, *sub_xids, lsn); + sub_xids++; + } + + ReorderBufferAbort(ctx->reorder, xid, lsn); +} + +/* + * Parse XLOG_HEAP_INSERT (not MULTI_INSERT!) records into tuplebufs. + * + * Deletes can contain the new tuple. + */ +static void +DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) +{ + XLogRecord *r = &buf->record; + xl_heap_insert *xlrec; + ReorderBufferChange *change; + + xlrec = (xl_heap_insert *) buf->record_data; + + /* only interested in our database */ + if (xlrec->target.node.dbNode != ctx->slot->data.database) + return; + + change = ReorderBufferGetChange(ctx->reorder); + change->action = REORDER_BUFFER_CHANGE_INSERT; + memcpy(&change->tp.relnode, &xlrec->target.node, sizeof(RelFileNode)); + + if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE) + { + Assert(r->xl_len > (SizeOfHeapInsert + SizeOfHeapHeader)); + + change->tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder); + + DecodeXLogTuple((char *) xlrec + SizeOfHeapInsert, + r->xl_len - SizeOfHeapInsert, + change->tp.newtuple); + } + + ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change); +} + +/* + * Parse XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE, which have the same layout + * in the record, from wal into proper tuplebufs. + * + * Updates can possibly contain a new tuple and the old primary key. + */ +static void +DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) +{ + XLogRecord *r = &buf->record; + xl_heap_update *xlrec; + xl_heap_header_len *xlhdr; + ReorderBufferChange *change; + char *data; + + xlrec = (xl_heap_update *) buf->record_data; + xlhdr = (xl_heap_header_len *) (buf->record_data + SizeOfHeapUpdate); + + /* only interested in our database */ + if (xlrec->target.node.dbNode != ctx->slot->data.database) + return; + + change = ReorderBufferGetChange(ctx->reorder); + change->action = REORDER_BUFFER_CHANGE_UPDATE; + memcpy(&change->tp.relnode, &xlrec->target.node, sizeof(RelFileNode)); + + data = (char *) &xlhdr->header; + + if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE) + { + Assert(r->xl_len > (SizeOfHeapUpdate + SizeOfHeapHeaderLen)); + + change->tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder); + + DecodeXLogTuple(data, + xlhdr->t_len + SizeOfHeapHeader, + change->tp.newtuple); + /* skip over the rest of the tuple header */ + data += SizeOfHeapHeader; + /* skip over the tuple data */ + data += xlhdr->t_len; + } + + if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD) + { + xlhdr = (xl_heap_header_len *) data; + change->tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder); + DecodeXLogTuple((char *) &xlhdr->header, + xlhdr->t_len + SizeOfHeapHeader, + change->tp.oldtuple); + data = (char *) &xlhdr->header; + data += SizeOfHeapHeader; + data += xlhdr->t_len; + } + + ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change); +} + +/* + * Parse XLOG_HEAP_DELETE from wal into proper tuplebufs. + * + * Deletes can possibly contain the old primary key. + */ +static void +DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) +{ + XLogRecord *r = &buf->record; + xl_heap_delete *xlrec; + ReorderBufferChange *change; + + xlrec = (xl_heap_delete *) buf->record_data; + + /* only interested in our database */ + if (xlrec->target.node.dbNode != ctx->slot->data.database) + return; + + change = ReorderBufferGetChange(ctx->reorder); + change->action = REORDER_BUFFER_CHANGE_DELETE; + + memcpy(&change->tp.relnode, &xlrec->target.node, sizeof(RelFileNode)); + + /* old primary key stored */ + if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD) + { + Assert(r->xl_len > (SizeOfHeapDelete + SizeOfHeapHeader)); + + change->tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder); + + DecodeXLogTuple((char *) xlrec + SizeOfHeapDelete, + r->xl_len - SizeOfHeapDelete, + change->tp.oldtuple); + } + ReorderBufferQueueChange(ctx->reorder, r->xl_xid, buf->origptr, change); +} + +/* + * Decode XLOG_HEAP2_MULTI_INSERT_insert record into multiple tuplebufs. + * + * Currently MULTI_INSERT will always contain the full tuples. + */ +static void +DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) +{ + XLogRecord *r = &buf->record; + xl_heap_multi_insert *xlrec; + int i; + char *data; + bool isinit = (r->xl_info & XLOG_HEAP_INIT_PAGE) != 0; + + xlrec = (xl_heap_multi_insert *) buf->record_data; + + /* only interested in our database */ + if (xlrec->node.dbNode != ctx->slot->data.database) + return; + + data = buf->record_data + SizeOfHeapMultiInsert; + + /* + * OffsetNumbers (which are not of interest to us) are stored when + * XLOG_HEAP_INIT_PAGE is not set -- skip over them. + */ + if (!isinit) + data += sizeof(OffsetNumber) * xlrec->ntuples; + + for (i = 0; i < xlrec->ntuples; i++) + { + ReorderBufferChange *change; + xl_multi_insert_tuple *xlhdr; + int datalen; + ReorderBufferTupleBuf *tuple; + + change = ReorderBufferGetChange(ctx->reorder); + change->action = REORDER_BUFFER_CHANGE_INSERT; + memcpy(&change->tp.relnode, &xlrec->node, sizeof(RelFileNode)); + + /* + * CONTAINS_NEW_TUPLE will always be set currently as multi_insert + * isn't used for catalogs, but better be future proof. + * + * We decode the tuple in pretty much the same way as DecodeXLogTuple, + * but since the layout is slightly different, we can't use it here. + */ + if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE) + { + change->tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder); + + tuple = change->tp.newtuple; + + /* not a disk based tuple */ + ItemPointerSetInvalid(&tuple->tuple.t_self); + + xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(data); + data = ((char *) xlhdr) + SizeOfMultiInsertTuple; + datalen = xlhdr->datalen; + + /* + * We can only figure this out after reassembling the + * transactions. + */ + tuple->tuple.t_tableOid = InvalidOid; + tuple->tuple.t_data = &tuple->header; + tuple->tuple.t_len = datalen + + offsetof(HeapTupleHeaderData, t_bits); + + memset(&tuple->header, 0, sizeof(HeapTupleHeaderData)); + + memcpy((char *) &tuple->header + + offsetof(HeapTupleHeaderData, t_bits), + (char *) data, + datalen); + data += datalen; + + tuple->header.t_infomask = xlhdr->t_infomask; + tuple->header.t_infomask2 = xlhdr->t_infomask2; + tuple->header.t_hoff = xlhdr->t_hoff; + } + + ReorderBufferQueueChange(ctx->reorder, r->xl_xid, + buf->origptr, change); + } +} + +/* + * Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete + * (but not by heap_multi_insert) into a tuplebuf. + * + * The size 'len' and the pointer 'data' in the record need to be + * computed outside as they are record specific. + */ +static void +DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple) +{ + xl_heap_header xlhdr; + int datalen = len - SizeOfHeapHeader; + + Assert(datalen >= 0); + Assert(datalen <= MaxHeapTupleSize); + + tuple->tuple.t_len = datalen + offsetof(HeapTupleHeaderData, t_bits); + + /* not a disk based tuple */ + ItemPointerSetInvalid(&tuple->tuple.t_self); + + /* we can only figure this out after reassembling the transactions */ + tuple->tuple.t_tableOid = InvalidOid; + tuple->tuple.t_data = &tuple->header; + + /* data is not stored aligned, copy to aligned storage */ + memcpy((char *) &xlhdr, + data, + SizeOfHeapHeader); + + memset(&tuple->header, 0, sizeof(HeapTupleHeaderData)); + + memcpy((char *) &tuple->header + offsetof(HeapTupleHeaderData, t_bits), + data + SizeOfHeapHeader, + datalen); + + tuple->header.t_infomask = xlhdr.t_infomask; + tuple->header.t_infomask2 = xlhdr.t_infomask2; + tuple->header.t_hoff = xlhdr.t_hoff; +} diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c new file mode 100644 index 0000000000..4fb0974f29 --- /dev/null +++ b/src/backend/replication/logical/logical.c @@ -0,0 +1,920 @@ +/*------------------------------------------------------------------------- + * logical.c + * PostgreSQL logical decoding coordination + * + * Copyright (c) 2012-2014, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/replication/logical/logical.c + * + * NOTES + * This file coordinates interaction between the various modules that + * together providethe logical decoding, primarily by providing so + * called LogicalDecodingContexts. The goal is to encapsulate most of the + * internal complexity for consumers of logical decoding, so they can + * create and consume a changestream with a low amount of code. + * + * The idea is that a consumer provides three callbacks, one to read WAL, + * one to prepare a data write, and a final one for actually writing since + * their implementation depends on the type of consumer. Check + * logicalfunc.c for an example implementations of a fairly simple consumer + * and a implementation of a WAL reading callback that's suitable for + * simpler consumers. + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include +#include + +#include "miscadmin.h" + +#include "access/xact.h" + +#include "replication/decode.h" +#include "replication/logical.h" +#include "replication/reorderbuffer.h" +#include "replication/snapbuild.h" + +#include "storage/proc.h" +#include "storage/procarray.h" + +#include "utils/memutils.h" + +/* data for errcontext callback */ +typedef struct LogicalErrorCallbackState +{ + LogicalDecodingContext *ctx; + const char *callback_name; + XLogRecPtr report_location; +} LogicalErrorCallbackState; + +/* wrappers around output plugin callbacks */ +static void output_plugin_error_callback(void *arg); +static void startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt, + bool is_init); +static void shutdown_cb_wrapper(LogicalDecodingContext *ctx); +static void begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn); +static void commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, + XLogRecPtr commit_lsn); +static void change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change); + +static void LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin); + +/* + * Make sure the current settings & environment are capable of doing logical + * decoding. + */ +void +CheckLogicalDecodingRequirements(void) +{ + CheckSlotRequirements(); + + if (wal_level < WAL_LEVEL_LOGICAL) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("logical decoding requires wal_level >= logical"))); + + if (MyDatabaseId == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("logical decoding requires a database connection"))); + + /* ---- + * TODO: We got to change that someday soon... + * + * There's basically three things missing to allow this: + * 1) We need to be able to correctly and quickly identify the timeline a + * LSN belongs to + * 2) We need to force hot_standby_feedback to be enabled at all times so + * the primary cannot remove rows we need. + * 3) support dropping replication slots referring to a database, in + * dbase_redo. There can't be any active ones due to HS recovery + * conflicts, so that should be relatively easy. + * ---- + */ + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("logical decoding cannot be used while in recovery"))); +} + +/* + * Helper function for CreateInitialDecodingContext() and + * CreateDecodingContext() performing common tasks. + */ +static LogicalDecodingContext * +StartupDecodingContext(List *output_plugin_options, + XLogRecPtr start_lsn, + TransactionId xmin_horizon, + XLogPageReadCB read_page, + LogicalOutputPluginWriterPrepareWrite prepare_write, + LogicalOutputPluginWriterWrite do_write) +{ + ReplicationSlot *slot; + MemoryContext context, old_context; + LogicalDecodingContext *ctx; + + /* shorter lines... */ + slot = MyReplicationSlot; + + context = AllocSetContextCreate(CurrentMemoryContext, + "Changeset Extraction Context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + old_context = MemoryContextSwitchTo(context); + ctx = palloc0(sizeof(LogicalDecodingContext)); + + ctx->context = context; + + /* (re-)load output plugins, so we detect a bad (removed) output plugin now. */ + LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin)); + + /* + * Now that the slot's xmin has been set, we can announce ourselves as a + * logical decoding backend which doesn't need to be checked individually + * when computing the xmin horizon because the xmin is enforced via + * replication slots. + */ + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + MyPgXact->vacuumFlags |= PROC_IN_LOGICAL_DECODING; + LWLockRelease(ProcArrayLock); + + ctx->slot = slot; + + ctx->reader = XLogReaderAllocate(read_page, ctx); + ctx->reader->private_data = ctx; + + ctx->reorder = ReorderBufferAllocate(); + ctx->snapshot_builder = + AllocateSnapshotBuilder(ctx->reorder, xmin_horizon, start_lsn); + + ctx->reorder->private_data = ctx; + + /* wrap output plugin callbacks, so we can add error context information */ + ctx->reorder->begin = begin_cb_wrapper; + ctx->reorder->apply_change = change_cb_wrapper; + ctx->reorder->commit = commit_cb_wrapper; + + ctx->out = makeStringInfo(); + ctx->prepare_write = prepare_write; + ctx->write = do_write; + + ctx->output_plugin_options = output_plugin_options; + + MemoryContextSwitchTo(old_context); + + return ctx; +} + +/* + * Create a new decoding context, for a new logical slot. + * + * plugin contains the name of the output plugin + * output_plugin_options contains options passed to the output plugin + * read_page, prepare_write, do_write are callbacks that have to be filled to + * perform the use-case dependent, actual, work. + * + * Needs to be called while in a memory context that's at least as long lived + * as the the decoding context because further memory contexts will be created + * inside it. + * + * Returns an initialized decoding context after calling the output plugin's + * startup function. + */ +LogicalDecodingContext * +CreateInitDecodingContext(char *plugin, + List *output_plugin_options, + XLogPageReadCB read_page, + LogicalOutputPluginWriterPrepareWrite prepare_write, + LogicalOutputPluginWriterWrite do_write) +{ + TransactionId xmin_horizon = InvalidTransactionId; + ReplicationSlot *slot; + LogicalDecodingContext *ctx; + MemoryContext old_context; + + /* shorter lines... */ + slot = MyReplicationSlot; + + /* first some sanity checks that are unlikely to be violated */ + if (slot == NULL) + elog(ERROR, "cannot perform logical decoding without a acquired slot"); + + if (plugin == NULL) + elog(ERROR, "cannot initialize logical decoding without a specified plugin"); + + /* Make sure the passed slot is suitable. These are user facing errors. */ + if (slot->data.database == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot use physical replication slot created for logical decoding"))); + + if (slot->data.database != MyDatabaseId) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("replication slot \"%s\" was not created in this database", + NameStr(slot->data.name)))); + + if (IsTransactionState() && + GetTopTransactionIdIfAny() != InvalidTransactionId) + ereport(ERROR, + (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), + errmsg("cannot create logical replication slot in transaction that has performed writes"))); + + /* register output plugin name with slot */ + SpinLockAcquire(&slot->mutex); + strncpy(NameStr(slot->data.plugin), plugin, + NAMEDATALEN); + NameStr(slot->data.plugin)[NAMEDATALEN - 1] = '\0'; + SpinLockRelease(&slot->mutex); + + /* + * The replication slot mechanism is used to prevent removal of required + * WAL. As there is no interlock between this and checkpoints required WAL + * could be removed before ReplicationSlotsComputeRequiredLSN() has been + * called to prevent that. In the very unlikely case that this happens + * we'll just retry. + */ + while (true) + { + XLogSegNo segno; + + /* + * Let's start with enough information if we can, so log a standby + * snapshot and start decoding at exactly that position. + */ + if (!RecoveryInProgress()) + { + XLogRecPtr flushptr; + + /* start at current insert position*/ + slot->data.restart_lsn = GetXLogInsertRecPtr(); + + /* make sure we have enough information to start */ + flushptr = LogStandbySnapshot(); + + /* and make sure it's fsynced to disk */ + XLogFlush(flushptr); + } + else + slot->data.restart_lsn = GetRedoRecPtr(); + + /* prevent WAL removal as fast as possible */ + ReplicationSlotsComputeRequiredLSN(); + + /* + * If all required WAL is still there, great, otherwise retry. The + * slot should prevent further removal of WAL, unless there's a + * concurrent ReplicationSlotsComputeRequiredLSN() after we've written + * the new restart_lsn above, so normally we should never need to loop + * more than twice. + */ + XLByteToSeg(slot->data.restart_lsn, segno); + if (XLogGetLastRemovedSegno() < segno) + break; + } + + + /* ---- + * This is a bit tricky: We need to determine a safe xmin horizon to start + * decoding from, to avoid starting from a running xacts record referring + * to xids whose rows have been vacuumed or pruned + * already. GetOldestSafeDecodingTransactionId() returns such a value, but + * without further interlock it's return value might immediately be out of + * date. + * + * So we have to acquire the ProcArrayLock to prevent computation of new + * xmin horizons by other backends, get the safe decoding xid, and inform + * the slot machinery about the new limit. Once that's done the + * ProcArrayLock can be be released as the slot machinery now is + * protecting against vacuum. + * ---- + */ + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + + slot->effective_catalog_xmin = GetOldestSafeDecodingTransactionId(); + slot->data.catalog_xmin = slot->effective_catalog_xmin; + + ReplicationSlotsComputeRequiredXmin(true); + + LWLockRelease(ProcArrayLock); + + /* + * tell the snapshot builder to only assemble snapshot once reaching + * the a running_xact's record with the respective xmin. + */ + xmin_horizon = slot->data.catalog_xmin; + + ReplicationSlotMarkDirty(); + ReplicationSlotSave(); + + ctx = StartupDecodingContext(NIL, InvalidXLogRecPtr, xmin_horizon, + read_page, prepare_write, do_write); + + /* call output plugin initialization callback */ + old_context = MemoryContextSwitchTo(ctx->context); + if (ctx->callbacks.startup_cb != NULL) + startup_cb_wrapper(ctx, &ctx->options, true); + MemoryContextSwitchTo(old_context); + + return ctx; +} + +/* + * Create a new decoding context, for a logical slot that has previously been + * used already. + * + * start_lsn contains the LSN of the last received data or InvalidXLogRecPtr + * output_plugin_options contains options passed to the output plugin + * read_page, prepare_write, do_write are callbacks that have to be filled to + * perform the use-case dependent, actual, work. + * + * Needs to be called while in a memory context that's at least as long lived + * as the the decoding context because further memory contexts will be created + * inside it. + * + * Returns an initialized decoding context after calling the output plugin's + * startup function. + */ +LogicalDecodingContext * +CreateDecodingContext(XLogRecPtr start_lsn, + List *output_plugin_options, + XLogPageReadCB read_page, + LogicalOutputPluginWriterPrepareWrite prepare_write, + LogicalOutputPluginWriterWrite do_write) +{ + LogicalDecodingContext *ctx; + ReplicationSlot *slot; + MemoryContext old_context; + + /* shorter lines... */ + slot = MyReplicationSlot; + + /* first some sanity checks that are unlikely to be violated */ + if (slot == NULL) + elog(ERROR, "cannot perform logical decoding without a acquired slot"); + + /* make sure the passed slot is suitable, these are user facing errors */ + if (slot->data.database == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + (errmsg("cannot use physical replication slot for logical decoding")))); + + if (slot->data.database != MyDatabaseId) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + (errmsg("replication slot \"%s\" was not created in this database", + NameStr(slot->data.name))))); + + if (start_lsn == InvalidXLogRecPtr) + { + /* continue from last position */ + start_lsn = slot->data.confirmed_flush; + } + else if (start_lsn < slot->data.confirmed_flush) + { + /* + * It might seem like we should error out in this case, but it's + * pretty common for a client to acknowledge a LSN it doesn't have to + * do anything for, and thus didn't store persistently, because the + * xlog records didn't result in anything relevant for logical + * decoding. Clients have to be able to do that to support + * synchronous replication. + */ + start_lsn = slot->data.confirmed_flush; + elog(DEBUG1, "cannot stream from %X/%X, minimum is %X/%X, forwarding", + (uint32)(start_lsn >> 32), (uint32)start_lsn, + (uint32)(slot->data.confirmed_flush >> 32), + (uint32)slot->data.confirmed_flush); + } + + ctx = StartupDecodingContext(output_plugin_options, + start_lsn, InvalidTransactionId, + read_page, prepare_write, do_write); + + /* call output plugin initialization callback */ + old_context = MemoryContextSwitchTo(ctx->context); + if (ctx->callbacks.startup_cb != NULL) + startup_cb_wrapper(ctx, &ctx->options, true); + MemoryContextSwitchTo(old_context); + + ereport(LOG, + (errmsg("starting logical decoding for slot %s", + NameStr(slot->data.name)), + errdetail("streaming transactions committing after %X/%X, reading WAL from %X/%X", + (uint32)(slot->data.confirmed_flush >> 32), + (uint32)slot->data.confirmed_flush, + (uint32)(slot->data.restart_lsn >> 32), + (uint32)slot->data.restart_lsn))); + + return ctx; +} + +/* + * Returns true if an consistent initial decoding snapshot has been built. + */ +bool +DecodingContextReady(LogicalDecodingContext *ctx) +{ + return SnapBuildCurrentState(ctx->snapshot_builder) == SNAPBUILD_CONSISTENT; +} + +/* + * Read from the decoding slot, until it is ready to start extracting changes. + */ +void +DecodingContextFindStartpoint(LogicalDecodingContext *ctx) +{ + XLogRecPtr startptr; + + /* Initialize from where to start reading WAL. */ + startptr = ctx->slot->data.restart_lsn; + + elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X", + (uint32)(ctx->slot->data.restart_lsn >> 32), + (uint32)ctx->slot->data.restart_lsn); + + /* Wait for a consistent starting point */ + for (;;) + { + XLogRecord *record; + char *err = NULL; + + /* + * If the caller requires that interrupts be checked, the read_page + * callback should do so, as those will often wait. + */ + + /* the read_page callback waits for new WAL */ + record = XLogReadRecord(ctx->reader, startptr, &err); + if (err) + elog(ERROR, "%s", err); + + Assert(record); + + startptr = InvalidXLogRecPtr; + + LogicalDecodingProcessRecord(ctx, record); + + /* only continue till we found a consistent spot */ + if (DecodingContextReady(ctx)) + break; + } + + ctx->slot->data.confirmed_flush = ctx->reader->EndRecPtr; +} + +/* + * Free a previously allocated decoding context, invoking the shutdown + * callback if necessary. + */ +void +FreeDecodingContext(LogicalDecodingContext *ctx) +{ + if (ctx->callbacks.shutdown_cb != NULL) + shutdown_cb_wrapper(ctx); + + ReorderBufferFree(ctx->reorder); + FreeSnapshotBuilder(ctx->snapshot_builder); + XLogReaderFree(ctx->reader); + MemoryContextDelete(ctx->context); +} + +/* + * Prepare a write using the context's output routine. + */ +void +OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write) +{ + if (!ctx->accept_writes) + elog(ERROR, "writes are only accepted in commit, begin and change callbacks"); + + ctx->prepare_write(ctx, ctx->write_location, ctx->write_xid, last_write); + ctx->prepared_write = true; +} + +/* + * Perform a write using the context's output routine. + */ +void +OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write) +{ + if (!ctx->prepared_write) + elog(ERROR, "OutputPluginPrepareWrite needs to be called before OutputPluginWrite"); + + ctx->write(ctx, ctx->write_location, ctx->write_xid, last_write); + ctx->prepared_write = false; +} + +/* + * Load the output plugin, lookup its output plugin init function, and check + * that it provides the required callbacks. + */ +static void +LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin) +{ + LogicalOutputPluginInit plugin_init; + + plugin_init = (LogicalOutputPluginInit) + load_external_function(plugin, "_PG_output_plugin_init", false, NULL); + + if (plugin_init == NULL) + elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol"); + + /* ask the output plugin to fill the callback struct */ + plugin_init(callbacks); + + if (callbacks->begin_cb == NULL) + elog(ERROR, "output plugins have to register a begin callback"); + if (callbacks->change_cb == NULL) + elog(ERROR, "output plugins have to register a change callback"); + if (callbacks->commit_cb == NULL) + elog(ERROR, "output plugins have to register a commit callback"); +} + +static void +output_plugin_error_callback(void *arg) +{ + LogicalErrorCallbackState *state = (LogicalErrorCallbackState *) arg; + /* not all callbacks have an associated LSN */ + if (state->report_location != InvalidXLogRecPtr) + errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X", + NameStr(state->ctx->slot->data.name), + NameStr(state->ctx->slot->data.plugin), + state->callback_name, + (uint32)(state->report_location >> 32), + (uint32)state->report_location); + else + errcontext("slot \"%s\", output plugin \"%s\", in the %s callback", + NameStr(state->ctx->slot->data.name), + NameStr(state->ctx->slot->data.plugin), + state->callback_name); +} + +static void +startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is_init) +{ + LogicalErrorCallbackState state; + ErrorContextCallback errcallback; + + /* Push callback + info on the error context stack */ + state.ctx = ctx; + state.callback_name = "startup"; + state.report_location = InvalidXLogRecPtr; + errcallback.callback = output_plugin_error_callback; + errcallback.arg = (void *) &state; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* set output state */ + ctx->accept_writes = false; + + /* do the actual work: call callback */ + ctx->callbacks.startup_cb(ctx, opt, is_init); + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; +} + +static void +shutdown_cb_wrapper(LogicalDecodingContext *ctx) +{ + LogicalErrorCallbackState state; + ErrorContextCallback errcallback; + + /* Push callback + info on the error context stack */ + state.ctx = ctx; + state.callback_name = "shutdown"; + state.report_location = InvalidXLogRecPtr; + errcallback.callback = output_plugin_error_callback; + errcallback.arg = (void *) &state; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* set output state */ + ctx->accept_writes = false; + + /* do the actual work: call callback */ + ctx->callbacks.shutdown_cb(ctx); + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; +} + + +/* + * Callbacks for ReorderBuffer which add in some more information and then call + * output_plugin.h plugins. + */ +static void +begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn) +{ + LogicalDecodingContext *ctx = cache->private_data; + LogicalErrorCallbackState state; + ErrorContextCallback errcallback; + + /* Push callback + info on the error context stack */ + state.ctx = ctx; + state.callback_name = "begin"; + state.report_location = txn->first_lsn; + errcallback.callback = output_plugin_error_callback; + errcallback.arg = (void *) &state; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* set output state */ + ctx->accept_writes = true; + ctx->write_xid = txn->xid; + ctx->write_location = txn->first_lsn; + + /* do the actual work: call callback */ + ctx->callbacks.begin_cb(ctx, txn); + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; +} + +static void +commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, + XLogRecPtr commit_lsn) +{ + LogicalDecodingContext *ctx = cache->private_data; + LogicalErrorCallbackState state; + ErrorContextCallback errcallback; + + /* Push callback + info on the error context stack */ + state.ctx = ctx; + state.callback_name = "commit"; + state.report_location = txn->final_lsn; /* beginning of commit record */ + errcallback.callback = output_plugin_error_callback; + errcallback.arg = (void *) &state; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* set output state */ + ctx->accept_writes = true; + ctx->write_xid = txn->xid; + ctx->write_location = txn->end_lsn; /* points to the end of the record */ + + /* do the actual work: call callback */ + ctx->callbacks.commit_cb(ctx, txn, commit_lsn); + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; +} + +static void +change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change) +{ + LogicalDecodingContext *ctx = cache->private_data; + LogicalErrorCallbackState state; + ErrorContextCallback errcallback; + + /* Push callback + info on the error context stack */ + state.ctx = ctx; + state.callback_name = "change"; + state.report_location = change->lsn; + errcallback.callback = output_plugin_error_callback; + errcallback.arg = (void *) &state; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* set output state */ + ctx->accept_writes = true; + ctx->write_xid = txn->xid; + /* + * report this change's lsn so replies from clients can give an up2date + * answer. This won't ever be enough (and shouldn't be!) to confirm + * receipt of this transaction, but it might allow another transaction's + * commit to be confirmed with one message. + */ + ctx->write_location = change->lsn; + + ctx->callbacks.change_cb(ctx, txn, relation, change); + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; +} + +/* + * Set the required catalog xmin horizon for historic snapshots in the current + * replication slot. + * + * Note that in the most cases, we won't be able to immediately use the xmin + * to increase the xmin horizon, we need to wait till the client has confirmed + * receiving current_lsn with LogicalConfirmReceivedLocation(). + */ +void +LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin) +{ + bool updated_xmin = false; + ReplicationSlot *slot; + + slot = MyReplicationSlot; + + Assert(slot != NULL); + + SpinLockAcquire(&slot->mutex); + + /* + * don't overwrite if we already have a newer xmin. This can + * happen if we restart decoding in a slot. + */ + if (TransactionIdPrecedesOrEquals(xmin, slot->data.catalog_xmin)) + { + } + /* + * If the client has already confirmed up to this lsn, we directly + * can mark this as accepted. This can happen if we restart + * decoding in a slot. + */ + else if (current_lsn <= slot->data.confirmed_flush) + { + slot->candidate_catalog_xmin = xmin; + slot->candidate_xmin_lsn = current_lsn; + + /* our candidate can directly be used */ + updated_xmin = true; + } + /* + * Only increase if the previous values have been applied, otherwise we + * might never end up updating if the receiver acks too slowly. + */ + else if (slot->candidate_xmin_lsn == InvalidXLogRecPtr) + { + slot->candidate_catalog_xmin = xmin; + slot->candidate_xmin_lsn = current_lsn; + } + SpinLockRelease(&slot->mutex); + + /* candidate already valid with the current flush position, apply */ + if (updated_xmin) + LogicalConfirmReceivedLocation(slot->data.confirmed_flush); +} + +/* + * Mark the minimal LSN (restart_lsn) we need to read to replay all + * transactions that have not yet committed at current_lsn. + * + * Just like IncreaseRestartDecodingForSlot this nly takes effect when the + * client has confirmed to have received current_lsn. + */ +void +LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn) +{ + bool updated_lsn = false; + ReplicationSlot *slot; + + slot = MyReplicationSlot; + + Assert(slot != NULL); + Assert(restart_lsn != InvalidXLogRecPtr); + Assert(current_lsn != InvalidXLogRecPtr); + + SpinLockAcquire(&slot->mutex); + + /* don't overwrite if have a newer restart lsn*/ + if (restart_lsn <= slot->data.restart_lsn) + { + } + /* + * We might have already flushed far enough to directly accept this lsn, in + * this case there is no need to check for existing candidate LSNs + */ + else if (current_lsn <= slot->data.confirmed_flush) + { + slot->candidate_restart_valid = current_lsn; + slot->candidate_restart_lsn = restart_lsn; + + /* our candidate can directly be used */ + updated_lsn = true; + } + /* + * Only increase if the previous values have been applied, otherwise we + * might never end up updating if the receiver acks too slowly. A missed + * value here will just cause some extra effort after reconnecting. + */ + if (slot->candidate_restart_valid == InvalidXLogRecPtr) + { + slot->candidate_restart_valid = current_lsn; + slot->candidate_restart_lsn = restart_lsn; + + elog(DEBUG1, "got new restart lsn %X/%X at %X/%X", + (uint32) (restart_lsn >> 32), (uint32) restart_lsn, + (uint32) (current_lsn >> 32), (uint32) current_lsn); + } + else + { + elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X", + (uint32) (restart_lsn >> 32), (uint32) restart_lsn, + (uint32) (current_lsn >> 32), (uint32) current_lsn, + (uint32) (slot->candidate_restart_lsn >> 32), + (uint32) slot->candidate_restart_lsn, + (uint32) (slot->candidate_restart_valid >> 32), + (uint32) slot->candidate_restart_valid, + (uint32) (slot->data.confirmed_flush >> 32), + (uint32) slot->data.confirmed_flush + ); + } + SpinLockRelease(&slot->mutex); + + /* candidates are already valid with the current flush position, apply */ + if (updated_lsn) + LogicalConfirmReceivedLocation(slot->data.confirmed_flush); +} + +/* + * Handle a consumer's conformation having received all changes up to lsn. + */ +void +LogicalConfirmReceivedLocation(XLogRecPtr lsn) +{ + Assert(lsn != InvalidXLogRecPtr); + + /* Do an unlocked check for candidate_lsn first. */ + if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr || + MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr) + { + bool updated_xmin = false; + bool updated_restart = false; + + /* use volatile pointer to prevent code rearrangement */ + volatile ReplicationSlot *slot = MyReplicationSlot; + + SpinLockAcquire(&slot->mutex); + + slot->data.confirmed_flush = lsn; + + /* if were past the location required for bumping xmin, do so */ + if (slot->candidate_xmin_lsn != InvalidXLogRecPtr && + slot->candidate_xmin_lsn <= lsn) + { + /* + * We have to write the changed xmin to disk *before* we change + * the in-memory value, otherwise after a crash we wouldn't know + * that some catalog tuples might have been removed already. + * + * Ensure that by first writing to ->xmin and only update + * ->effective_xmin once the new state is synced to disk. After a + * crash ->effective_xmin is set to ->xmin. + */ + if (TransactionIdIsValid(slot->candidate_catalog_xmin) && + slot->data.catalog_xmin != slot->candidate_catalog_xmin) + { + slot->data.catalog_xmin = slot->candidate_catalog_xmin; + slot->candidate_catalog_xmin = InvalidTransactionId; + slot->candidate_xmin_lsn = InvalidXLogRecPtr; + updated_xmin = true; + } + } + + if (slot->candidate_restart_valid != InvalidXLogRecPtr && + slot->candidate_restart_valid <= lsn) + { + Assert(slot->candidate_restart_lsn != InvalidXLogRecPtr); + + slot->data.restart_lsn = slot->candidate_restart_lsn; + slot->candidate_restart_lsn = InvalidXLogRecPtr; + slot->candidate_restart_valid = InvalidXLogRecPtr; + updated_restart = true; + } + + SpinLockRelease(&slot->mutex); + + /* first write new xmin to disk, so we know whats up after a crash */ + if (updated_xmin || updated_restart) + { + ReplicationSlotMarkDirty(); + ReplicationSlotSave(); + elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart); + } + /* + * Now the new xmin is safely on disk, we can let the global value + * advance. We do not take ProcArrayLock or similar since we only + * advance xmin here and there's not much harm done by a concurrent + * computation missing that. + */ + if (updated_xmin) + { + SpinLockAcquire(&slot->mutex); + slot->effective_catalog_xmin = slot->data.catalog_xmin; + SpinLockRelease(&slot->mutex); + + ReplicationSlotsComputeRequiredXmin(false); + ReplicationSlotsComputeRequiredLSN(); + } + } + else + { + volatile ReplicationSlot *slot = MyReplicationSlot; + + SpinLockAcquire(&slot->mutex); + slot->data.confirmed_flush = lsn; + SpinLockRelease(&slot->mutex); + } +} diff --git a/src/backend/replication/logical/logicalfuncs.c b/src/backend/replication/logical/logicalfuncs.c new file mode 100644 index 0000000000..3b8ae3853b --- /dev/null +++ b/src/backend/replication/logical/logicalfuncs.c @@ -0,0 +1,509 @@ +/*------------------------------------------------------------------------- + * + * logicalfuncs.c + * + * Support functions for using logical decoding and managemnt of + * logical replication slots via SQL. + * + * + * Copyright (c) 2012-2014, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/replication/logicalfuncs.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "fmgr.h" +#include "funcapi.h" +#include "miscadmin.h" + +#include "catalog/pg_type.h" + +#include "nodes/makefuncs.h" + +#include "mb/pg_wchar.h" + +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/inval.h" +#include "utils/memutils.h" +#include "utils/pg_lsn.h" +#include "utils/resowner.h" +#include "utils/lsyscache.h" + +#include "replication/decode.h" +#include "replication/logical.h" +#include "replication/logicalfuncs.h" + +#include "storage/fd.h" + +/* private date for writing out data */ +typedef struct DecodingOutputState { + Tuplestorestate *tupstore; + TupleDesc tupdesc; + bool binary_output; + int64 returned_rows; +} DecodingOutputState; + +/* + * Prepare for a output plugin write. + */ +static void +LogicalOutputPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, + bool last_write) +{ + resetStringInfo(ctx->out); +} + +/* + * Perform output plugin write into tuplestore. + */ +static void +LogicalOutputWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, + bool last_write) +{ + Datum values[3]; + bool nulls[3]; + DecodingOutputState *p; + + /* SQL Datums can only be of a limited length... */ + if (ctx->out->len > MaxAllocSize - VARHDRSZ) + elog(ERROR, "too much output for sql interface"); + + p = (DecodingOutputState *) ctx->output_writer_private; + + memset(nulls, 0, sizeof(nulls)); + values[0] = LSNGetDatum(lsn); + values[1] = TransactionIdGetDatum(xid); + + /* + * Assert ctx->out is in database encoding when we're writing textual + * output. + */ + if (!p->binary_output) + Assert(pg_verify_mbstr(GetDatabaseEncoding(), + ctx->out->data, ctx->out->len, + false)); + + /* ick, but cstring_to_text_with_len works for bytea perfectly fine */ + values[2] = PointerGetDatum( + cstring_to_text_with_len(ctx->out->data, ctx->out->len)); + + tuplestore_putvalues(p->tupstore, p->tupdesc, values, nulls); + p->returned_rows++; +} + +/* + * TODO: This is duplicate code with pg_xlogdump, similar to walsender.c, but + * we currently don't have the infrastructure (elog!) to share it. + */ +static void +XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) +{ + char *p; + XLogRecPtr recptr; + Size nbytes; + + static int sendFile = -1; + static XLogSegNo sendSegNo = 0; + static uint32 sendOff = 0; + + p = buf; + recptr = startptr; + nbytes = count; + + while (nbytes > 0) + { + uint32 startoff; + int segbytes; + int readbytes; + + startoff = recptr % XLogSegSize; + + if (sendFile < 0 || !XLByteInSeg(recptr, sendSegNo)) + { + char path[MAXPGPATH]; + + /* Switch to another logfile segment */ + if (sendFile >= 0) + close(sendFile); + + XLByteToSeg(recptr, sendSegNo); + + XLogFilePath(path, tli, sendSegNo); + + sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); + + if (sendFile < 0) + { + if (errno == ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("requested WAL segment %s has already been removed", + path))); + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + path))); + } + sendOff = 0; + } + + /* Need to seek in the file? */ + if (sendOff != startoff) + { + if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0) + { + char path[MAXPGPATH]; + + XLogFilePath(path, tli, sendSegNo); + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not seek in log segment %s to offset %u: %m", + path, startoff))); + } + sendOff = startoff; + } + + /* How many bytes are within this segment? */ + if (nbytes > (XLogSegSize - startoff)) + segbytes = XLogSegSize - startoff; + else + segbytes = nbytes; + + readbytes = read(sendFile, p, segbytes); + if (readbytes <= 0) + { + char path[MAXPGPATH]; + + XLogFilePath(path, tli, sendSegNo); + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read from log segment %s, offset %u, length %lu: %m", + path, sendOff, (unsigned long) segbytes))); + } + + /* Update state for read */ + recptr += readbytes; + + sendOff += readbytes; + nbytes -= readbytes; + p += readbytes; + } +} + +static void +check_permissions(void) +{ + if (!superuser() && !has_rolreplication(GetUserId())) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser or replication role to use replication slots")))); +} + +/* + * read_page callback for logical decoding contexts. + * + * Public because it would likely be very helpful for someone writing another + * output method outside walsender, e.g. in a bgworker. + * + * TODO: The walsender has it's own version of this, but it relies on the + * walsender's latch being set whenever WAL is flushed. No such infrastructure + * exists for normal backends, so we have to do a check/sleep/repeat style of + * loop for now. + */ +int +logical_read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, char *cur_page, TimeLineID *pageTLI) +{ + XLogRecPtr flushptr, + loc; + int count; + + loc = targetPagePtr + reqLen; + while (1) + { + /* + * TODO: we're going to have to do something more intelligent about + * timelines on standbys. Use readTimeLineHistory() and + * tliOfPointInHistory() to get the proper LSN? For now we'll catch + * that case earlier, but the code and TODO is left in here for when + * that changes. + */ + if (!RecoveryInProgress()) + { + *pageTLI = ThisTimeLineID; + flushptr = GetFlushRecPtr(); + } + else + flushptr = GetXLogReplayRecPtr(pageTLI); + + if (loc <= flushptr) + break; + + CHECK_FOR_INTERRUPTS(); + pg_usleep(1000L); + } + + /* more than one block available */ + if (targetPagePtr + XLOG_BLCKSZ <= flushptr) + count = XLOG_BLCKSZ; + /* not enough data there */ + else if (targetPagePtr + reqLen > flushptr) + return -1; + /* part of the page available */ + else + count = flushptr - targetPagePtr; + + XLogRead(cur_page, *pageTLI, targetPagePtr, XLOG_BLCKSZ); + + return count; +} + +/* + * Helper function for the various SQL callable logical decoding functions. + */ +static Datum +pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool binary) +{ + Name name = PG_GETARG_NAME(0); + XLogRecPtr upto_lsn; + int32 upto_nchanges; + + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + + XLogRecPtr end_of_wal; + XLogRecPtr startptr; + + LogicalDecodingContext *ctx; + + ResourceOwner old_resowner = CurrentResourceOwner; + ArrayType *arr; + Size ndim; + List *options = NIL; + DecodingOutputState *p; + + if (PG_ARGISNULL(1)) + upto_lsn = InvalidXLogRecPtr; + else + upto_lsn = PG_GETARG_LSN(1); + + if (PG_ARGISNULL(2)) + upto_nchanges = InvalidXLogRecPtr; + else + upto_nchanges = PG_GETARG_INT32(2); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* state to write output to */ + p = palloc0(sizeof(DecodingOutputState)); + + p->binary_output = binary; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &p->tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + check_permissions(); + + CheckLogicalDecodingRequirements(); + + arr = PG_GETARG_ARRAYTYPE_P(3); + ndim = ARR_NDIM(arr); + + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + if (ndim > 1) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("array must be one-dimensional"))); + } + else if (array_contains_nulls(arr)) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("array must not contain nulls"))); + } + else if (ndim == 1) + { + int nelems; + Datum *datum_opts; + int i; + + Assert(ARR_ELEMTYPE(arr) == TEXTOID); + + deconstruct_array(arr, TEXTOID, -1, false, 'i', + &datum_opts, NULL, &nelems); + + if (nelems % 2 != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("array must have even number of elements"))); + + for (i = 0; i < nelems; i += 2) + { + char *name = TextDatumGetCString(datum_opts[i]); + char *opt = TextDatumGetCString(datum_opts[i + 1]); + + options = lappend(options, makeDefElem(name, (Node *) makeString(opt))); + } + } + + p->tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = p->tupstore; + rsinfo->setDesc = p->tupdesc; + + /* compute the current end-of-wal */ + if (!RecoveryInProgress()) + end_of_wal = GetFlushRecPtr(); + else + end_of_wal = GetXLogReplayRecPtr(NULL); + + CheckLogicalDecodingRequirements(); + ReplicationSlotAcquire(NameStr(*name)); + + PG_TRY(); + { + ctx = CreateDecodingContext(InvalidXLogRecPtr, + options, + logical_read_local_xlog_page, + LogicalOutputPrepareWrite, + LogicalOutputWrite); + + MemoryContextSwitchTo(oldcontext); + + /* + * Check whether the output pluggin writes textual output if that's + * what we need. + */ + if (!binary && + ctx->options.output_type != OUTPUT_PLUGIN_TEXTUAL_OUTPUT) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("output plugin cannot produce text output"))); + + ctx->output_writer_private = p; + + startptr = MyReplicationSlot->data.restart_lsn; + + CurrentResourceOwner = ResourceOwnerCreate(CurrentResourceOwner, "logical decoding"); + + /* invalidate non-timetravel entries */ + InvalidateSystemCaches(); + + while ((startptr != InvalidXLogRecPtr && startptr < end_of_wal) || + (ctx->reader->EndRecPtr && ctx->reader->EndRecPtr < end_of_wal)) + { + XLogRecord *record; + char *errm = NULL; + + record = XLogReadRecord(ctx->reader, startptr, &errm); + if (errm) + elog(ERROR, "%s", errm); + + startptr = InvalidXLogRecPtr; + + /* + * The {begin_txn,change,commit_txn}_wrapper callbacks above will + * store the description into our tuplestore. + */ + if (record != NULL) + LogicalDecodingProcessRecord(ctx, record); + + /* check limits */ + if (upto_lsn != InvalidXLogRecPtr && + upto_lsn <= ctx->reader->EndRecPtr) + break; + if (upto_nchanges != 0 && + upto_nchanges <= p->returned_rows) + break; + } + } + PG_CATCH(); + { + /* clear all timetravel entries */ + InvalidateSystemCaches(); + + PG_RE_THROW(); + } + PG_END_TRY(); + + tuplestore_donestoring(tupstore); + + CurrentResourceOwner = old_resowner; + + /* + * Next time, start where we left off. (Hunting things, the family + * business..) + */ + if (ctx->reader->EndRecPtr != InvalidXLogRecPtr && confirm) + LogicalConfirmReceivedLocation(ctx->reader->EndRecPtr); + + /* free context, call shutdown callback */ + FreeDecodingContext(ctx); + + ReplicationSlotRelease(); + InvalidateSystemCaches(); + + return (Datum) 0; +} + +/* + * SQL function returning the changestream as text, consuming the data. + */ +Datum +pg_logical_slot_get_changes(PG_FUNCTION_ARGS) +{ + Datum ret = pg_logical_slot_get_changes_guts(fcinfo, true, false); + return ret; +} + +/* + * SQL function returning the changestream as text, only peeking ahead. + */ +Datum +pg_logical_slot_peek_changes(PG_FUNCTION_ARGS) +{ + Datum ret = pg_logical_slot_get_changes_guts(fcinfo, false, false); + return ret; +} + +/* + * SQL function returning the changestream in binary, consuming the data. + */ +Datum +pg_logical_slot_get_binary_changes(PG_FUNCTION_ARGS) +{ + Datum ret = pg_logical_slot_get_changes_guts(fcinfo, true, true); + return ret; +} + +/* + * SQL function returning the changestream in binary, only peeking ahead. + */ +Datum +pg_logical_slot_peek_binary_changes(PG_FUNCTION_ARGS) +{ + Datum ret = pg_logical_slot_get_changes_guts(fcinfo, false, true); + return ret; +} diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c new file mode 100644 index 0000000000..e7182338b8 --- /dev/null +++ b/src/backend/replication/logical/reorderbuffer.c @@ -0,0 +1,3059 @@ +/*------------------------------------------------------------------------- + * + * reorderbuffer.c + * PostgreSQL logical replay/reorder buffer management + * + * + * Copyright (c) 2012-2014, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/replication/reorderbuffer.c + * + * NOTES + * This module gets handed individual pieces of transactions in the order + * they are written to the WAL and is responsible to reassemble them into + * toplevel transaction sized pieces. When a transaction is completely + * reassembled - signalled by reading the transaction commit record - it + * will then call the output plugin (c.f. ReorderBufferCommit()) with the + * individual changes. The output plugins rely on snapshots built by + * snapbuild.c which hands them to us. + * + * Transactions and subtransactions/savepoints in postgres are not + * immediately linked to each other from outside the performing + * backend. Only at commit/abort (or special xact_assignment records) they + * are linked together. Which means that we will have to splice together a + * toplevel transaction from its subtransactions. To do that efficiently we + * build a binary heap indexed by the smallest current lsn of the individual + * subtransactions' changestreams. As the individual streams are inherently + * ordered by LSN - since that is where we build them from - the transaction + * can easily be reassembled by always using the subtransaction with the + * smallest current LSN from the heap. + * + * In order to cope with large transactions - which can be several times as + * big as the available memory - this module supports spooling the contents + * of a large transactions to disk. When the transaction is replayed the + * contents of individual (sub-)transactions will be read from disk in + * chunks. + * + * This module also has to deal with reassembling toast records from the + * individual chunks stored in WAL. When a new (or initial) version of a + * tuple is stored in WAL it will always be preceded by the toast chunks + * emitted for the columns stored out of line. Within a single toplevel + * transaction there will be no other data carrying records between a row's + * toast chunks and the row data itself. See ReorderBufferToast* for + * details. + * ------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include +#include + +#include "miscadmin.h" + +#include "access/rewriteheap.h" +#include "access/transam.h" +#include "access/tuptoaster.h" +#include "access/xact.h" + +#include "catalog/catalog.h" + +#include "common/relpath.h" + +#include "lib/binaryheap.h" + +#include "replication/logical.h" +#include "replication/reorderbuffer.h" +#include "replication/slot.h" +#include "replication/snapbuild.h" /* just for SnapBuildSnapDecRefcount */ + +#include "storage/bufmgr.h" +#include "storage/fd.h" +#include "storage/sinval.h" + +#include "utils/builtins.h" +#include "utils/combocid.h" +#include "utils/memdebug.h" +#include "utils/memutils.h" +#include "utils/relcache.h" +#include "utils/relfilenodemap.h" +#include "utils/tqual.h" + +/* + * For efficiency and simplicity reasons we want to keep Snapshots, CommandIds + * and ComboCids in the same list with the user visible INSERT/UPDATE/DELETE + * changes. We don't want to leak those internal values to external users + * though (they would just use switch()...default:) because that would make it + * harder to add to new user visible values. + * + * This needs to be synchronized with ReorderBufferChangeType! Adjust the + * StaticAssertExpr's in ReorderBufferAllocate if you add anything! + */ +typedef enum +{ + REORDER_BUFFER_CHANGE_INTERNAL_INSERT, + REORDER_BUFFER_CHANGE_INTERNAL_UPDATE, + REORDER_BUFFER_CHANGE_INTERNAL_DELETE, + REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT, + REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID, + REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID +} ReorderBufferChangeTypeInternal; + +/* entry for a hash table we use to map from xid to our transaction state */ +typedef struct ReorderBufferTXNByIdEnt +{ + TransactionId xid; + ReorderBufferTXN *txn; +} ReorderBufferTXNByIdEnt; + +/* data structures for (relfilenode, ctid) => (cmin, cmax) mapping */ +typedef struct ReorderBufferTupleCidKey +{ + RelFileNode relnode; + ItemPointerData tid; +} ReorderBufferTupleCidKey; + +typedef struct ReorderBufferTupleCidEnt +{ + ReorderBufferTupleCidKey key; + CommandId cmin; + CommandId cmax; + CommandId combocid; /* just for debugging */ +} ReorderBufferTupleCidEnt; + +/* k-way in-order change iteration support structures */ +typedef struct ReorderBufferIterTXNEntry +{ + XLogRecPtr lsn; + ReorderBufferChange *change; + ReorderBufferTXN *txn; + int fd; + XLogSegNo segno; +} ReorderBufferIterTXNEntry; + +typedef struct ReorderBufferIterTXNState +{ + binaryheap *heap; + Size nr_txns; + dlist_head old_change; + ReorderBufferIterTXNEntry entries[FLEXIBLE_ARRAY_MEMBER]; +} ReorderBufferIterTXNState; + +/* toast datastructures */ +typedef struct ReorderBufferToastEnt +{ + Oid chunk_id; /* toast_table.chunk_id */ + int32 last_chunk_seq; /* toast_table.chunk_seq of the last chunk we + * have seen */ + Size num_chunks; /* number of chunks we've already seen */ + Size size; /* combined size of chunks seen */ + dlist_head chunks; /* linked list of chunks */ + struct varlena *reconstructed; /* reconstructed varlena now pointed + * to in main tup */ +} ReorderBufferToastEnt; + +/* Disk serialization support datastructures */ +typedef struct ReorderBufferDiskChange +{ + Size size; + ReorderBufferChange change; + /* data follows */ +} ReorderBufferDiskChange; + +/* + * Maximum number of changes kept in memory, per transaction. After that, + * changes are spooled to disk. + * + * The current value should be sufficient to decode the entire transaction + * without hitting disk in OLTP workloads, while starting to spool to disk in + * other workloads reasonably fast. + * + * At some point in the future it probaly makes sense to have a more elaborate + * resource management here, but it's not entirely clear what that would look + * like. + */ +static const Size max_changes_in_memory = 4096; + +/* + * We use a very simple form of a slab allocator for frequently allocated + * objects, simply keeping a fixed number in a linked list when unused, + * instead pfree()ing them. Without that in many workloads aset.c becomes a + * major bottleneck, especially when spilling to disk while decoding batch + * workloads. + */ +static const Size max_cached_changes = 4096 * 2; +static const Size max_cached_tuplebufs = 4096 * 2; /* ~8MB */ +static const Size max_cached_transactions = 512; + + +/* --------------------------------------- + * primary reorderbuffer support routines + * --------------------------------------- + */ +static ReorderBufferTXN *ReorderBufferGetTXN(ReorderBuffer *rb); +static void ReorderBufferReturnTXN(ReorderBuffer *rb, ReorderBufferTXN *txn); +static ReorderBufferTXN *ReorderBufferTXNByXid(ReorderBuffer *rb, + TransactionId xid, bool create, bool *is_new, + XLogRecPtr lsn, bool create_as_top); + +static void AssertTXNLsnOrder(ReorderBuffer *rb); + +/* --------------------------------------- + * support functions for lsn-order iterating over the ->changes of a + * transaction and its subtransactions + * + * used for iteration over the k-way heap merge of a transaction and its + * subtransactions + * --------------------------------------- + */ +static ReorderBufferIterTXNState *ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn); +static ReorderBufferChange * + ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state); +static void ReorderBufferIterTXNFinish(ReorderBuffer *rb, + ReorderBufferIterTXNState *state); +static void ReorderBufferExecuteInvalidations(ReorderBuffer *rb, ReorderBufferTXN *txn); + +/* + * --------------------------------------- + * Disk serialization support functions + * --------------------------------------- + */ +static void ReorderBufferCheckSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn); +static void ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn); +static void ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn, + int fd, ReorderBufferChange *change); +static Size ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn, + int *fd, XLogSegNo *segno); +static void ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn, + char *change); +static void ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn); + +static void ReorderBufferFreeSnap(ReorderBuffer *rb, Snapshot snap); +static Snapshot ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap, + ReorderBufferTXN *txn, CommandId cid); + +/* --------------------------------------- + * toast reassembly support + * --------------------------------------- + */ +static void ReorderBufferToastInitHash(ReorderBuffer *rb, ReorderBufferTXN *txn); +static void ReorderBufferToastReset(ReorderBuffer *rb, ReorderBufferTXN *txn); +static void ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change); +static void ReorderBufferToastAppendChunk(ReorderBuffer *rb, ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change); + + +/* + * Allocate a new ReorderBuffer + */ +ReorderBuffer * +ReorderBufferAllocate(void) +{ + ReorderBuffer *buffer; + HASHCTL hash_ctl; + MemoryContext new_ctx; + + StaticAssertExpr((int) REORDER_BUFFER_CHANGE_INTERNAL_INSERT == (int) REORDER_BUFFER_CHANGE_INSERT, "out of sync enums"); + StaticAssertExpr((int) REORDER_BUFFER_CHANGE_INTERNAL_UPDATE == (int) REORDER_BUFFER_CHANGE_UPDATE, "out of sync enums"); + StaticAssertExpr((int) REORDER_BUFFER_CHANGE_INTERNAL_DELETE == (int) REORDER_BUFFER_CHANGE_DELETE, "out of sync enums"); + + /* allocate memory in own context, to have better accountability */ + new_ctx = AllocSetContextCreate(CurrentMemoryContext, + "ReorderBuffer", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + buffer = + (ReorderBuffer *) MemoryContextAlloc(new_ctx, sizeof(ReorderBuffer)); + + memset(&hash_ctl, 0, sizeof(hash_ctl)); + + buffer->context = new_ctx; + + hash_ctl.keysize = sizeof(TransactionId); + hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt); + hash_ctl.hash = tag_hash; + hash_ctl.hcxt = buffer->context; + + buffer->by_txn = hash_create("ReorderBufferByXid", 1000, &hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); + + buffer->by_txn_last_xid = InvalidTransactionId; + buffer->by_txn_last_txn = NULL; + + buffer->nr_cached_transactions = 0; + buffer->nr_cached_changes = 0; + buffer->nr_cached_tuplebufs = 0; + + buffer->outbuf = NULL; + buffer->outbufsize = 0; + + buffer->current_restart_decoding_lsn = InvalidXLogRecPtr; + + dlist_init(&buffer->toplevel_by_lsn); + dlist_init(&buffer->cached_transactions); + dlist_init(&buffer->cached_changes); + slist_init(&buffer->cached_tuplebufs); + + return buffer; +} + +/* + * Free a ReorderBuffer + */ +void +ReorderBufferFree(ReorderBuffer *rb) +{ + MemoryContext context = rb->context; + + /* + * We free separately allocated data by entirely scrapping reorderbuffer's + * memory context. + */ + MemoryContextDelete(context); +} + +/* + * Get a unused, possibly preallocated, ReorderBufferTXN. + */ +static ReorderBufferTXN * +ReorderBufferGetTXN(ReorderBuffer *rb) +{ + ReorderBufferTXN *txn; + + /* check the slab cache */ + if (rb->nr_cached_transactions > 0) + { + rb->nr_cached_transactions--; + txn = (ReorderBufferTXN *) + dlist_container(ReorderBufferTXN, node, + dlist_pop_head_node(&rb->cached_transactions)); + } + else + { + txn = (ReorderBufferTXN *) + MemoryContextAlloc(rb->context, sizeof(ReorderBufferTXN)); + } + + memset(txn, 0, sizeof(ReorderBufferTXN)); + + dlist_init(&txn->changes); + dlist_init(&txn->tuplecids); + dlist_init(&txn->subtxns); + + return txn; +} + +/* + * Free a ReorderBufferTXN. + * + * Deallocation might be delayed for efficiency purposes, for details check + * the comments above max_cached_changes's definition. + */ +void +ReorderBufferReturnTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) +{ + /* clean the lookup cache if we were cached (quite likely) */ + if (rb->by_txn_last_xid == txn->xid) + { + rb->by_txn_last_xid = InvalidTransactionId; + rb->by_txn_last_txn = NULL; + } + + /* free data that's contained */ + + if (txn->tuplecid_hash != NULL) + { + hash_destroy(txn->tuplecid_hash); + txn->tuplecid_hash = NULL; + } + + if (txn->invalidations) + { + pfree(txn->invalidations); + txn->invalidations = NULL; + } + + /* check whether to put into the slab cache */ + if (rb->nr_cached_transactions < max_cached_transactions) + { + rb->nr_cached_transactions++; + dlist_push_head(&rb->cached_transactions, &txn->node); + VALGRIND_MAKE_MEM_UNDEFINED(txn, sizeof(ReorderBufferTXN)); + VALGRIND_MAKE_MEM_DEFINED(&txn->node, sizeof(txn->node)); + } + else + { + pfree(txn); + } +} + +/* + * Get a unused, possibly preallocated, ReorderBufferChange. + */ +ReorderBufferChange * +ReorderBufferGetChange(ReorderBuffer *rb) +{ + ReorderBufferChange *change; + + /* check the slab cache */ + if (rb->nr_cached_changes) + { + rb->nr_cached_changes--; + change = (ReorderBufferChange *) + dlist_container(ReorderBufferChange, node, + dlist_pop_head_node(&rb->cached_changes)); + } + else + { + change = (ReorderBufferChange *) + MemoryContextAlloc(rb->context, sizeof(ReorderBufferChange)); + } + + memset(change, 0, sizeof(ReorderBufferChange)); + return change; +} + +/* + * Free an ReorderBufferChange. + * + * Deallocation might be delayed for efficiency purposes, for details check + * the comments above max_cached_changes's definition. + */ +void +ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change) +{ + /* free contained data */ + switch ((ReorderBufferChangeTypeInternal) change->action_internal) + { + case REORDER_BUFFER_CHANGE_INTERNAL_INSERT: + case REORDER_BUFFER_CHANGE_INTERNAL_UPDATE: + case REORDER_BUFFER_CHANGE_INTERNAL_DELETE: + if (change->tp.newtuple) + { + ReorderBufferReturnTupleBuf(rb, change->tp.newtuple); + change->tp.newtuple = NULL; + } + + if (change->tp.oldtuple) + { + ReorderBufferReturnTupleBuf(rb, change->tp.oldtuple); + change->tp.oldtuple = NULL; + } + break; + case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT: + if (change->snapshot) + { + ReorderBufferFreeSnap(rb, change->snapshot); + change->snapshot = NULL; + } + break; + case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID: + break; + case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID: + break; + } + + /* check whether to put into the slab cache */ + if (rb->nr_cached_changes < max_cached_changes) + { + rb->nr_cached_changes++; + dlist_push_head(&rb->cached_changes, &change->node); + VALGRIND_MAKE_MEM_UNDEFINED(change, sizeof(ReorderBufferChange)); + VALGRIND_MAKE_MEM_DEFINED(&change->node, sizeof(change->node)); + } + else + { + pfree(change); + } +} + + +/* + * Get a unused, possibly preallocated, ReorderBufferTupleBuf + */ +ReorderBufferTupleBuf * +ReorderBufferGetTupleBuf(ReorderBuffer *rb) +{ + ReorderBufferTupleBuf *tuple; + + /* check the slab cache */ + if (rb->nr_cached_tuplebufs) + { + rb->nr_cached_tuplebufs--; + tuple = slist_container(ReorderBufferTupleBuf, node, + slist_pop_head_node(&rb->cached_tuplebufs)); +#ifdef USE_ASSERT_CHECKING + memset(tuple, 0xdeadbeef, sizeof(ReorderBufferTupleBuf)); +#endif + } + else + { + tuple = (ReorderBufferTupleBuf *) + MemoryContextAlloc(rb->context, sizeof(ReorderBufferTupleBuf)); + } + + return tuple; +} + +/* + * Free an ReorderBufferTupleBuf. + * + * Deallocation might be delayed for efficiency purposes, for details check + * the comments above max_cached_changes's definition. + */ +void +ReorderBufferReturnTupleBuf(ReorderBuffer *rb, ReorderBufferTupleBuf *tuple) +{ + /* check whether to put into the slab cache */ + if (rb->nr_cached_tuplebufs < max_cached_tuplebufs) + { + rb->nr_cached_tuplebufs++; + slist_push_head(&rb->cached_tuplebufs, &tuple->node); + VALGRIND_MAKE_MEM_UNDEFINED(tuple, sizeof(ReorderBufferTupleBuf)); + VALGRIND_MAKE_MEM_DEFINED(&tuple->node, sizeof(tuple->node)); + } + else + { + pfree(tuple); + } +} + +/* + * Return the ReorderBufferTXN from the given buffer, specified by Xid. + * If create is true, and a transaction doesn't already exist, create it + * (with the given LSN, and as top transaction if that's specified); + * when this happens, is_new is set to true. + */ +static ReorderBufferTXN * +ReorderBufferTXNByXid(ReorderBuffer *rb, TransactionId xid, bool create, + bool *is_new, XLogRecPtr lsn, bool create_as_top) +{ + ReorderBufferTXN *txn; + ReorderBufferTXNByIdEnt *ent; + bool found; + + Assert(TransactionIdIsValid(xid)); + Assert(!create || lsn != InvalidXLogRecPtr); + + /* + * Check the one-entry lookup cache first + */ + if (TransactionIdIsValid(rb->by_txn_last_xid) && + rb->by_txn_last_xid == xid) + { + txn = rb->by_txn_last_txn; + + if (txn != NULL) + { + /* found it, and it's valid */ + if (is_new) + *is_new = false; + return txn; + } + + /* + * cached as non-existant, and asked not to create? Then nothing else + * to do. + */ + if (!create) + return NULL; + /* otherwise fall through to create it */ + } + + /* + * If the cache wasn't hit or it yielded an "does-not-exist" and we want + * to create an entry. + */ + + /* search the lookup table */ + ent = (ReorderBufferTXNByIdEnt *) + hash_search(rb->by_txn, + (void *) &xid, + create ? HASH_ENTER : HASH_FIND, + &found); + if (found) + txn = ent->txn; + else if (create) + { + /* initialize the new entry, if creation was requested */ + Assert(ent != NULL); + + ent->txn = ReorderBufferGetTXN(rb); + ent->txn->xid = xid; + txn = ent->txn; + txn->first_lsn = lsn; + txn->restart_decoding_lsn = rb->current_restart_decoding_lsn; + + if (create_as_top) + { + dlist_push_tail(&rb->toplevel_by_lsn, &txn->node); + AssertTXNLsnOrder(rb); + } + } + else + txn = NULL; /* not found and not asked to create */ + + /* update cache */ + rb->by_txn_last_xid = xid; + rb->by_txn_last_txn = txn; + + if (is_new) + *is_new = !found; + + Assert(!create || !!txn); + return txn; +} + +/* + * Queue a change into a transaction so it can be replayed upon commit. + */ +void +ReorderBufferQueueChange(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, + ReorderBufferChange *change) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true); + + change->lsn = lsn; + Assert(InvalidXLogRecPtr != lsn); + dlist_push_tail(&txn->changes, &change->node); + txn->nentries++; + txn->nentries_mem++; + + ReorderBufferCheckSerializeTXN(rb, txn); +} + +static void +AssertTXNLsnOrder(ReorderBuffer *rb) +{ +#ifdef USE_ASSERT_CHECKING + dlist_iter iter; + XLogRecPtr prev_first_lsn = InvalidXLogRecPtr; + + dlist_foreach(iter, &rb->toplevel_by_lsn) + { + ReorderBufferTXN *cur_txn; + + cur_txn = dlist_container(ReorderBufferTXN, node, iter.cur); + Assert(cur_txn->first_lsn != InvalidXLogRecPtr); + + if (cur_txn->end_lsn != InvalidXLogRecPtr) + Assert(cur_txn->first_lsn <= cur_txn->end_lsn); + + if (prev_first_lsn != InvalidXLogRecPtr) + Assert(prev_first_lsn < cur_txn->first_lsn); + + Assert(!cur_txn->is_known_as_subxact); + prev_first_lsn = cur_txn->first_lsn; + } +#endif +} + +ReorderBufferTXN * +ReorderBufferGetOldestTXN(ReorderBuffer *rb) +{ + ReorderBufferTXN *txn; + + if (dlist_is_empty(&rb->toplevel_by_lsn)) + return NULL; + + AssertTXNLsnOrder(rb); + + txn = dlist_head_element(ReorderBufferTXN, node, &rb->toplevel_by_lsn); + + Assert(!txn->is_known_as_subxact); + Assert(txn->first_lsn != InvalidXLogRecPtr); + return txn; +} + +void +ReorderBufferSetRestartPoint(ReorderBuffer *rb, XLogRecPtr ptr) +{ + rb->current_restart_decoding_lsn = ptr; +} + +void +ReorderBufferAssignChild(ReorderBuffer *rb, TransactionId xid, + TransactionId subxid, XLogRecPtr lsn) +{ + ReorderBufferTXN *txn; + ReorderBufferTXN *subtxn; + bool new_top; + bool new_sub; + + txn = ReorderBufferTXNByXid(rb, xid, true, &new_top, lsn, true); + subtxn = ReorderBufferTXNByXid(rb, subxid, true, &new_sub, lsn, false); + + if (new_sub) + { + /* + * we assign subtransactions to top level transaction even if we don't + * have data for it yet, assignment records frequently reference xids + * that have not yet produced any records. Knowing those aren't top + * level xids allows us to make processing cheaper in some places. + */ + dlist_push_tail(&txn->subtxns, &subtxn->node); + txn->nsubtxns++; + } + else if (!subtxn->is_known_as_subxact) + { + subtxn->is_known_as_subxact = true; + Assert(subtxn->nsubtxns == 0); + + /* remove from lsn order list of top-level transactions */ + dlist_delete(&subtxn->node); + + /* add to toplevel transaction */ + dlist_push_tail(&txn->subtxns, &subtxn->node); + txn->nsubtxns++; + } + else if (new_top) + { + elog(ERROR, "existing subxact assigned to unknown toplevel xact"); + } +} + +/* + * Associate a subtransaction with its toplevel transaction at commit + * time. There may be no further changes added after this. + */ +void +ReorderBufferCommitChild(ReorderBuffer *rb, TransactionId xid, + TransactionId subxid, XLogRecPtr commit_lsn, + XLogRecPtr end_lsn) +{ + ReorderBufferTXN *txn; + ReorderBufferTXN *subtxn; + + subtxn = ReorderBufferTXNByXid(rb, subxid, false, NULL, + InvalidXLogRecPtr, false); + + /* + * No need to do anything if that subtxn didn't contain any changes + */ + if (!subtxn) + return; + + txn = ReorderBufferTXNByXid(rb, xid, false, NULL, commit_lsn, true); + + if (txn == NULL) + elog(ERROR, "subxact logged without previous toplevel record"); + + /* + * Pass the our base snapshot to the parent transaction if it doesn't have + * one, or ours is older. That can happen if there are no changes in the + * toplevel transaction but in one of the child transactions. This allows + * the parent to simply use it's base snapshot initially. + */ + if (txn->base_snapshot == NULL || + txn->base_snapshot_lsn > subtxn->base_snapshot_lsn) + { + txn->base_snapshot = subtxn->base_snapshot; + txn->base_snapshot_lsn = subtxn->base_snapshot_lsn; + subtxn->base_snapshot = NULL; + subtxn->base_snapshot_lsn = InvalidXLogRecPtr; + } + + subtxn->final_lsn = commit_lsn; + subtxn->end_lsn = end_lsn; + + if (!subtxn->is_known_as_subxact) + { + subtxn->is_known_as_subxact = true; + Assert(subtxn->nsubtxns == 0); + + /* remove from lsn order list of top-level transactions */ + dlist_delete(&subtxn->node); + + /* add to subtransaction list */ + dlist_push_tail(&txn->subtxns, &subtxn->node); + txn->nsubtxns++; + } +} + + +/* + * Support for efficiently iterating over a transaction's and its + * subtransactions' changes. + * + * We do by doing a k-way merge between transactions/subtransactions. For that + * we model the current heads of the different transactions as a binary heap + * so we easily know which (sub-)transaction has the change with the smallest + * lsn next. + * + * We assume the changes in individual transactions are already sorted by LSN. + */ + +/* + * Binary heap comparison function. + */ +static int +ReorderBufferIterCompare(Datum a, Datum b, void *arg) +{ + ReorderBufferIterTXNState *state = (ReorderBufferIterTXNState *) arg; + XLogRecPtr pos_a = state->entries[DatumGetInt32(a)].lsn; + XLogRecPtr pos_b = state->entries[DatumGetInt32(b)].lsn; + + if (pos_a < pos_b) + return 1; + else if (pos_a == pos_b) + return 0; + return -1; +} + +/* + * Allocate & initialize an iterator which iterates in lsn order over a + * transaction and all its subtransactions. + */ +static ReorderBufferIterTXNState * +ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn) +{ + Size nr_txns = 0; + ReorderBufferIterTXNState *state; + dlist_iter cur_txn_i; + int32 off; + + /* + * Calculate the size of our heap: one element for every transaction that + * contains changes. (Besides the transactions already in the reorder + * buffer, we count the one we were directly passed.) + */ + if (txn->nentries > 0) + nr_txns++; + + dlist_foreach(cur_txn_i, &txn->subtxns) + { + ReorderBufferTXN *cur_txn; + + cur_txn = dlist_container(ReorderBufferTXN, node, cur_txn_i.cur); + + if (cur_txn->nentries > 0) + nr_txns++; + } + + /* + * TODO: Consider adding fastpath for the rather common nr_txns=1 case, no + * need to allocate/build a heap then. + */ + + /* allocate iteration state */ + state = (ReorderBufferIterTXNState *) + MemoryContextAllocZero(rb->context, + sizeof(ReorderBufferIterTXNState) + + sizeof(ReorderBufferIterTXNEntry) * nr_txns); + + state->nr_txns = nr_txns; + dlist_init(&state->old_change); + + for (off = 0; off < state->nr_txns; off++) + { + state->entries[off].fd = -1; + state->entries[off].segno = 0; + } + + /* allocate heap */ + state->heap = binaryheap_allocate(state->nr_txns, + ReorderBufferIterCompare, + state); + + /* + * Now insert items into the binary heap, in an unordered fashion. (We + * will run a heap assembly step at the end; this is more efficient.) + */ + + off = 0; + + /* add toplevel transaction if it contains changes */ + if (txn->nentries > 0) + { + ReorderBufferChange *cur_change; + + if (txn->nentries != txn->nentries_mem) + ReorderBufferRestoreChanges(rb, txn, &state->entries[off].fd, + &state->entries[off].segno); + + cur_change = dlist_head_element(ReorderBufferChange, node, + &txn->changes); + + state->entries[off].lsn = cur_change->lsn; + state->entries[off].change = cur_change; + state->entries[off].txn = txn; + + binaryheap_add_unordered(state->heap, Int32GetDatum(off++)); + } + + /* add subtransactions if they contain changes */ + dlist_foreach(cur_txn_i, &txn->subtxns) + { + ReorderBufferTXN *cur_txn; + + cur_txn = dlist_container(ReorderBufferTXN, node, cur_txn_i.cur); + + if (cur_txn->nentries > 0) + { + ReorderBufferChange *cur_change; + + if (txn->nentries != txn->nentries_mem) + ReorderBufferRestoreChanges(rb, cur_txn, + &state->entries[off].fd, + &state->entries[off].segno); + + cur_change = dlist_head_element(ReorderBufferChange, node, + &cur_txn->changes); + + state->entries[off].lsn = cur_change->lsn; + state->entries[off].change = cur_change; + state->entries[off].txn = cur_txn; + + binaryheap_add_unordered(state->heap, Int32GetDatum(off++)); + } + } + + /* assemble a valid binary heap */ + binaryheap_build(state->heap); + + return state; +} + +/* + * Return the next change when iterating over a transaction and its + * subtransactions. + * + * Returns NULL when no further changes exist. + */ +static ReorderBufferChange * +ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state) +{ + ReorderBufferChange *change; + ReorderBufferIterTXNEntry *entry; + int32 off; + + /* nothing there anymore */ + if (state->heap->bh_size == 0) + return NULL; + + off = DatumGetInt32(binaryheap_first(state->heap)); + entry = &state->entries[off]; + + /* free memory we might have "leaked" in the previous *Next call */ + if (!dlist_is_empty(&state->old_change)) + { + change = dlist_container(ReorderBufferChange, node, + dlist_pop_head_node(&state->old_change)); + ReorderBufferReturnChange(rb, change); + Assert(dlist_is_empty(&state->old_change)); + } + + change = entry->change; + + /* + * update heap with information about which transaction has the next + * relevant change in LSN order + */ + + /* there are in-memory changes */ + if (dlist_has_next(&entry->txn->changes, &entry->change->node)) + { + dlist_node *next = dlist_next_node(&entry->txn->changes, &change->node); + ReorderBufferChange *next_change = + dlist_container(ReorderBufferChange, node, next); + + /* txn stays the same */ + state->entries[off].lsn = next_change->lsn; + state->entries[off].change = next_change; + + binaryheap_replace_first(state->heap, Int32GetDatum(off)); + return change; + } + + /* try to load changes from disk */ + if (entry->txn->nentries != entry->txn->nentries_mem) + { + /* + * Ugly: restoring changes will reuse *Change records, thus delete the + * current one from the per-tx list and only free in the next call. + */ + dlist_delete(&change->node); + dlist_push_tail(&state->old_change, &change->node); + + if (ReorderBufferRestoreChanges(rb, entry->txn, &entry->fd, + &state->entries[off].segno)) + { + /* successfully restored changes from disk */ + ReorderBufferChange *next_change = + dlist_head_element(ReorderBufferChange, node, + &entry->txn->changes); + + elog(DEBUG2, "restored %u/%u changes from disk", + (uint32) entry->txn->nentries_mem, + (uint32) entry->txn->nentries); + + Assert(entry->txn->nentries_mem); + /* txn stays the same */ + state->entries[off].lsn = next_change->lsn; + state->entries[off].change = next_change; + binaryheap_replace_first(state->heap, Int32GetDatum(off)); + + return change; + } + } + + /* ok, no changes there anymore, remove */ + binaryheap_remove_first(state->heap); + + return change; +} + +/* + * Deallocate the iterator + */ +static void +ReorderBufferIterTXNFinish(ReorderBuffer *rb, + ReorderBufferIterTXNState *state) +{ + int32 off; + + for (off = 0; off < state->nr_txns; off++) + { + if (state->entries[off].fd != -1) + CloseTransientFile(state->entries[off].fd); + } + + /* free memory we might have "leaked" in the last *Next call */ + if (!dlist_is_empty(&state->old_change)) + { + ReorderBufferChange *change; + + change = dlist_container(ReorderBufferChange, node, + dlist_pop_head_node(&state->old_change)); + ReorderBufferReturnChange(rb, change); + Assert(dlist_is_empty(&state->old_change)); + } + + binaryheap_free(state->heap); + pfree(state); +} + +/* + * Cleanup the contents of a transaction, usually after the transaction + * committed or aborted. + */ +static void +ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) +{ + bool found; + dlist_mutable_iter iter; + + /* cleanup subtransactions & their changes */ + dlist_foreach_modify(iter, &txn->subtxns) + { + ReorderBufferTXN *subtxn; + + subtxn = dlist_container(ReorderBufferTXN, node, iter.cur); + + /* + * Subtransactions are always associated to the toplevel TXN, even if + * they originally were happening inside another subtxn, so we won't + * ever recurse more than one level deep here. + */ + Assert(subtxn->is_known_as_subxact); + Assert(subtxn->nsubtxns == 0); + + ReorderBufferCleanupTXN(rb, subtxn); + } + + /* cleanup changes in the toplevel txn */ + dlist_foreach_modify(iter, &txn->changes) + { + ReorderBufferChange *change; + + change = dlist_container(ReorderBufferChange, node, iter.cur); + + ReorderBufferReturnChange(rb, change); + } + + /* + * Cleanup the tuplecids we stored for decoding catalog snapshot + * access. They are always stored in the toplevel transaction. + */ + dlist_foreach_modify(iter, &txn->tuplecids) + { + ReorderBufferChange *change; + + change = dlist_container(ReorderBufferChange, node, iter.cur); + Assert(change->action_internal == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID); + ReorderBufferReturnChange(rb, change); + } + + if (txn->base_snapshot != NULL) + { + SnapBuildSnapDecRefcount(txn->base_snapshot); + txn->base_snapshot = NULL; + txn->base_snapshot_lsn = InvalidXLogRecPtr; + } + + /* delete from list of known subxacts */ + if (txn->is_known_as_subxact) + { + /* NB: nsubxacts count of parent will be too high now */ + dlist_delete(&txn->node); + } + /* delete from LSN ordered list of toplevel TXNs */ + else + { + dlist_delete(&txn->node); + } + + /* now remove reference from buffer */ + hash_search(rb->by_txn, + (void *) &txn->xid, + HASH_REMOVE, + &found); + Assert(found); + + /* remove entries spilled to disk */ + if (txn->nentries != txn->nentries_mem) + ReorderBufferRestoreCleanup(rb, txn); + + /* deallocate */ + ReorderBufferReturnTXN(rb, txn); +} + +/* + * Build a hash with a (relfilenode, ctid) -> (cmin, cmax) mapping for use by + * tqual.c's HeapTupleSatisfiesHistoricMVCC. + */ +static void +ReorderBufferBuildTupleCidHash(ReorderBuffer *rb, ReorderBufferTXN *txn) +{ + dlist_iter iter; + HASHCTL hash_ctl; + + if (!txn->has_catalog_changes || dlist_is_empty(&txn->tuplecids)) + return; + + memset(&hash_ctl, 0, sizeof(hash_ctl)); + + hash_ctl.keysize = sizeof(ReorderBufferTupleCidKey); + hash_ctl.entrysize = sizeof(ReorderBufferTupleCidEnt); + hash_ctl.hash = tag_hash; + hash_ctl.hcxt = rb->context; + + /* + * create the hash with the exact number of to-be-stored tuplecids from + * the start + */ + txn->tuplecid_hash = + hash_create("ReorderBufferTupleCid", txn->ntuplecids, &hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); + + dlist_foreach(iter, &txn->tuplecids) + { + ReorderBufferTupleCidKey key; + ReorderBufferTupleCidEnt *ent; + bool found; + ReorderBufferChange *change; + + change = dlist_container(ReorderBufferChange, node, iter.cur); + + Assert(change->action_internal == REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID); + + /* be careful about padding */ + memset(&key, 0, sizeof(ReorderBufferTupleCidKey)); + + key.relnode = change->tuplecid.node; + + ItemPointerCopy(&change->tuplecid.tid, + &key.tid); + + ent = (ReorderBufferTupleCidEnt *) + hash_search(txn->tuplecid_hash, + (void *) &key, + HASH_ENTER | HASH_FIND, + &found); + if (!found) + { + ent->cmin = change->tuplecid.cmin; + ent->cmax = change->tuplecid.cmax; + ent->combocid = change->tuplecid.combocid; + } + else + { + Assert(ent->cmin == change->tuplecid.cmin); + Assert(ent->cmax == InvalidCommandId || + ent->cmax == change->tuplecid.cmax); + + /* + * if the tuple got valid in this transaction and now got deleted + * we already have a valid cmin stored. The cmax will be + * InvalidCommandId though. + */ + ent->cmax = change->tuplecid.cmax; + } + } +} + +/* + * Copy a provided snapshot so we can modify it privately. This is needed so + * that catalog modifying transactions can look into intermediate catalog + * states. + */ +static Snapshot +ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap, + ReorderBufferTXN *txn, CommandId cid) +{ + Snapshot snap; + dlist_iter iter; + int i = 0; + Size size; + + size = sizeof(SnapshotData) + + sizeof(TransactionId) * orig_snap->xcnt + + sizeof(TransactionId) * (txn->nsubtxns + 1); + + snap = MemoryContextAllocZero(rb->context, size); + memcpy(snap, orig_snap, sizeof(SnapshotData)); + + snap->copied = true; + snap->active_count = 0; + snap->regd_count = 1; + snap->xip = (TransactionId *) (snap + 1); + + memcpy(snap->xip, orig_snap->xip, sizeof(TransactionId) * snap->xcnt); + + /* + * snap->subxip contains all txids that belong to our transaction which we + * need to check via cmin/cmax. Thats why we store the toplevel + * transaction in there as well. + */ + snap->subxip = snap->xip + snap->xcnt; + snap->subxip[i++] = txn->xid; + + /* + * nsubxcnt isn't decreased when subtransactions abort, so count + * manually. Since it's an upper boundary it is safe to use it for the + * allocation above. + */ + snap->subxcnt = 1; + + dlist_foreach(iter, &txn->subtxns) + { + ReorderBufferTXN *sub_txn; + + sub_txn = dlist_container(ReorderBufferTXN, node, iter.cur); + snap->subxip[i++] = sub_txn->xid; + snap->subxcnt++; + } + + /* sort so we can bsearch() later */ + qsort(snap->subxip, snap->subxcnt, sizeof(TransactionId), xidComparator); + + /* store the specified current CommandId */ + snap->curcid = cid; + + return snap; +} + +/* + * Free a previously ReorderBufferCopySnap'ed snapshot + */ +static void +ReorderBufferFreeSnap(ReorderBuffer *rb, Snapshot snap) +{ + if (snap->copied) + pfree(snap); + else + SnapBuildSnapDecRefcount(snap); +} + +/* + * Perform the replay of a transaction and it's non-aborted subtransactions. + * + * Subtransactions previously have to be processed by + * ReorderBufferCommitChild(), even if previously assigned to the toplevel + * transaction with ReorderBufferAssignChild. + * + * We currently can only decode a transaction's contents in when their commit + * record is read because that's currently the only place where we know about + * cache invalidations. Thus, once a toplevel commit is read, we iterate over + * the top and subtransactions (using a k-way merge) and replay the changes in + * lsn order. + */ +void +ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr commit_lsn, XLogRecPtr end_lsn, + TimestampTz commit_time) +{ + ReorderBufferTXN *txn; + ReorderBufferIterTXNState *iterstate = NULL; + ReorderBufferChange *change; + + volatile CommandId command_id = FirstCommandId; + volatile Snapshot snapshot_now = NULL; + volatile bool txn_started = false; + volatile bool subtxn_started = false; + + txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, + false); + + /* unknown transaction, nothing to replay */ + if (txn == NULL) + return; + + txn->final_lsn = commit_lsn; + txn->end_lsn = end_lsn; + txn->commit_time = commit_time; + + /* serialize the last bunch of changes if we need start earlier anyway */ + if (txn->nentries_mem != txn->nentries) + ReorderBufferSerializeTXN(rb, txn); + + /* + * If this transaction didn't have any real changes in our database, it's + * OK not to have a snapshot. Note that ReorderBufferCommitChild will have + * transferred its snapshot to this transaction if it had one and the + * toplevel tx didn't. + */ + if (txn->base_snapshot == NULL) + { + Assert(txn->ninvalidations == 0); + ReorderBufferCleanupTXN(rb, txn); + return; + } + + snapshot_now = txn->base_snapshot; + + /* build data to be able to lookup the CommandIds of catalog tuples */ + ReorderBufferBuildTupleCidHash(rb, txn); + + /* setup the initial snapshot */ + SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash); + + PG_TRY(); + { + txn_started = false; + + /* + * Decoding needs access to syscaches et al., which in turn use + * heavyweight locks and such. Thus we need to have enough state around + * to keep track of those. The easiest way is to simply use a + * transaction internally. That also allows us to easily enforce that + * nothing writes to the database by checking for xid assignments. + * + * When we're called via the SQL SRF there's already a transaction + * started, so start an explicit subtransaction there. + */ + if (IsTransactionOrTransactionBlock()) + { + BeginInternalSubTransaction("replay"); + subtxn_started = true; + } + else + { + StartTransactionCommand(); + txn_started = true; + } + + rb->begin(rb, txn); + + iterstate = ReorderBufferIterTXNInit(rb, txn); + while ((change = ReorderBufferIterTXNNext(rb, iterstate))) + { + Relation relation = NULL; + Oid reloid; + + switch ((ReorderBufferChangeTypeInternal) change->action_internal) + { + case REORDER_BUFFER_CHANGE_INTERNAL_INSERT: + case REORDER_BUFFER_CHANGE_INTERNAL_UPDATE: + case REORDER_BUFFER_CHANGE_INTERNAL_DELETE: + Assert(snapshot_now); + + reloid = RelidByRelfilenode(change->tp.relnode.spcNode, + change->tp.relnode.relNode); + + /* + * Catalog tuple without data, emitted while catalog was + * in the process of being rewritten. + */ + if (reloid == InvalidOid && + change->tp.newtuple == NULL && + change->tp.oldtuple == NULL) + continue; + else if (reloid == InvalidOid) + elog(ERROR, "could not lookup relation %s", + relpathperm(change->tp.relnode, MAIN_FORKNUM)); + + relation = RelationIdGetRelation(reloid); + + if (relation == NULL) + elog(ERROR, "could open relation descriptor %s", + relpathperm(change->tp.relnode, MAIN_FORKNUM)); + + if (RelationIsLogicallyLogged(relation)) + { + /* + * For now ignore sequence changes entirely. Most of + * the time they don't log changes using records we + * understand, so it doesn't make sense to handle the + * few cases we do. + */ + if (relation->rd_rel->relkind == RELKIND_SEQUENCE) + { + } + /* user-triggered change */ + else if (!IsToastRelation(relation)) + { + ReorderBufferToastReplace(rb, txn, relation, change); + rb->apply_change(rb, txn, relation, change); + ReorderBufferToastReset(rb, txn); + } + /* we're not interested in toast deletions */ + else if (change->action == REORDER_BUFFER_CHANGE_INSERT) + { + /* + * Need to reassemble the full toasted Datum in + * memory, to ensure the chunks don't get reused + * till we're done remove it from the list of this + * transaction's changes. Otherwise it will get + * freed/reused while restoring spooled data from + * disk. + */ + dlist_delete(&change->node); + ReorderBufferToastAppendChunk(rb, txn, relation, + change); + } + + } + RelationClose(relation); + break; + case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT: + /* get rid of the old */ + TeardownHistoricSnapshot(false); + + if (snapshot_now->copied) + { + ReorderBufferFreeSnap(rb, snapshot_now); + snapshot_now = + ReorderBufferCopySnap(rb, change->snapshot, + txn, command_id); + } + /* + * Restored from disk, need to be careful not to double + * free. We could introduce refcounting for that, but for + * now this seems infrequent enough not to care. + */ + else if (change->snapshot->copied) + { + snapshot_now = + ReorderBufferCopySnap(rb, change->snapshot, + txn, command_id); + } + else + { + snapshot_now = change->snapshot; + } + + + /* and continue with the new one */ + SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash); + break; + + case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID: + Assert(change->command_id != InvalidCommandId); + + if (command_id < change->command_id) + { + command_id = change->command_id; + + if (!snapshot_now->copied) + { + /* we don't use the global one anymore */ + snapshot_now = ReorderBufferCopySnap(rb, snapshot_now, + txn, command_id); + } + + snapshot_now->curcid = command_id; + + TeardownHistoricSnapshot(false); + SetupHistoricSnapshot(snapshot_now, txn->tuplecid_hash); + + /* + * Every time the CommandId is incremented, we could + * see new catalog contents, so execute all + * invalidations. + */ + ReorderBufferExecuteInvalidations(rb, txn); + } + + break; + + case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID: + elog(ERROR, "tuplecid value in changequeue"); + break; + } + } + + ReorderBufferIterTXNFinish(rb, iterstate); + + /* call commit callback */ + rb->commit(rb, txn, commit_lsn); + + /* this is just a sanity check against bad output plugin behaviour */ + if (GetCurrentTransactionIdIfAny() != InvalidTransactionId) + elog(ERROR, "output plugin used xid %u", + GetCurrentTransactionId()); + + /* make sure there's no cache pollution */ + ReorderBufferExecuteInvalidations(rb, txn); + + /* cleanup */ + TeardownHistoricSnapshot(false); + + /* + * Abort subtransaction or the transaction as a whole has the right + * semantics. We want all locks acquired in here to be released, not + * reassigned to the parent and we do not want any database access + * have persistent effects. + */ + if (subtxn_started) + RollbackAndReleaseCurrentSubTransaction(); + else if (txn_started) + AbortCurrentTransaction(); + + if (snapshot_now->copied) + ReorderBufferFreeSnap(rb, snapshot_now); + + /* remove potential on-disk data, and deallocate */ + ReorderBufferCleanupTXN(rb, txn); + } + PG_CATCH(); + { + /* TODO: Encapsulate cleanup from the PG_TRY and PG_CATCH blocks */ + if (iterstate) + ReorderBufferIterTXNFinish(rb, iterstate); + + TeardownHistoricSnapshot(true); + + if (snapshot_now->copied) + ReorderBufferFreeSnap(rb, snapshot_now); + + if (subtxn_started) + RollbackAndReleaseCurrentSubTransaction(); + else if (txn_started) + AbortCurrentTransaction(); + + /* + * Invalidations in an aborted transactions aren't allowed to do + * catalog access, so we don't need to still have the snapshot setup. + */ + ReorderBufferExecuteInvalidations(rb, txn); + + /* remove potential on-disk data, and deallocate */ + ReorderBufferCleanupTXN(rb, txn); + + PG_RE_THROW(); + } + PG_END_TRY(); +} + +/* + * Abort a transaction that possibly has previous changes. Needs to be first + * called for subtransactions and then for the toplevel xid. + * + * NB: Transactions handled here have to have actively aborted (i.e. have + * produced an abort record). Implicitly aborted transactions are handled via + * ReorderBufferAbortOld(); transactions we're just not interesteded in, but + * which have committed are handled in ReorderBufferForget(). + * + * This function purges this transaction and its contents from memory and + * disk. + */ +void +ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, + false); + + /* unknown, nothing to remove */ + if (txn == NULL) + return; + + /* cosmetic... */ + txn->final_lsn = lsn; + + /* remove potential on-disk data, and deallocate */ + ReorderBufferCleanupTXN(rb, txn); +} + +/* + * Abort all transactions that aren't actually running anymore because the + * server restarted. + * + * NB: These really have to be transactions that have aborted due to a server + * crash/immediate restart, as we don't deal with invalidations here. + */ +void +ReorderBufferAbortOld(ReorderBuffer *rb, TransactionId oldestRunningXid) +{ + dlist_mutable_iter it; + + /* + * Iterate through all (potential) toplevel TXNs and abort all that are + * older than what possibly can be running. Once we've found the first + * that is alive we stop, there might be some that acquired an xid earlier + * but started writing later, but it's unlikely and they will cleaned up + * in a later call to ReorderBufferAbortOld(). + */ + dlist_foreach_modify(it, &rb->toplevel_by_lsn) + { + ReorderBufferTXN * txn; + + txn = dlist_container(ReorderBufferTXN, node, it.cur); + + if (TransactionIdPrecedes(txn->xid, oldestRunningXid)) + { + elog(DEBUG1, "aborting old transaction %u", txn->xid); + + /* remove potential on-disk data, and deallocate this tx */ + ReorderBufferCleanupTXN(rb, txn); + } + else + return; + } +} + +/* + * Forget the contents of a transaction if we aren't interested in it's + * contents. Needs to be first called for subtransactions and then for the + * toplevel xid. + * + * This is significantly different to ReorderBufferAbort() because + * transactions that have committed need to be treated differenly from aborted + * ones since they may have modified the catalog. + * + * Note that this is only allowed to be called in the moment a transaction + * commit has just been read, not earlier; otherwise later records referring + * to this xid might re-create the transaction incompletely. + */ +void +ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, + false); + + /* unknown, nothing to forget */ + if (txn == NULL) + return; + + /* cosmetic... */ + txn->final_lsn = lsn; + + /* + * Proccess cache invalidation messages if there are any. Even if we're + * not interested in the transaction's contents, it could have manipulated + * the catalog and we need to update the caches according to that. + */ + if (txn->base_snapshot != NULL && txn->ninvalidations > 0) + { + /* setup snapshot to perform the invalidations in */ + SetupHistoricSnapshot(txn->base_snapshot, txn->tuplecid_hash); + PG_TRY(); + { + ReorderBufferExecuteInvalidations(rb, txn); + TeardownHistoricSnapshot(false); + } + PG_CATCH(); + { + /* cleanup */ + TeardownHistoricSnapshot(true); + PG_RE_THROW(); + } + PG_END_TRY(); + } + else + Assert(txn->ninvalidations == 0); + + /* remove potential on-disk data, and deallocate */ + ReorderBufferCleanupTXN(rb, txn); +} + + +/* + * Check whether a transaction is already known in this module.xs + */ +bool +ReorderBufferIsXidKnown(ReorderBuffer *rb, TransactionId xid) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, + false); + return txn != NULL; +} + +/* + * Add a new snapshot to this transaction that may only used after lsn 'lsn' + * because the previous snapshot doesn't describe the catalog correctly for + * following rows. + */ +void +ReorderBufferAddSnapshot(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr lsn, Snapshot snap) +{ + ReorderBufferChange *change = ReorderBufferGetChange(rb); + + change->snapshot = snap; + change->action_internal = REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT; + + ReorderBufferQueueChange(rb, xid, lsn, change); +} + +/* + * Setup the base snapshot of a transaction. The base snapshot is the snapshot + * that is used to decode all changes until either this transaction modifies + * the catalog or another catalog modifying transaction commits. + * + * Needs to be called before any changes are added with + * ReorderBufferQueueChange(). + */ +void +ReorderBufferSetBaseSnapshot(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr lsn, Snapshot snap) +{ + ReorderBufferTXN *txn; + bool is_new; + + txn = ReorderBufferTXNByXid(rb, xid, true, &is_new, lsn, true); + Assert(txn->base_snapshot == NULL); + Assert(snap != NULL); + + txn->base_snapshot = snap; + txn->base_snapshot_lsn = lsn; +} + +/* + * Access the catalog with this CommandId at this point in the changestream. + * + * May only be called for command ids > 1 + */ +void +ReorderBufferAddNewCommandId(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr lsn, CommandId cid) +{ + ReorderBufferChange *change = ReorderBufferGetChange(rb); + + change->command_id = cid; + change->action_internal = REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID; + + ReorderBufferQueueChange(rb, xid, lsn, change); +} + + +/* + * Add new (relfilenode, tid) -> (cmin, cmax) mappings. + */ +void +ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr lsn, RelFileNode node, + ItemPointerData tid, CommandId cmin, + CommandId cmax, CommandId combocid) +{ + ReorderBufferChange *change = ReorderBufferGetChange(rb); + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true); + + change->tuplecid.node = node; + change->tuplecid.tid = tid; + change->tuplecid.cmin = cmin; + change->tuplecid.cmax = cmax; + change->tuplecid.combocid = combocid; + change->lsn = lsn; + change->action_internal = REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID; + + dlist_push_tail(&txn->tuplecids, &change->node); + txn->ntuplecids++; +} + +/* + * Setup the invalidation of the toplevel transaction. + * + * This needs to be done before ReorderBufferCommit is called! + */ +void +ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr lsn, Size nmsgs, + SharedInvalidationMessage *msgs) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true); + + if (txn->ninvalidations != 0) + elog(ERROR, "only ever add one set of invalidations"); + + Assert(nmsgs > 0); + + txn->ninvalidations = nmsgs; + txn->invalidations = (SharedInvalidationMessage *) + MemoryContextAlloc(rb->context, + sizeof(SharedInvalidationMessage) * nmsgs); + memcpy(txn->invalidations, msgs, + sizeof(SharedInvalidationMessage) * nmsgs); +} + +/* + * Apply all invalidations we know. Possibly we only need parts at this point + * in the changestream but we don't know which those are. + */ +static void +ReorderBufferExecuteInvalidations(ReorderBuffer *rb, ReorderBufferTXN *txn) +{ + int i; + + for (i = 0; i < txn->ninvalidations; i++) + LocalExecuteInvalidationMessage(&txn->invalidations[i]); +} + +/* + * Mark a transaction as containing catalog changes + */ +void +ReorderBufferXidSetCatalogChanges(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr lsn) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true); + + txn->has_catalog_changes = true; +} + +/* + * Query whether a transaction is already *known* to contain catalog + * changes. This can be wrong until directly before the commit! + */ +bool +ReorderBufferXidHasCatalogChanges(ReorderBuffer *rb, TransactionId xid) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, + false); + if (txn == NULL) + return false; + + return txn->has_catalog_changes; +} + +/* + * Have we already added the first snapshot? + */ +bool +ReorderBufferXidHasBaseSnapshot(ReorderBuffer *rb, TransactionId xid) +{ + ReorderBufferTXN *txn; + + txn = ReorderBufferTXNByXid(rb, xid, false, NULL, InvalidXLogRecPtr, + false); + + /* transaction isn't known yet, ergo no snapshot */ + if (txn == NULL) + return false; + + /* + * TODO: It would be a nice improvement if we would check the toplevel + * transaction in subtransactions, but we'd need to keep track of a bit + * more state. + */ + return txn->base_snapshot != NULL; +} + + +/* + * --------------------------------------- + * Disk serialization support + * --------------------------------------- + */ + +/* + * Ensure the IO buffer is >= sz. + */ +static void +ReorderBufferSerializeReserve(ReorderBuffer *rb, Size sz) +{ + if (!rb->outbufsize) + { + rb->outbuf = MemoryContextAlloc(rb->context, sz); + rb->outbufsize = sz; + } + else if (rb->outbufsize < sz) + { + rb->outbuf = repalloc(rb->outbuf, sz); + rb->outbufsize = sz; + } +} + +/* + * Check whether the transaction tx should spill its data to disk. + */ +static void +ReorderBufferCheckSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) +{ + /* + * TODO: improve accounting so we cheaply can take subtransactions into + * account here. + */ + if (txn->nentries_mem >= max_changes_in_memory) + { + ReorderBufferSerializeTXN(rb, txn); + Assert(txn->nentries_mem == 0); + } +} + +/* + * Spill data of a large transaction (and its subtransactions) to disk. + */ +static void +ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) +{ + dlist_iter subtxn_i; + dlist_mutable_iter change_i; + int fd = -1; + XLogSegNo curOpenSegNo = 0; + Size spilled = 0; + char path[MAXPGPATH]; + + elog(DEBUG2, "spill %u changes in tx %u to disk", + (uint32) txn->nentries_mem, txn->xid); + + /* do the same to all child TXs */ + dlist_foreach(subtxn_i, &txn->subtxns) + { + ReorderBufferTXN *subtxn; + + subtxn = dlist_container(ReorderBufferTXN, node, subtxn_i.cur); + ReorderBufferSerializeTXN(rb, subtxn); + } + + /* serialize changestream */ + dlist_foreach_modify(change_i, &txn->changes) + { + ReorderBufferChange *change; + + change = dlist_container(ReorderBufferChange, node, change_i.cur); + + /* + * store in segment in which it belongs by start lsn, don't split over + * multiple segments tho + */ + if (fd == -1 || XLByteInSeg(change->lsn, curOpenSegNo)) + { + XLogRecPtr recptr; + + if (fd != -1) + CloseTransientFile(fd); + + XLByteToSeg(change->lsn, curOpenSegNo); + XLogSegNoOffsetToRecPtr(curOpenSegNo, 0, recptr); + + /* + * No need to care about TLIs here, only used during a single run, + * so each LSN only maps to a specific WAL record. + */ + sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap", + NameStr(MyReplicationSlot->data.name), txn->xid, + (uint32) (recptr >> 32), (uint32) recptr); + + /* open segment, create it if necessary */ + fd = OpenTransientFile(path, + O_CREAT | O_WRONLY | O_APPEND | PG_BINARY, + S_IRUSR | S_IWUSR); + + if (fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + path))); + } + + ReorderBufferSerializeChange(rb, txn, fd, change); + dlist_delete(&change->node); + ReorderBufferReturnChange(rb, change); + + spilled++; + } + + Assert(spilled == txn->nentries_mem); + Assert(dlist_is_empty(&txn->changes)); + txn->nentries_mem = 0; + + if (fd != -1) + CloseTransientFile(fd); +} + +/* + * Serialize individual change to disk. + */ +static void +ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn, + int fd, ReorderBufferChange *change) +{ + ReorderBufferDiskChange *ondisk; + Size sz = sizeof(ReorderBufferDiskChange); + + ReorderBufferSerializeReserve(rb, sz); + + ondisk = (ReorderBufferDiskChange *) rb->outbuf; + memcpy(&ondisk->change, change, sizeof(ReorderBufferChange)); + + switch ((ReorderBufferChangeTypeInternal) change->action_internal) + { + case REORDER_BUFFER_CHANGE_INTERNAL_INSERT: + /* fall through */ + case REORDER_BUFFER_CHANGE_INTERNAL_UPDATE: + /* fall through */ + case REORDER_BUFFER_CHANGE_INTERNAL_DELETE: + { + char *data; + Size oldlen = 0; + Size newlen = 0; + + if (change->tp.oldtuple) + oldlen = offsetof(ReorderBufferTupleBuf, data) + + change->tp.oldtuple->tuple.t_len + - offsetof(HeapTupleHeaderData, t_bits); + + if (change->tp.newtuple) + newlen = offsetof(ReorderBufferTupleBuf, data) + + change->tp.newtuple->tuple.t_len + - offsetof(HeapTupleHeaderData, t_bits); + + sz += oldlen; + sz += newlen; + + /* make sure we have enough space */ + ReorderBufferSerializeReserve(rb, sz); + + data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange); + /* might have been reallocated above */ + ondisk = (ReorderBufferDiskChange *) rb->outbuf; + + if (oldlen) + { + memcpy(data, change->tp.oldtuple, oldlen); + data += oldlen; + Assert(&change->tp.oldtuple->header == change->tp.oldtuple->tuple.t_data); + } + + if (newlen) + { + memcpy(data, change->tp.newtuple, newlen); + data += newlen; + Assert(&change->tp.newtuple->header == change->tp.newtuple->tuple.t_data); + } + break; + } + case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT: + { + char *data; + + sz += sizeof(SnapshotData) + + sizeof(TransactionId) * change->snapshot->xcnt + + sizeof(TransactionId) * change->snapshot->subxcnt + ; + + /* make sure we have enough space */ + ReorderBufferSerializeReserve(rb, sz); + data = ((char *) rb->outbuf) + sizeof(ReorderBufferDiskChange); + /* might have been reallocated above */ + ondisk = (ReorderBufferDiskChange *) rb->outbuf; + + memcpy(data, change->snapshot, sizeof(SnapshotData)); + data += sizeof(SnapshotData); + + if (change->snapshot->xcnt) + { + memcpy(data, change->snapshot->xip, + sizeof(TransactionId) + change->snapshot->xcnt); + data += sizeof(TransactionId) + change->snapshot->xcnt; + } + + if (change->snapshot->subxcnt) + { + memcpy(data, change->snapshot->subxip, + sizeof(TransactionId) + change->snapshot->subxcnt); + data += sizeof(TransactionId) + change->snapshot->subxcnt; + } + break; + } + case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID: + /* ReorderBufferChange contains everything important */ + break; + case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID: + /* ReorderBufferChange contains everything important */ + break; + } + + ondisk->size = sz; + + if (write(fd, rb->outbuf, ondisk->size) != ondisk->size) + { + CloseTransientFile(fd); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to xid %u's data file: %m", + txn->xid))); + } + + Assert(ondisk->change.action_internal == change->action_internal); +} + +/* + * Restore a number of changes spilled to disk back into memory. + */ +static Size +ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn, + int *fd, XLogSegNo *segno) +{ + Size restored = 0; + XLogSegNo last_segno; + dlist_mutable_iter cleanup_iter; + + Assert(txn->first_lsn != InvalidXLogRecPtr); + Assert(txn->final_lsn != InvalidXLogRecPtr); + + /* free current entries, so we have memory for more */ + dlist_foreach_modify(cleanup_iter, &txn->changes) + { + ReorderBufferChange *cleanup = + dlist_container(ReorderBufferChange, node, cleanup_iter.cur); + + dlist_delete(&cleanup->node); + ReorderBufferReturnChange(rb, cleanup); + } + txn->nentries_mem = 0; + Assert(dlist_is_empty(&txn->changes)); + + XLByteToSeg(txn->final_lsn, last_segno); + + while (restored < max_changes_in_memory && *segno <= last_segno) + { + int readBytes; + ReorderBufferDiskChange *ondisk; + + if (*fd == -1) + { + XLogRecPtr recptr; + char path[MAXPGPATH]; + + /* first time in */ + if (*segno == 0) + { + XLByteToSeg(txn->first_lsn, *segno); + } + + Assert(*segno != 0 || dlist_is_empty(&txn->changes)); + XLogSegNoOffsetToRecPtr(*segno, 0, recptr); + + /* + * No need to care about TLIs here, only used during a single run, + * so each LSN only maps to a specific WAL record. + */ + sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap", + NameStr(MyReplicationSlot->data.name), txn->xid, + (uint32) (recptr >> 32), (uint32) recptr); + + *fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0); + if (*fd < 0 && errno == ENOENT) + { + *fd = -1; + (*segno)++; + continue; + } + else if (*fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + path))); + + } + + ReorderBufferSerializeReserve(rb, sizeof(ReorderBufferDiskChange)); + + + /* + * Read the statically sized part of a change which has information + * about the total size. If we couldn't read a record, we're at the + * end of this file. + */ + + readBytes = read(*fd, rb->outbuf, sizeof(ReorderBufferDiskChange)); + + /* eof */ + if (readBytes == 0) + { + CloseTransientFile(*fd); + *fd = -1; + (*segno)++; + continue; + } + else if (readBytes < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read from reorderbuffer spill file: %m"))); + else if (readBytes != sizeof(ReorderBufferDiskChange)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("incomplete read from reorderbuffer spill file: read %d instead of %u", + readBytes, + (uint32) sizeof(ReorderBufferDiskChange)))); + + ondisk = (ReorderBufferDiskChange *) rb->outbuf; + + ReorderBufferSerializeReserve(rb, + sizeof(ReorderBufferDiskChange) + ondisk->size); + ondisk = (ReorderBufferDiskChange *) rb->outbuf; + + readBytes = read(*fd, rb->outbuf + sizeof(ReorderBufferDiskChange), + ondisk->size - sizeof(ReorderBufferDiskChange)); + + if (readBytes < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read from reorderbuffer spill file: %m"))); + else if (readBytes != ondisk->size - sizeof(ReorderBufferDiskChange)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read from reorderbuffer spill file: read %d instead of %u", + readBytes, + (uint32) (ondisk->size - sizeof(ReorderBufferDiskChange))))); + + /* + * ok, read a full change from disk, now restore it into proper + * in-memory format + */ + ReorderBufferRestoreChange(rb, txn, rb->outbuf); + restored++; + } + + return restored; +} + +/* + * Convert change from its on-disk format to in-memory format and queue it onto + * the TXN's ->changes list. + */ +static void +ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn, + char *data) +{ + ReorderBufferDiskChange *ondisk; + ReorderBufferChange *change; + + ondisk = (ReorderBufferDiskChange *) data; + + change = ReorderBufferGetChange(rb); + + /* copy static part */ + memcpy(change, &ondisk->change, sizeof(ReorderBufferChange)); + + data += sizeof(ReorderBufferDiskChange); + + /* restore individual stuff */ + switch ((ReorderBufferChangeTypeInternal) change->action_internal) + { + case REORDER_BUFFER_CHANGE_INTERNAL_INSERT: + /* fall through */ + case REORDER_BUFFER_CHANGE_INTERNAL_UPDATE: + /* fall through */ + case REORDER_BUFFER_CHANGE_INTERNAL_DELETE: + if (change->tp.newtuple) + { + Size len = offsetof(ReorderBufferTupleBuf, data) + +((ReorderBufferTupleBuf *) data)->tuple.t_len + - offsetof(HeapTupleHeaderData, t_bits); + + change->tp.newtuple = ReorderBufferGetTupleBuf(rb); + memcpy(change->tp.newtuple, data, len); + change->tp.newtuple->tuple.t_data = &change->tp.newtuple->header; + + data += len; + } + + if (change->tp.oldtuple) + { + Size len = offsetof(ReorderBufferTupleBuf, data) + +((ReorderBufferTupleBuf *) data)->tuple.t_len + - offsetof(HeapTupleHeaderData, t_bits); + + change->tp.oldtuple = ReorderBufferGetTupleBuf(rb); + memcpy(change->tp.oldtuple, data, len); + change->tp.oldtuple->tuple.t_data = &change->tp.oldtuple->header; + data += len; + } + break; + case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT: + { + Snapshot oldsnap = (Snapshot) data; + Size size = sizeof(SnapshotData) + + sizeof(TransactionId) * oldsnap->xcnt + + sizeof(TransactionId) * (oldsnap->subxcnt + 0) + ; + + Assert(change->snapshot != NULL); + + change->snapshot = MemoryContextAllocZero(rb->context, size); + + memcpy(change->snapshot, data, size); + change->snapshot->xip = (TransactionId *) + (((char *) change->snapshot) + sizeof(SnapshotData)); + change->snapshot->subxip = + change->snapshot->xip + change->snapshot->xcnt + 0; + change->snapshot->copied = true; + break; + } + /* the base struct contains all the data, easy peasy */ + case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID: + case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID: + break; + } + + dlist_push_tail(&txn->changes, &change->node); + txn->nentries_mem++; +} + +/* + * Remove all on-disk stored for the passed in transaction. + */ +static void +ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn) +{ + XLogSegNo first; + XLogSegNo cur; + XLogSegNo last; + + Assert(txn->first_lsn != InvalidXLogRecPtr); + Assert(txn->final_lsn != InvalidXLogRecPtr); + + XLByteToSeg(txn->first_lsn, first); + XLByteToSeg(txn->final_lsn, last); + + /* iterate over all possible filenames, and delete them */ + for (cur = first; cur <= last; cur++) + { + char path[MAXPGPATH]; + XLogRecPtr recptr; + + XLogSegNoOffsetToRecPtr(cur, 0, recptr); + + sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap", + NameStr(MyReplicationSlot->data.name), txn->xid, + (uint32) (recptr >> 32), (uint32) recptr); + if (unlink(path) != 0 && errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not unlink file \"%s\": %m", path))); + } +} + +/* + * Delete all data spilled to disk after we've restarted/crashed. It will be + * recreated when the respective slots are reused. + */ +void +StartupReorderBuffer(void) +{ + DIR *logical_dir; + struct dirent *logical_de; + + DIR *spill_dir; + struct dirent *spill_de; + + logical_dir = AllocateDir("pg_replslot"); + while ((logical_de = ReadDir(logical_dir, "pg_replslot")) != NULL) + { + struct stat statbuf; + char path[MAXPGPATH]; + + if (strcmp(logical_de->d_name, ".") == 0 || + strcmp(logical_de->d_name, "..") == 0) + continue; + + /* if it cannot be a slot, skip the directory */ + if (!ReplicationSlotValidateName(logical_de->d_name, DEBUG2)) + continue; + + /* + * ok, has to be a surviving logical slot, iterate and delete + * everythign starting with xid-* + */ + sprintf(path, "pg_replslot/%s", logical_de->d_name); + + /* we're only creating directories here, skip if it's not our's */ + if (lstat(path, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) + continue; + + spill_dir = AllocateDir(path); + while ((spill_de = ReadDir(spill_dir, path)) != NULL) + { + if (strcmp(spill_de->d_name, ".") == 0 || + strcmp(spill_de->d_name, "..") == 0) + continue; + + /* only look at names that can be ours */ + if (strncmp(spill_de->d_name, "xid", 3) == 0) + { + sprintf(path, "pg_replslot/%s/%s", logical_de->d_name, + spill_de->d_name); + + if (unlink(path) != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not unlink file \"%s\": %m", + path))); + } + } + FreeDir(spill_dir); + } + FreeDir(logical_dir); +} + +/* --------------------------------------- + * toast reassembly support + * --------------------------------------- + */ + +/* + * Initialize per tuple toast reconstruction support. + */ +static void +ReorderBufferToastInitHash(ReorderBuffer *rb, ReorderBufferTXN *txn) +{ + HASHCTL hash_ctl; + + Assert(txn->toast_hash == NULL); + + memset(&hash_ctl, 0, sizeof(hash_ctl)); + hash_ctl.keysize = sizeof(Oid); + hash_ctl.entrysize = sizeof(ReorderBufferToastEnt); + hash_ctl.hash = tag_hash; + hash_ctl.hcxt = rb->context; + txn->toast_hash = hash_create("ReorderBufferToastHash", 5, &hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); +} + +/* + * Per toast-chunk handling for toast reconstruction + * + * Appends a toast chunk so we can reconstruct it when the tuple "owning" the + * toasted Datum comes along. + */ +static void +ReorderBufferToastAppendChunk(ReorderBuffer *rb, ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change) +{ + ReorderBufferToastEnt *ent; + bool found; + int32 chunksize; + bool isnull; + Pointer chunk; + TupleDesc desc = RelationGetDescr(relation); + Oid chunk_id; + Oid chunk_seq; + + if (txn->toast_hash == NULL) + ReorderBufferToastInitHash(rb, txn); + + Assert(IsToastRelation(relation)); + + chunk_id = DatumGetObjectId(fastgetattr(&change->tp.newtuple->tuple, 1, desc, &isnull)); + Assert(!isnull); + chunk_seq = DatumGetInt32(fastgetattr(&change->tp.newtuple->tuple, 2, desc, &isnull)); + Assert(!isnull); + + ent = (ReorderBufferToastEnt *) + hash_search(txn->toast_hash, + (void *) &chunk_id, + HASH_ENTER, + &found); + + if (!found) + { + Assert(ent->chunk_id == chunk_id); + ent->num_chunks = 0; + ent->last_chunk_seq = 0; + ent->size = 0; + ent->reconstructed = NULL; + dlist_init(&ent->chunks); + + if (chunk_seq != 0) + elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq 0", + chunk_seq, chunk_id); + } + else if (found && chunk_seq != ent->last_chunk_seq + 1) + elog(ERROR, "got sequence entry %d for toast chunk %u instead of seq %d", + chunk_seq, chunk_id, ent->last_chunk_seq + 1); + + chunk = DatumGetPointer(fastgetattr(&change->tp.newtuple->tuple, 3, desc, &isnull)); + Assert(!isnull); + + /* calculate size so we can allocate the right size at once later */ + if (!VARATT_IS_EXTENDED(chunk)) + chunksize = VARSIZE(chunk) - VARHDRSZ; + else if (VARATT_IS_SHORT(chunk)) + /* could happen due to heap_form_tuple doing its thing */ + chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; + else + elog(ERROR, "unexpected type of toast chunk"); + + ent->size += chunksize; + ent->last_chunk_seq = chunk_seq; + ent->num_chunks++; + dlist_push_tail(&ent->chunks, &change->node); +} + +/* + * Rejigger change->newtuple to point to in-memory toast tuples instead to + * on-disk toast tuples that may not longer exist (think DROP TABLE or VACUUM). + * + * We cannot replace unchanged toast tuples though, so those will still point + * to on-disk toast data. + */ +static void +ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn, + Relation relation, ReorderBufferChange *change) +{ + TupleDesc desc; + int natt; + Datum *attrs; + bool *isnull; + bool *free; + HeapTuple newtup; + Relation toast_rel; + TupleDesc toast_desc; + MemoryContext oldcontext; + + /* no toast tuples changed */ + if (txn->toast_hash == NULL) + return; + + oldcontext = MemoryContextSwitchTo(rb->context); + + /* we should only have toast tuples in an INSERT or UPDATE */ + Assert(change->tp.newtuple); + + desc = RelationGetDescr(relation); + + toast_rel = RelationIdGetRelation(relation->rd_rel->reltoastrelid); + toast_desc = RelationGetDescr(toast_rel); + + /* should we allocate from stack instead? */ + attrs = palloc0(sizeof(Datum) * desc->natts); + isnull = palloc0(sizeof(bool) * desc->natts); + free = palloc0(sizeof(bool) * desc->natts); + + heap_deform_tuple(&change->tp.newtuple->tuple, desc, + attrs, isnull); + + for (natt = 0; natt < desc->natts; natt++) + { + Form_pg_attribute attr = desc->attrs[natt]; + ReorderBufferToastEnt *ent; + struct varlena *varlena; + + /* va_rawsize is the size of the original datum -- including header */ + struct varatt_external toast_pointer; + struct varatt_indirect redirect_pointer; + struct varlena *new_datum = NULL; + struct varlena *reconstructed; + dlist_iter it; + Size data_done = 0; + + /* system columns aren't toasted */ + if (attr->attnum < 0) + continue; + + if (attr->attisdropped) + continue; + + /* not a varlena datatype */ + if (attr->attlen != -1) + continue; + + /* no data */ + if (isnull[natt]) + continue; + + /* ok, we know we have a toast datum */ + varlena = (struct varlena *) DatumGetPointer(attrs[natt]); + + /* no need to do anything if the tuple isn't external */ + if (!VARATT_IS_EXTERNAL(varlena)) + continue; + + VARATT_EXTERNAL_GET_POINTER(toast_pointer, varlena); + + /* + * Check whether the toast tuple changed, replace if so. + */ + ent = (ReorderBufferToastEnt *) + hash_search(txn->toast_hash, + (void *) &toast_pointer.va_valueid, + HASH_FIND, + NULL); + if (ent == NULL) + continue; + + new_datum = + (struct varlena *) palloc0(INDIRECT_POINTER_SIZE); + + free[natt] = true; + + reconstructed = palloc0(toast_pointer.va_rawsize); + + ent->reconstructed = reconstructed; + + /* stitch toast tuple back together from its parts */ + dlist_foreach(it, &ent->chunks) + { + bool isnull; + ReorderBufferTupleBuf *tup = + dlist_container(ReorderBufferChange, node, it.cur)->tp.newtuple; + Pointer chunk = + DatumGetPointer(fastgetattr(&tup->tuple, 3, toast_desc, &isnull)); + + Assert(!isnull); + Assert(!VARATT_IS_EXTERNAL(chunk)); + Assert(!VARATT_IS_SHORT(chunk)); + + memcpy(VARDATA(reconstructed) + data_done, + VARDATA(chunk), + VARSIZE(chunk) - VARHDRSZ); + data_done += VARSIZE(chunk) - VARHDRSZ; + } + Assert(data_done == toast_pointer.va_extsize); + + /* make sure its marked as compressed or not */ + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + SET_VARSIZE_COMPRESSED(reconstructed, data_done + VARHDRSZ); + else + SET_VARSIZE(reconstructed, data_done + VARHDRSZ); + + memset(&redirect_pointer, 0, sizeof(redirect_pointer)); + redirect_pointer.pointer = reconstructed; + + SET_VARTAG_EXTERNAL(new_datum, VARTAG_INDIRECT); + memcpy(VARDATA_EXTERNAL(new_datum), &redirect_pointer, + sizeof(redirect_pointer)); + + attrs[natt] = PointerGetDatum(new_datum); + } + + /* + * Build tuple in separate memory & copy tuple back into the tuplebuf + * passed to the output plugin. We can't directly heap_fill_tuple() into + * the tuplebuf because attrs[] will point back into the current content. + */ + newtup = heap_form_tuple(desc, attrs, isnull); + Assert(change->tp.newtuple->tuple.t_len <= MaxHeapTupleSize); + Assert(&change->tp.newtuple->header == change->tp.newtuple->tuple.t_data); + + memcpy(change->tp.newtuple->tuple.t_data, + newtup->t_data, + newtup->t_len); + change->tp.newtuple->tuple.t_len = newtup->t_len; + + /* + * free resources we won't further need, more persistent stuff will be + * free'd in ReorderBufferToastReset(). + */ + RelationClose(toast_rel); + pfree(newtup); + for (natt = 0; natt < desc->natts; natt++) + { + if (free[natt]) + pfree(DatumGetPointer(attrs[natt])); + } + pfree(attrs); + pfree(free); + pfree(isnull); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Free all resources allocated for toast reconstruction. + */ +static void +ReorderBufferToastReset(ReorderBuffer *rb, ReorderBufferTXN *txn) +{ + HASH_SEQ_STATUS hstat; + ReorderBufferToastEnt *ent; + + if (txn->toast_hash == NULL) + return; + + /* sequentially walk over the hash and free everything */ + hash_seq_init(&hstat, txn->toast_hash); + while ((ent = (ReorderBufferToastEnt *) hash_seq_search(&hstat)) != NULL) + { + dlist_mutable_iter it; + + if (ent->reconstructed != NULL) + pfree(ent->reconstructed); + + dlist_foreach_modify(it, &ent->chunks) + { + ReorderBufferChange *change = + dlist_container(ReorderBufferChange, node, it.cur); + + dlist_delete(&change->node); + ReorderBufferReturnChange(rb, change); + } + } + + hash_destroy(txn->toast_hash); + txn->toast_hash = NULL; +} + + +/* --------------------------------------- + * Visibility support for logical decoding + * + * + * Lookup actual cmin/cmax values when using decoding snapshot. We can't + * always rely on stored cmin/cmax values because of two scenarios: + * + * * A tuple got changed multiple times during a single transaction and thus + * has got a combocid. Combocid's are only valid for the duration of a + * single transaction. + * * A tuple with a cmin but no cmax (and thus no combocid) got + * deleted/updated in another transaction than the one which created it + * which we are looking at right now. As only one of cmin, cmax or combocid + * is actually stored in the heap we don't have access to the the value we + * need anymore. + * + * To resolve those problems we have a per-transaction hash of (cmin, + * cmax) tuples keyed by (relfilenode, ctid) which contains the actual + * (cmin, cmax) values. That also takes care of combocids by simply + * not caring about them at all. As we have the real cmin/cmax values + * combocids aren't interesting. + * + * As we only care about catalog tuples here the overhead of this + * hashtable should be acceptable. + * + * Heap rewrites complicate this a bit, check rewriteheap.c for + * details. + * ------------------------------------------------------------------------- + */ + +/* struct for qsort()ing mapping files by lsn somewhat efficiently */ +typedef struct RewriteMappingFile +{ + XLogRecPtr lsn; + char fname[MAXPGPATH]; +} RewriteMappingFile; + +#if NOT_USED +static void +DisplayMapping(HTAB *tuplecid_data) +{ + HASH_SEQ_STATUS hstat; + ReorderBufferTupleCidEnt *ent; + + hash_seq_init(&hstat, tuplecid_data); + while ((ent = (ReorderBufferTupleCidEnt *) hash_seq_search(&hstat)) != NULL) + { + elog(DEBUG3, "mapping: node: %u/%u/%u tid: %u/%u cmin: %u, cmax: %u", + ent->key.relnode.dbNode, + ent->key.relnode.spcNode, + ent->key.relnode.relNode, + BlockIdGetBlockNumber(&ent->key.tid.ip_blkid), + ent->key.tid.ip_posid, + ent->cmin, + ent->cmax + ); + } +} +#endif + +/* + * Apply a single mapping file to tuplecid_data. + * + * The mapping file has to have been verified to be a) committed b) for our + * transaction c) applied in LSN order. + */ +static void +ApplyLogicalMappingFile(HTAB *tuplecid_data, Oid relid, const char *fname) +{ + char path[MAXPGPATH]; + int fd; + int readBytes; + LogicalRewriteMappingData map; + + sprintf(path, "pg_llog/mappings/%s", fname); + fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0); + if (fd < 0) + ereport(ERROR, + (errmsg("could not open file \"%s\": %m", path))); + + while (true) + { + ReorderBufferTupleCidKey key; + ReorderBufferTupleCidEnt *ent; + ReorderBufferTupleCidEnt *new_ent; + bool found; + + /* be careful about padding */ + memset(&key, 0, sizeof(ReorderBufferTupleCidKey)); + + /* read all mappings till the end of the file */ + readBytes = read(fd, &map, sizeof(LogicalRewriteMappingData)); + + if (readBytes < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + path))); + else if (readBytes == 0) /* EOF */ + break; + else if (readBytes != sizeof(LogicalRewriteMappingData)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\", read %d instead of %d", + path, readBytes, + (int32) sizeof(LogicalRewriteMappingData)))); + + key.relnode = map.old_node; + ItemPointerCopy(&map.old_tid, + &key.tid); + + + ent = (ReorderBufferTupleCidEnt *) + hash_search(tuplecid_data, + (void *) &key, + HASH_FIND, + NULL); + + /* no existing mapping, no need to update */ + if (!ent) + continue; + + key.relnode = map.new_node; + ItemPointerCopy(&map.new_tid, + &key.tid); + + new_ent = (ReorderBufferTupleCidEnt *) + hash_search(tuplecid_data, + (void *) &key, + HASH_ENTER, + &found); + + if (found) + { + /* + * Make sure the existing mapping makes sense. We sometime update + * old records that did not yet have a cmax (e.g. pg_class' own + * entry while rewriting it) during rewrites, so allow that. + */ + Assert(ent->cmin == InvalidCommandId || ent->cmin == new_ent->cmin); + Assert(ent->cmax == InvalidCommandId || ent->cmax == new_ent->cmax); + } + else + { + /* update mapping */ + new_ent->cmin = ent->cmin; + new_ent->cmax = ent->cmax; + new_ent->combocid = ent->combocid; + } + } +} + + +/* + * Check whether the TransactionOId 'xid' is in the pre-sorted array 'xip'. + */ +static bool +TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num) +{ + return bsearch(&xid, xip, num, + sizeof(TransactionId), xidComparator) != NULL; +} + +/* + * qsort() comparator for sorting RewriteMappingFiles in LSN order. + */ +static int +file_sort_by_lsn(const void *a_p, const void *b_p) +{ + RewriteMappingFile *a = *(RewriteMappingFile **)a_p; + RewriteMappingFile *b = *(RewriteMappingFile **)b_p; + + if (a->lsn < b->lsn) + return -1; + else if (a->lsn > b->lsn) + return 1; + return 0; +} + +/* + * Apply any existing logical remapping files if there are any targeted at our + * transaction for relid. + */ +static void +UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot) +{ + DIR *mapping_dir; + struct dirent *mapping_de; + List *files = NIL; + ListCell *file; + RewriteMappingFile **files_a; + size_t off; + Oid dboid = IsSharedRelation(relid) ? InvalidOid : MyDatabaseId; + + mapping_dir = AllocateDir("pg_llog/mappings"); + while ((mapping_de = ReadDir(mapping_dir, "pg_llog/mappings")) != NULL) + { + Oid f_dboid; + Oid f_relid; + TransactionId f_mapped_xid; + TransactionId f_create_xid; + XLogRecPtr f_lsn; + uint32 f_hi, f_lo; + RewriteMappingFile *f; + + if (strcmp(mapping_de->d_name, ".") == 0 || + strcmp(mapping_de->d_name, "..") == 0) + continue; + + /* Ignore files that aren't ours*/ + if (strncmp(mapping_de->d_name, "map-", 4) != 0) + continue; + + if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, + &f_dboid, &f_relid, &f_hi, &f_lo, + &f_mapped_xid, &f_create_xid) != 6) + elog(ERROR, "could not parse fname %s", mapping_de->d_name); + + f_lsn = ((uint64) f_hi) << 32 | f_lo; + + /* mapping for another database */ + if (f_dboid != dboid) + continue; + + /* mapping for another relation */ + if (f_relid != relid) + continue; + + /* did the creating transaction abort? */ + if (!TransactionIdDidCommit(f_create_xid)) + continue; + + /* not for our transaction */ + if (!TransactionIdInArray(f_mapped_xid, snapshot->subxip, snapshot->subxcnt)) + continue; + + /* ok, relevant, queue for apply */ + f = palloc(sizeof(RewriteMappingFile)); + f->lsn = f_lsn; + strcpy(f->fname, mapping_de->d_name); + files = lappend(files, f); + } + FreeDir(mapping_dir); + + /* build array we can easily sort */ + files_a = palloc(list_length(files) * sizeof(RewriteMappingFile *)); + off = 0; + foreach(file, files) + { + files_a[off++] = lfirst(file); + } + + /* sort files so we apply them in LSN order */ + qsort(files_a, list_length(files), sizeof(RewriteMappingFile *), + file_sort_by_lsn); + + for(off = 0; off < list_length(files); off++) + { + RewriteMappingFile *f = files_a[off]; + elog(DEBUG1, "applying mapping: %s in %u", f->fname, + snapshot->subxip[0]); + ApplyLogicalMappingFile(tuplecid_data, relid, f->fname); + pfree(f); + } +} + +/* + * Lookup cmin/cmax of a tuple, during logical decoding where we can't rely on + * combocids. + */ +bool +ResolveCminCmaxDuringDecoding(HTAB *tuplecid_data, + Snapshot snapshot, + HeapTuple htup, Buffer buffer, + CommandId *cmin, CommandId *cmax) +{ + ReorderBufferTupleCidKey key; + ReorderBufferTupleCidEnt *ent; + ForkNumber forkno; + BlockNumber blockno; + bool updated_mapping = false; + + /* be careful about padding */ + memset(&key, 0, sizeof(key)); + + Assert(!BufferIsLocal(buffer)); + + /* + * get relfilenode from the buffer, no convenient way to access it other + * than that. + */ + BufferGetTag(buffer, &key.relnode, &forkno, &blockno); + + /* tuples can only be in the main fork */ + Assert(forkno == MAIN_FORKNUM); + Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self)); + + ItemPointerCopy(&htup->t_self, + &key.tid); + +restart: + ent = (ReorderBufferTupleCidEnt *) + hash_search(tuplecid_data, + (void *) &key, + HASH_FIND, + NULL); + + /* + * failed to find a mapping, check whether the table was rewritten and + * apply mapping if so, but only do that once - there can be no new + * mappings while we are in here since we have to hold a lock on the + * relation. + */ + if (ent == NULL && !updated_mapping) + { + UpdateLogicalMappings(tuplecid_data, htup->t_tableOid, snapshot); + /* now check but don't update for a mapping again */ + updated_mapping = true; + goto restart; + } + else if (ent == NULL) + return false; + + if (cmin) + *cmin = ent->cmin; + if (cmax) + *cmax = ent->cmax; + return true; +} diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c new file mode 100644 index 0000000000..28f9a8a1a6 --- /dev/null +++ b/src/backend/replication/logical/snapbuild.c @@ -0,0 +1,1885 @@ +/*------------------------------------------------------------------------- + * + * snapbuild.c + * + * Infrastructure for building historic catalog snapshots based on contents + * of the WAL, for the purpose of decoding heapam.c style values in the + * WAL. + * + * NOTES: + * + * We build snapshots which can *only* be used to read catalog contents and we + * do so by reading and interpreting the WAL stream. The aim is to build a + * snapshot that behaves the same as a freshly taken MVCC snapshot would have + * at the time the XLogRecord was generated. + * + * To build the snapshots we reuse the infrastructure built for Hot + * Standby. The in-memory snapshots we build look different than HS' because + * we have different needs. To successfully decode data from the WAL we only + * need to access catalog tables and (sys|rel|cat)cache, not the actual user + * tables since the data we decode is wholly contained in the WAL + * records. Also, our snapshots need to be different in comparison to normal + * MVCC ones because in contrast to those we cannot fully rely on the clog and + * pg_subtrans for information about committed transactions because they might + * commit in the future from the POV of the WAL entry we're currently + * decoding. This definition has the advantage that we only need to prevent + * removal of catalog rows, while normal table's rows can still be + * removed. This is achieved by using the replication slot mechanism. + * + * As the percentage of transactions modifying the catalog normally is fairly + * small in comparisons to ones only manipulating user data, we keep track of + * the committed catalog modifying ones inside (xmin, xmax) instead of keeping + * track of all running transactions like its done in a normal snapshot. Note + * that we're generally only looking at transactions that have acquired an + * xid. That is we keep a list of transactions between snapshot->(xmin, xmax) + * that we consider committed, everything else is considered aborted/in + * progress. That also allows us not to care about subtransactions before they + * have committed which means this modules, in contrast to HS, doesn't have to + * care about suboverflowed subtransactions and similar. + * + * One complexity of doing this is that to e.g. handle mixed DDL/DML + * transactions we need Snapshots that see intermediate versions of the + * catalog in a transaction. During normal operation this is achieved by using + * CommandIds/cmin/cmax. The problem with that however is that for space + * efficiency reasons only one value of that is stored + * (c.f. combocid.c). Since ComboCids are only available in memory we log + * additional information which allows us to get the original (cmin, cmax) + * pair during visibility checks. Check the reorderbuffer.c's comment above + * ResolveCminCmaxDuringDecoding() for details. + * + * To facilitate all this we need our own visibility routine, as the normal + * ones are optimized for different usecases. + * + * To replace the normal catalog snapshots with decoding ones use the + * SetupHistoricSnapshot() and TeardownHistoricSnapshot() functions. + * + * + * + * The snapbuild machinery is starting up in in several stages, as illustrated + * by the following graph: + * +-------------------------+ + * +----|SNAPBUILD_START |-------------+ + * | +-------------------------+ | + * | | | + * | | | + * | running_xacts with running xacts | + * | | | + * | | | + * | v | + * | +-------------------------+ v + * | |SNAPBUILD_FULL_SNAPSHOT |------------>| + * | +-------------------------+ | + * running_xacts | saved snapshot + * with zero xacts | at running_xacts's lsn + * | | | + * | all running toplevel TXNs finished | + * | | | + * | v | + * | +-------------------------+ | + * +--->|SNAPBUILD_CONSISTENT |<------------+ + * +-------------------------+ + * + * Initially the machinery is in the START stage. When a xl_running_xacts + * record is read that is sufficiently new (above the safe xmin horizon), + * there's a state transation. If there were no running xacts when the + * runnign_xacts record was generated, we'll directly go into CONSISTENT + * state, otherwise we'll switch to the FULL_SNAPSHOT state. Having a full + * snapshot means that all transactions that start henceforth can be decoded + * in their entirety, but transactions that started previously can't. In + * FULL_SNAPSHOT we'll switch into CONSISTENT once all those previously + * running transactions have committed or aborted. + * + * Only transactions that commit after CONSISTENT state has been reached will + * be replayed, even though they might have started while still in + * FULL_SNAPSHOT. That ensures that we'll reach a point where no previous + * changes has been exported, but all the following ones will be. That point + * is a convenient point to initialize replication from, which is why we + * export a snapshot at that point, which *can* be used to read normal data. + * + * Copyright (c) 2012-2014, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/replication/snapbuild.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include +#include +#include + +#include "miscadmin.h" + +#include "access/heapam_xlog.h" +#include "access/transam.h" +#include "access/xact.h" + +#include "replication/logical.h" +#include "replication/reorderbuffer.h" +#include "replication/snapbuild.h" + +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/snapshot.h" +#include "utils/snapmgr.h" +#include "utils/tqual.h" + +#include "storage/block.h" /* debugging output */ +#include "storage/fd.h" +#include "storage/lmgr.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "storage/standby.h" + +/* + * This struct contains the current state of the snapshot building + * machinery. Besides a forward declaration in the header, it is not exposed + * to the public, so we can easily change it's contents. + */ +struct SnapBuild +{ + /* how far are we along building our first full snapshot */ + SnapBuildState state; + + /* private memory context used to allocate memory for this module. */ + MemoryContext context; + + /* all transactions < than this have committed/aborted */ + TransactionId xmin; + + /* all transactions >= than this are uncommitted */ + TransactionId xmax; + + /* + * Don't replay commits from an LSN <= this LSN. This can be set + * externally but it will also be advanced (never retreat) from within + * snapbuild.c. + */ + XLogRecPtr transactions_after; + + /* + * Don't start decoding WAL until the "xl_running_xacts" information + * indicates there are no running xids with a xid smaller than this. + */ + TransactionId initial_xmin_horizon; + + /* + * Snapshot that's valid to see the catalog state seen at this moment. + */ + Snapshot snapshot; + + /* + * LSN of the last location we are sure a snapshot has been serialized to. + */ + XLogRecPtr last_serialized_snapshot; + + /* + * The reorderbuffer we need to update with usable snapshots et al. + */ + ReorderBuffer *reorder; + + /* + * Information about initially running transactions + * + * When we start building a snapshot there already may be transactions in + * progress. Those are stored in running.xip. We don't have enough + * information about those to decode their contents, so until they are + * finished (xcnt=0) we cannot switch to a CONSISTENT state. + */ + struct + { + /* + * As long as running.xcnt all XIDs < running.xmin and > running.xmax + * have to be checked whether they still are running. + */ + TransactionId xmin; + TransactionId xmax; + + size_t xcnt; /* number of used xip entries */ + size_t xcnt_space; /* allocated size of xip */ + TransactionId *xip; /* running xacts array, xidComparator-sorted */ + } running; + + /* + * Array of transactions which could have catalog changes that committed + * between xmin and xmax. + */ + struct + { + /* number of committed transactions */ + size_t xcnt; + + /* available space for committed transactions */ + size_t xcnt_space; + + /* + * Until we reach a CONSISTENT state, we record commits of all + * transactions, not just the catalog changing ones. Record when that + * changes so we know we cannot export a snapshot safely anymore. + */ + bool includes_all_transactions; + + /* + * Array of committed transactions that have modified the catalog. + * + * As this array is frequently modified we do *not* keep it in + * xidComparator order. Instead we sort the array when building & + * distributing a snapshot. + * + * TODO: It's unclear whether that reasoning has much merit. Every + * time we add something here after becoming consistent will also + * require distributing a snapshot. Storing them sorted would + * potentially also make it easier to purge (but more complicated wrt + * wraparound?). Should be improved if sorting while building the + * snapshot shows up in profiles. + */ + TransactionId *xip; + } committed; +}; + +/* + * Starting a transaction -- which we need to do while exporting a snapshot -- + * removes knowledge about the previously used resowner, so we save it here. + */ +ResourceOwner SavedResourceOwnerDuringExport = NULL; +bool ExportInProgress = false; + +/* transaction state manipulation functions */ +static void SnapBuildEndTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid); + +/* ->running manipulation */ +static bool SnapBuildTxnIsRunning(SnapBuild *builder, TransactionId xid); + +/* ->committed manipulation */ +static void SnapBuildPurgeCommittedTxn(SnapBuild *builder); + +/* snapshot building/manipulation/distribution functions */ +static Snapshot SnapBuildBuildSnapshot(SnapBuild *builder, TransactionId xid); + +static void SnapBuildFreeSnapshot(Snapshot snap); + +static void SnapBuildSnapIncRefcount(Snapshot snap); + +static void SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn); + +/* xlog reading helper functions for SnapBuildProcessRecord */ +static bool SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running); + +/* serialization functions */ +static void SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn); +static bool SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn); + + +/* + * Allocate a new snapshot builder. + * + * xmin_horizon is the xid >=which we can be sure no catalog rows have been + * removed, start_lsn is the LSN >= we want to replay commits. + */ +SnapBuild * +AllocateSnapshotBuilder(ReorderBuffer *reorder, + TransactionId xmin_horizon, + XLogRecPtr start_lsn) +{ + MemoryContext context; + MemoryContext oldcontext; + SnapBuild *builder; + + /* allocate memory in own context, to have better accountability */ + context = AllocSetContextCreate(CurrentMemoryContext, + "snapshot builder context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldcontext = MemoryContextSwitchTo(context); + + builder = palloc0(sizeof(SnapBuild)); + + builder->state = SNAPBUILD_START; + builder->context = context; + builder->reorder = reorder; + /* Other struct members initialized by zeroing via palloc0 above */ + + builder->committed.xcnt = 0; + builder->committed.xcnt_space = 128; /* arbitrary number */ + builder->committed.xip = + palloc0(builder->committed.xcnt_space * sizeof(TransactionId)); + builder->committed.includes_all_transactions = true; + builder->committed.xip = + palloc0(builder->committed.xcnt_space * sizeof(TransactionId)); + builder->initial_xmin_horizon = xmin_horizon; + builder->transactions_after = start_lsn; + + MemoryContextSwitchTo(oldcontext); + + return builder; +} + +/* + * Free a snapshot builder. + */ +void +FreeSnapshotBuilder(SnapBuild *builder) +{ + MemoryContext context = builder->context; + + /* free snapshot explicitly, that contains some error checking */ + if (builder->snapshot != NULL) + { + SnapBuildSnapDecRefcount(builder->snapshot); + builder->snapshot = NULL; + } + + /* other resources are deallocated via memory context reset */ + MemoryContextDelete(context); +} + +/* + * Free an unreferenced snapshot that has previously been built by us. + */ +static void +SnapBuildFreeSnapshot(Snapshot snap) +{ + /* make sure we don't get passed an external snapshot */ + Assert(snap->satisfies == HeapTupleSatisfiesHistoricMVCC); + + /* make sure nobody modified our snapshot */ + Assert(snap->curcid == FirstCommandId); + Assert(!snap->suboverflowed); + Assert(!snap->takenDuringRecovery); + Assert(snap->regd_count == 1); + + /* slightly more likely, so it's checked even without c-asserts */ + if (snap->copied) + elog(ERROR, "cannot free a copied snapshot"); + + if (snap->active_count) + elog(ERROR, "cannot free an active snapshot"); + + pfree(snap); +} + +/* + * In which state of snapshot building are we? + */ +SnapBuildState +SnapBuildCurrentState(SnapBuild *builder) +{ + return builder->state; +} + +/* + * Should the contents of transaction ending at 'ptr' be decoded? + */ +bool +SnapBuildXactNeedsSkip(SnapBuild *builder, XLogRecPtr ptr) +{ + return ptr <= builder->transactions_after; +} + +/* + * Increase refcount of a snapshot. + * + * This is used when handing out a snapshot to some external resource or when + * adding a Snapshot as builder->snapshot. + */ +static void +SnapBuildSnapIncRefcount(Snapshot snap) +{ + snap->active_count++; +} + +/* + * Decrease refcount of a snapshot and free if the refcount reaches zero. + * + * Externally visible, so that external resources that have been handed an + * IncRef'ed Snapshot can adjust its refcount easily. + */ +void +SnapBuildSnapDecRefcount(Snapshot snap) +{ + /* make sure we don't get passed an external snapshot */ + Assert(snap->satisfies == HeapTupleSatisfiesHistoricMVCC); + + /* make sure nobody modified our snapshot */ + Assert(snap->curcid == FirstCommandId); + Assert(!snap->suboverflowed); + Assert(!snap->takenDuringRecovery); + + Assert(snap->regd_count == 1); + + Assert(snap->active_count); + + /* slightly more likely, so its checked even without casserts */ + if (snap->copied) + elog(ERROR, "cannot free a copied snapshot"); + + snap->active_count--; + if (!snap->active_count) + SnapBuildFreeSnapshot(snap); +} + +/* + * Build a new snapshot, based on currently committed catalog-modifying + * transactions. + * + * In-progress transactions with catalog access are *not* allowed to modify + * these snapshots; they have to copy them and fill in appropriate ->curcid + * and ->subxip/subxcnt values. + */ +static Snapshot +SnapBuildBuildSnapshot(SnapBuild *builder, TransactionId xid) +{ + Snapshot snapshot; + Size ssize; + + Assert(builder->state >= SNAPBUILD_FULL_SNAPSHOT); + + ssize = sizeof(SnapshotData) + + sizeof(TransactionId) * builder->committed.xcnt + + sizeof(TransactionId) * 1 /* toplevel xid */ ; + + snapshot = MemoryContextAllocZero(builder->context, ssize); + + snapshot->satisfies = HeapTupleSatisfiesHistoricMVCC; + + /* + * We misuse the original meaning of SnapshotData's xip and subxip fields + * to make the more fitting for our needs. + * + * In the 'xip' array we store transactions that have to be treated as + * committed. Since we will only ever look at tuples from transactions + * that have modified the catalog its more efficient to store those few + * that exist between xmin and xmax (frequently there are none). + * + * Snapshots that are used in transactions that have modified the catalog + * also use the 'subxip' array to store their toplevel xid and all the + * subtransaction xids so we can recognize when we need to treat rows as + * visible that are not in xip but still need to be visible. Subxip only + * gets filled when the transaction is copied into the context of a + * catalog modifying transaction since we otherwise share a snapshot + * between transactions. As long as a txn hasn't modified the catalog it + * doesn't need to treat any uncommitted rows as visible, so there is no + * need for those xids. + * + * Both arrays are qsort'ed so that we can use bsearch() on them. + */ + Assert(TransactionIdIsNormal(builder->xmin)); + Assert(TransactionIdIsNormal(builder->xmax)); + + snapshot->xmin = builder->xmin; + snapshot->xmax = builder->xmax; + + /* store all transactions to be treated as committed by this snapshot */ + snapshot->xip = + (TransactionId *) ((char *) snapshot + sizeof(SnapshotData)); + snapshot->xcnt = builder->committed.xcnt; + memcpy(snapshot->xip, + builder->committed.xip, + builder->committed.xcnt * sizeof(TransactionId)); + + /* sort so we can bsearch() */ + qsort(snapshot->xip, snapshot->xcnt, sizeof(TransactionId), xidComparator); + + /* + * Initially, subxip is empty, i.e. it's a snapshot to be used by + * transactions that don't modify the catalog. Will be filled by + * ReorderBufferCopySnap() if necessary. + */ + snapshot->subxcnt = 0; + snapshot->subxip = NULL; + + snapshot->suboverflowed = false; + snapshot->takenDuringRecovery = false; + snapshot->copied = false; + snapshot->curcid = FirstCommandId; + snapshot->active_count = 0; + snapshot->regd_count = 1; /* mark as registered so nobody frees it */ + + return snapshot; +} + +/* + * Export a snapshot so it can be set in another session with SET TRANSACTION + * SNAPSHOT. + * + * For that we need to start a transaction in the current backend as the + * importing side checks whether the source transaction is still open to make + * sure the xmin horizon hasn't advanced since then. + * + * After that we convert a locally built snapshot into the normal variant + * understood by HeapTupleSatisfiesMVCC et al. + */ +const char * +SnapBuildExportSnapshot(SnapBuild *builder) +{ + Snapshot snap; + char *snapname; + TransactionId xid; + TransactionId *newxip; + int newxcnt = 0; + + if (builder->state != SNAPBUILD_CONSISTENT) + elog(ERROR, "cannot export a snapshot before reaching a consistent state"); + + if (!builder->committed.includes_all_transactions) + elog(ERROR, "cannot export a snapshot, not all transactions are monitored anymore"); + + /* so we don't overwrite the existing value */ + if (TransactionIdIsValid(MyPgXact->xmin)) + elog(ERROR, "cannot export a snapshot when MyPgXact->xmin already is valid"); + + if (IsTransactionOrTransactionBlock()) + elog(ERROR, "cannot export a snapshot from within a transaction"); + + if (SavedResourceOwnerDuringExport) + elog(ERROR, "can only export one snapshot at a time"); + + SavedResourceOwnerDuringExport = CurrentResourceOwner; + ExportInProgress = true; + + StartTransactionCommand(); + + Assert(!FirstSnapshotSet); + + /* There doesn't seem to a nice API to set these */ + XactIsoLevel = XACT_REPEATABLE_READ; + XactReadOnly = true; + + snap = SnapBuildBuildSnapshot(builder, GetTopTransactionId()); + + /* + * We know that snap->xmin is alive, enforced by the logical xmin + * mechanism. Due to that we can do this without locks, we're only + * changing our own value. + */ + MyPgXact->xmin = snap->xmin; + + /* allocate in transaction context */ + newxip = (TransactionId *) + palloc(sizeof(TransactionId) * GetMaxSnapshotXidCount()); + + /* + * snapbuild.c builds transactions in an "inverted" manner, which means it + * stores committed transactions in ->xip, not ones in progress. Build a + * classical snapshot by marking all non-committed transactions as + * in-progress. This can be expensive. + */ + for (xid = snap->xmin; NormalTransactionIdPrecedes(xid, snap->xmax);) + { + void *test; + + /* + * Check whether transaction committed using the decoding snapshot + * meaning of ->xip. + */ + test = bsearch(&xid, snap->xip, snap->xcnt, + sizeof(TransactionId), xidComparator); + + if (test == NULL) + { + if (newxcnt >= GetMaxSnapshotXidCount()) + elog(ERROR, "snapshot too large"); + + newxip[newxcnt++] = xid; + } + + TransactionIdAdvance(xid); + } + + snap->xcnt = newxcnt; + snap->xip = newxip; + + /* + * now that we've built a plain snapshot, use the normal mechanisms for + * exporting it + */ + snapname = ExportSnapshot(snap); + + ereport(LOG, + (errmsg("exported logical decoding snapshot: \"%s\" with %u xids", + snapname, snap->xcnt))); + return snapname; +} + +/* + * Reset a previously SnapBuildExportSnapshot()'ed snapshot if there is + * any. Aborts the previously started transaction and resets the resource + * owner back to it's original value. + */ +void +SnapBuildClearExportedSnapshot() +{ + /* nothing exported, thats the usual case */ + if (!ExportInProgress) + return; + + if (!IsTransactionState()) + elog(ERROR, "clearing exported snapshot in wrong transaction state"); + + /* make sure nothing could have ever happened */ + AbortCurrentTransaction(); + + CurrentResourceOwner = SavedResourceOwnerDuringExport; + SavedResourceOwnerDuringExport = NULL; + ExportInProgress = false; +} + +/* + * Handle the effects of a single heap change, appropriate to the current state + * of the snapshot builder and returns whether changes made at (xid, lsn) can + * be decoded. + */ +bool +SnapBuildProcessChange(SnapBuild *builder, TransactionId xid, XLogRecPtr lsn) +{ + bool is_old_tx; + + /* + * We can't handle data in transactions if we haven't built a snapshot + * yet, so don't store them. + */ + if (builder->state < SNAPBUILD_FULL_SNAPSHOT) + return false; + + /* + * No point in keeping track of changes in transactions that we don't have + * enough information about to decode. This means that they started before + * we got into the SNAPBUILD_FULL_SNAPSHOT state. + */ + if (builder->state < SNAPBUILD_CONSISTENT && + SnapBuildTxnIsRunning(builder, xid)) + return false; + + /* + * If the reorderbuffer doesn't yet have a snapshot, add one now, it will + * be needed to decode the change we're currently processing. + */ + is_old_tx = ReorderBufferIsXidKnown(builder->reorder, xid); + + if (!is_old_tx || !ReorderBufferXidHasBaseSnapshot(builder->reorder, xid)) + { + /* only build a new snapshot if we don't have a prebuilt one */ + if (builder->snapshot == NULL) + { + builder->snapshot = SnapBuildBuildSnapshot(builder, xid); + /* inrease refcount for the snapshot builder */ + SnapBuildSnapIncRefcount(builder->snapshot); + } + + /* + * Increase refcount for the transaction we're handing the snapshot + * out to. + */ + SnapBuildSnapIncRefcount(builder->snapshot); + ReorderBufferSetBaseSnapshot(builder->reorder, xid, lsn, + builder->snapshot); + } + + return true; +} + +/* + * Do CommandId/ComboCid handling after reading a xl_heap_new_cid record. This + * implies that a transaction has done some form of write to system catalogs. + */ +void +SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid, + XLogRecPtr lsn, xl_heap_new_cid *xlrec) +{ + CommandId cid; + + /* + * we only log new_cid's if a catalog tuple was modified, so mark + * the transaction as containing catalog modifications + */ + ReorderBufferXidSetCatalogChanges(builder->reorder, xid,lsn); + + ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn, + xlrec->target.node, xlrec->target.tid, + xlrec->cmin, xlrec->cmax, + xlrec->combocid); + + /* figure out new command id */ + if (xlrec->cmin != InvalidCommandId && + xlrec->cmax != InvalidCommandId) + cid = Max(xlrec->cmin, xlrec->cmax); + else if (xlrec->cmax != InvalidCommandId) + cid = xlrec->cmax; + else if (xlrec->cmin != InvalidCommandId) + cid = xlrec->cmin; + else + { + cid = InvalidCommandId; /* silence compiler */ + elog(ERROR, "xl_heap_new_cid record without a valid CommandId"); + } + + ReorderBufferAddNewCommandId(builder->reorder, xid, lsn, cid + 1); +} + +/* + * Check whether `xid` is currently 'running'. + * + * Running transactions in our parlance are transactions which we didn't + * observe from the start so we can't properly decode their contents. They + * only exist after we freshly started from an < CONSISTENT snapshot. + */ +static bool +SnapBuildTxnIsRunning(SnapBuild *builder, TransactionId xid) +{ + Assert(builder->state < SNAPBUILD_CONSISTENT); + Assert(TransactionIdIsNormal(builder->running.xmin)); + Assert(TransactionIdIsNormal(builder->running.xmax)); + + if (builder->running.xcnt && + NormalTransactionIdFollows(xid, builder->running.xmin) && + NormalTransactionIdPrecedes(xid, builder->running.xmax)) + { + TransactionId *search = + bsearch(&xid, builder->running.xip, builder->running.xcnt_space, + sizeof(TransactionId), xidComparator); + + if (search != NULL) + { + Assert(*search == xid); + return true; + } + } + + return false; +} + +/* + * Add a new Snapshot to all transactions we're decoding that currently are + * in-progress so they can see new catalog contents made by the transaction + * that just committed. This is necessary because those in-progress + * transactions will use the new catalog's contents from here on (at the very + * least everything they do needs to be compatible with newer catalog + * contents). + */ +static void +SnapBuildDistributeNewCatalogSnapshot(SnapBuild *builder, XLogRecPtr lsn) +{ + dlist_iter txn_i; + ReorderBufferTXN *txn; + + /* + * Iterate through all toplevel transactions. This can include + * subtransactions which we just don't yet know to be that, but that's + * fine, they will just get an unneccesary snapshot queued. + */ + dlist_foreach(txn_i, &builder->reorder->toplevel_by_lsn) + { + txn = dlist_container(ReorderBufferTXN, node, txn_i.cur); + + Assert(TransactionIdIsValid(txn->xid)); + + /* + * If we don't have a base snapshot yet, there are no changes in this + * transaction which in turn implies we don't yet need a snapshot at + * all. We'll add add a snapshot when the first change gets queued. + * + * NB: This works correctly even for subtransactions because + * ReorderBufferCommitChild() takes care to pass the parent the base + * snapshot, and while iterating the changequeue we'll get the change + * from the subtxn. + */ + if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, txn->xid)) + continue; + + elog(DEBUG2, "adding a new snapshot to %u at %X/%X", + txn->xid, (uint32) (lsn >> 32), (uint32) lsn); + + /* + * increase the snapshot's refcount for the transaction we are handing + * it out to + */ + SnapBuildSnapIncRefcount(builder->snapshot); + ReorderBufferAddSnapshot(builder->reorder, txn->xid, lsn, + builder->snapshot); + } +} + +/* + * Keep track of a new catalog changing transaction that has committed. + */ +static void +SnapBuildAddCommittedTxn(SnapBuild *builder, TransactionId xid) +{ + Assert(TransactionIdIsValid(xid)); + + if (builder->committed.xcnt == builder->committed.xcnt_space) + { + builder->committed.xcnt_space = builder->committed.xcnt_space * 2 + 1; + + elog(DEBUG1, "increasing space for committed transactions to %u", + (uint32) builder->committed.xcnt_space); + + builder->committed.xip = repalloc(builder->committed.xip, + builder->committed.xcnt_space * sizeof(TransactionId)); + } + + /* + * TODO: It might make sense to keep the array sorted here instead of + * doing it every time we build a new snapshot. On the other hand this + * gets called repeatedly when a transaction with subtransactions commits. + */ + builder->committed.xip[builder->committed.xcnt++] = xid; +} + +/* + * Remove knowledge about transactions we treat as committed that are smaller + * than ->xmin. Those won't ever get checked via the ->commited array but via + * the clog machinery, so we don't need to waste memory on them. + */ +static void +SnapBuildPurgeCommittedTxn(SnapBuild *builder) +{ + int off; + TransactionId *workspace; + int surviving_xids = 0; + + /* not ready yet */ + if (!TransactionIdIsNormal(builder->xmin)) + return; + + /* TODO: Neater algorithm than just copying and iterating? */ + workspace = + MemoryContextAlloc(builder->context, + builder->committed.xcnt * sizeof(TransactionId)); + + /* copy xids that still are interesting to workspace */ + for (off = 0; off < builder->committed.xcnt; off++) + { + if (NormalTransactionIdPrecedes(builder->committed.xip[off], + builder->xmin)) + ; /* remove */ + else + workspace[surviving_xids++] = builder->committed.xip[off]; + } + + /* copy workspace back to persistent state */ + memcpy(builder->committed.xip, workspace, + surviving_xids * sizeof(TransactionId)); + + elog(DEBUG3, "purged committed transactions from %u to %u, xmin: %u, xmax: %u", + (uint32) builder->committed.xcnt, (uint32) surviving_xids, + builder->xmin, builder->xmax); + builder->committed.xcnt = surviving_xids; + + pfree(workspace); +} + +/* + * Common logic for SnapBuildAbortTxn and SnapBuildCommitTxn dealing with + * keeping track of the amount of running transactions. + */ +static void +SnapBuildEndTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid) +{ + if (builder->state == SNAPBUILD_CONSISTENT) + return; + + /* + * NB: This handles subtransactions correctly even if we started from + * suboverflowed xl_running_xacts because we only keep track of toplevel + * transactions. Since the latter are always are allocated before their + * subxids and since they end at the same time it's sufficient to deal + * with them here. + */ + if (SnapBuildTxnIsRunning(builder, xid)) + { + Assert(builder->running.xcnt > 0); + + if (!--builder->running.xcnt) + { + /* + * None of the originally running transaction is running anymore, + * so our incrementaly built snapshot now is consistent. + */ + ereport(LOG, + (errmsg("logical decoding found consistent point at %X/%X", + (uint32)(lsn >> 32), (uint32)lsn), + errdetail("xid %u finished, no running transactions anymore", + xid))); + builder->state = SNAPBUILD_CONSISTENT; + } + } +} + +/* + * Abort a transaction, throw away all state we kept. + */ +void +SnapBuildAbortTxn(SnapBuild *builder, XLogRecPtr lsn, + TransactionId xid, + int nsubxacts, TransactionId *subxacts) +{ + int i; + + for (i = 0; i < nsubxacts; i++) + { + TransactionId subxid = subxacts[i]; + + SnapBuildEndTxn(builder, lsn, subxid); + } + + SnapBuildEndTxn(builder, lsn, xid); +} + +/* + * Handle everything that needs to be done when a transaction commits + */ +void +SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, TransactionId xid, + int nsubxacts, TransactionId *subxacts) +{ + int nxact; + + bool forced_timetravel = false; + bool sub_needs_timetravel = false; + bool top_needs_timetravel = false; + + TransactionId xmax = xid; + + /* + * If we couldn't observe every change of a transaction because it was + * already running at the point we started to observe we have to assume it + * made catalog changes. + * + * This has the positive benefit that we afterwards have enough + * information to build an exportable snapshot that's usable by pg_dump et + * al. + */ + if (builder->state < SNAPBUILD_CONSISTENT) + { + /* ensure that only commits after this are getting replayed */ + if (builder->transactions_after < lsn) + builder->transactions_after = lsn; + + /* + * We could avoid treating !SnapBuildTxnIsRunning transactions as + * timetravel ones, but we want to be able to export a snapshot when + * we reached consistency. + */ + forced_timetravel = true; + elog(DEBUG1, "forced to assume catalog changes for xid %u because it was running to early", xid); + } + + for (nxact = 0; nxact < nsubxacts; nxact++) + { + TransactionId subxid = subxacts[nxact]; + + /* + * make sure txn is not tracked in running txn's anymore, switch state + */ + SnapBuildEndTxn(builder, lsn, subxid); + + /* + * If we're forcing timetravel we also need visibility information + * about subtransaction, so keep track of subtransaction's state. + */ + if (forced_timetravel) + { + SnapBuildAddCommittedTxn(builder, subxid); + if (NormalTransactionIdFollows(subxid, xmax)) + xmax = subxid; + } + + /* + * Add subtransaction to base snapshot if it DDL, we don't distinguish + * to toplevel transactions there. + */ + else if (ReorderBufferXidHasCatalogChanges(builder->reorder, subxid)) + { + sub_needs_timetravel = true; + + elog(DEBUG1, "found subtransaction %u:%u with catalog changes.", + xid, subxid); + + SnapBuildAddCommittedTxn(builder, subxid); + + if (NormalTransactionIdFollows(subxid, xmax)) + xmax = subxid; + } + } + + /* + * Make sure toplevel txn is not tracked in running txn's anymore, switch + * state to consistent if possible. + */ + SnapBuildEndTxn(builder, lsn, xid); + + if (forced_timetravel) + { + elog(DEBUG2, "forced transaction %u to do timetravel.", xid); + + SnapBuildAddCommittedTxn(builder, xid); + } + /* add toplevel transaction to base snapshot */ + else if (ReorderBufferXidHasCatalogChanges(builder->reorder, xid)) + { + elog(DEBUG2, "found top level transaction %u, with catalog changes!", + xid); + + top_needs_timetravel = true; + SnapBuildAddCommittedTxn(builder, xid); + } + else if (sub_needs_timetravel) + { + /* mark toplevel txn as timetravel as well */ + SnapBuildAddCommittedTxn(builder, xid); + } + + /* if there's any reason to build a historic snapshot, to so now */ + if (forced_timetravel || top_needs_timetravel || sub_needs_timetravel) + { + /* + * Adjust xmax of the snapshot builder, we only do that for committed, + * catalog modifying, transactions, everything else isn't interesting + * for us since we'll never look at the respective rows. + */ + if (!TransactionIdIsValid(builder->xmax) || + TransactionIdFollowsOrEquals(xmax, builder->xmax)) + { + builder->xmax = xmax; + TransactionIdAdvance(builder->xmax); + } + + /* + * If we haven't built a complete snapshot yet there's no need to hand + * it out, it wouldn't (and couldn't) be used anyway. + */ + if (builder->state < SNAPBUILD_FULL_SNAPSHOT) + return; + + /* + * Decrease the snapshot builder's refcount of the old snapshot, note + * that it still will be used if it has been handed out to the + * reorderbuffer earlier. + */ + if (builder->snapshot) + SnapBuildSnapDecRefcount(builder->snapshot); + + builder->snapshot = SnapBuildBuildSnapshot(builder, xid); + + /* we might need to execute invalidations, add snapshot */ + if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, xid)) + { + SnapBuildSnapIncRefcount(builder->snapshot); + ReorderBufferSetBaseSnapshot(builder->reorder, xid, lsn, + builder->snapshot); + } + + /* refcount of the snapshot builder for the new snapshot */ + SnapBuildSnapIncRefcount(builder->snapshot); + + /* add a new SnapshotNow to all currently running transactions */ + SnapBuildDistributeNewCatalogSnapshot(builder, lsn); + } + else + { + /* record that we cannot export a general snapshot anymore */ + builder->committed.includes_all_transactions = false; + } +} + + +/* ----------------------------------- + * Snapshot building functions dealing with xlog records + * ----------------------------------- + */ + +/* + * Process a running xacts record, and use it's information to first build a + * historic snapshot and later to release resources that aren't needed + * anymore. + */ +void +SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running) +{ + ReorderBufferTXN *txn; + + /* + * If we're not consistent yet, inspect the record to see whether it + * allows to get closer to being consistent. If we are consistent, dump + * our snapshot so others or we, after a restart, can use it. + */ + if (builder->state < SNAPBUILD_CONSISTENT) + { + /* returns false if there's no point in performing cleanup just yet */ + if (!SnapBuildFindSnapshot(builder, lsn, running)) + return; + } + else + SnapBuildSerialize(builder, lsn); + + /* + * Update range of interesting xids base don the running xacts + * information. We don't increase ->xmax using it, because once we are in + * a consistent state we can do that ourselves and much more efficiently + * so, because we only need to do it for catalog transactions since we + * only ever look at those. + * + * NB: Because of that xmax can be lower than xmin, because we only + * increase xmax when a catalog modifying transaction commits. While odd + * looking, its correct and actually more efficient this way since we hit + * fast paths in tqual.c. + */ + builder->xmin = running->oldestRunningXid; + + /* Remove transactions we don't need to keep track off anymore */ + SnapBuildPurgeCommittedTxn(builder); + + elog(DEBUG3, "xmin: %u, xmax: %u, oldestrunning: %u", + builder->xmin, builder->xmax, + running->oldestRunningXid); + + /* + * Inrease shared memory limits, so vacuum can work on tuples we prevented + * from being pruned till now. + */ + LogicalIncreaseXminForSlot(lsn, running->oldestRunningXid); + + /* + * Also tell the slot where we can restart decoding from. We don't want to + * do that after every commit because changing that implies an fsync of + * the logical slot's state file, so we only do it every time we see a + * running xacts record. + * + * Do so by looking for the oldest in progress transaction (determined by + * the first LSN of any of its relevant records). Every transaction + * remembers the last location we stored the snapshot to disk before its + * beginning. That point is where we can restart from. + */ + + /* + * Can't know about a serialized snapshot's location if we're not + * consistent. + */ + if (builder->state < SNAPBUILD_CONSISTENT) + return; + + txn = ReorderBufferGetOldestTXN(builder->reorder); + + /* + * oldest ongoing txn might have started when we didn't yet serialize + * anything because we hadn't reached a consistent state yet. + */ + if (txn != NULL && txn->restart_decoding_lsn != InvalidXLogRecPtr) + LogicalIncreaseRestartDecodingForSlot(lsn, txn->restart_decoding_lsn); + /* + * No in-progress transaction, can reuse the last serialized snapshot if + * we have one. + */ + else if (txn == NULL && + builder->reorder->current_restart_decoding_lsn != InvalidXLogRecPtr && + builder->last_serialized_snapshot != InvalidXLogRecPtr) + LogicalIncreaseRestartDecodingForSlot(lsn, + builder->last_serialized_snapshot); +} + + +/* + * Build the start of a snapshot that's capable of decoding the catalog. + * + * Helper function for SnapBuildProcessRunningXacts() while we're not yet + * consistent. + * + * Returns true if there is a point in performing internal maintenance/cleanup + * using the xl_running_xacts record. + */ +static bool +SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running) +{ + /* --- + * Build catalog decoding snapshot incrementally using information about + * the currently running transactions. There are several ways to do that: + * + * a) There were no running transactions when the xl_running_xacts record + * was inserted, jump to CONSISTENT immediately. We might find such a + * state we were waiting for b) and c). + * + * b) Wait for all toplevel transactions that were running to end. We + * simply track the number of in-progress toplevel transactions and + * lower it whenever one commits or aborts. When that number + * (builder->running.xcnt) reaches zero, we can go from FULL_SNAPSHOT + * to CONSISTENT. + * NB: We need to search running.xip when seeing a transaction's end to + * make sure it's a toplevel transaction and it's been one of the + * intially running ones. + * Interestingly, in contrast to HS, this allows us not to care about + * subtransactions - and by extension suboverflowed xl_running_xacts - + * at all. + * + * c) This (in a previous run) or another decoding slot serialized a + * snapshot to disk that we can use. + * --- + */ + + /* + * xl_running_xact record is older than what we can use, we might not have + * all necessary catalog rows anymore. + */ + if (TransactionIdIsNormal(builder->initial_xmin_horizon) && + NormalTransactionIdPrecedes(running->oldestRunningXid, + builder->initial_xmin_horizon)) + { + ereport(DEBUG1, + (errmsg("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low", + (uint32) (lsn >> 32), (uint32) lsn), + errdetail("initial xmin horizon of %u vs the snapshot's %u", + builder->initial_xmin_horizon, running->oldestRunningXid))); + return true; + } + + /* + * a) No transaction were running, we can jump to consistent. + * + * NB: We might have already started to incrementally assemble a snapshot, + * so we need to be careful to deal with that. + */ + if (running->xcnt == 0) + { + if (builder->transactions_after == InvalidXLogRecPtr || + builder->transactions_after < lsn) + builder->transactions_after = lsn; + + builder->xmin = running->oldestRunningXid; + builder->xmax = running->latestCompletedXid; + TransactionIdAdvance(builder->xmax); + + Assert(TransactionIdIsNormal(builder->xmin)); + Assert(TransactionIdIsNormal(builder->xmax)); + + /* no transactions running now */ + builder->running.xcnt = 0; + builder->running.xmin = InvalidTransactionId; + builder->running.xmax = InvalidTransactionId; + + builder->state = SNAPBUILD_CONSISTENT; + + ereport(LOG, + (errmsg("logical decoding found consistent point at %X/%X", + (uint32)(lsn >> 32), (uint32)lsn), + errdetail("running xacts with xcnt == 0"))); + + return false; + } + /* c) valid on disk state */ + else if (SnapBuildRestore(builder, lsn)) + { + /* there won't be any state to cleanup */ + return false; + } + /* + * b) first encounter of a useable xl_running_xacts record. If we had + * found one earlier we would either track running transactions + * (i.e. builder->running.xcnt != 0) or be consistent (this function + * wouldn't get called). + */ + else if (!builder->running.xcnt) + { + int off; + + /* + * We only care about toplevel xids as those are the ones we + * definitely see in the wal stream. As snapbuild.c tracks committed + * instead of running transactions we don't need to know anything + * about uncommitted subtransactions. + */ + builder->xmin = running->oldestRunningXid; + builder->xmax = running->latestCompletedXid; + TransactionIdAdvance(builder->xmax); + + /* so we can safely use the faster comparisons */ + Assert(TransactionIdIsNormal(builder->xmin)); + Assert(TransactionIdIsNormal(builder->xmax)); + + builder->running.xcnt = running->xcnt; + builder->running.xcnt_space = running->xcnt; + builder->running.xip = + MemoryContextAlloc(builder->context, + builder->running.xcnt * sizeof(TransactionId)); + memcpy(builder->running.xip, running->xids, + builder->running.xcnt * sizeof(TransactionId)); + + /* sort so we can do a binary search */ + qsort(builder->running.xip, builder->running.xcnt, + sizeof(TransactionId), xidComparator); + + builder->running.xmin = builder->running.xip[0]; + builder->running.xmax = builder->running.xip[running->xcnt - 1]; + + /* makes comparisons cheaper later */ + TransactionIdRetreat(builder->running.xmin); + TransactionIdAdvance(builder->running.xmax); + + builder->state = SNAPBUILD_FULL_SNAPSHOT; + + ereport(LOG, + (errmsg("logical decoding found initial starting point at %X/%X", + (uint32)(lsn >> 32), (uint32)lsn), + errdetail("%u xacts need to finish", (uint32) builder->running.xcnt))); + + /* + * Iterate through all xids, wait for them to finish. + * + * This isn't required for the correctness of decoding, but to allow + * isolationtester to notice that we're currently waiting for + * something. + */ + for(off = 0; off < builder->running.xcnt; off++) + { + TransactionId xid = builder->running.xip[off]; + + /* + * Upper layers should prevent that we ever need to wait on + * ourselves. Check anyway, since failing to do so would either + * result in an endless wait or an Assert() failure. + */ + if (TransactionIdIsCurrentTransactionId(xid)) + elog(ERROR, "waiting for ourselves"); + + XactLockTableWait(xid); + } + + /* nothing could have built up so far, so don't perform cleanup */ + return false; + } + + /* + * We already started to track running xacts and need to wait for all + * in-progress ones to finish. We fall through to the normal processing of + * records so incremental cleanup can be performed. + */ + return true; +} + + +/* ----------------------------------- + * Snapshot serialization support + * ----------------------------------- + */ + +/* + * We store current state of struct SnapBuild on disk in the following manner: + * + * struct SnapBuildOnDisk; + * TransactionId * running.xcnt_space; + * TransactionId * committed.xcnt; (*not xcnt_space*) + * + */ +typedef struct SnapBuildOnDisk +{ + /* first part of this struct needs to be version independent */ + + /* data not covered by checksum */ + uint32 magic; + pg_crc32 checksum; + + /* data covered by checksum */ + + /* version, in case we want to support pg_upgrade */ + uint32 version; + /* how large is the on disk data, excluding the constant sized part */ + uint32 length; + + /* version dependent part */ + SnapBuild builder; + + /* variable amount of TransactionIds follows */ +} SnapBuildOnDisk; + +#define SnapBuildOnDiskConstantSize \ + offsetof(SnapBuildOnDisk, builder) +#define SnapBuildOnDiskNotChecksummedSize \ + offsetof(SnapBuildOnDisk, version) + +#define SNAPBUILD_MAGIC 0x51A1E001 +#define SNAPBUILD_VERSION 1 + +/* + * Store/Load a snapshot from disk, depending on the snapshot builder's state. + * + * Supposed to be used by external (i.e. not snapbuild.c) code that just reada + * record that's a potential location for a serialized snapshot. + */ +void +SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn) +{ + if (builder->state < SNAPBUILD_CONSISTENT) + SnapBuildRestore(builder, lsn); + else + SnapBuildSerialize(builder, lsn); +} + +/* + * Serialize the snapshot 'builder' at the location 'lsn' if it hasn't already + * been done by another decoding process. + */ +static void +SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn) +{ + Size needed_length; + SnapBuildOnDisk *ondisk; + char *ondisk_c; + int fd; + char tmppath[MAXPGPATH]; + char path[MAXPGPATH]; + int ret; + struct stat stat_buf; + uint32 sz; + + Assert(lsn != InvalidXLogRecPtr); + Assert(builder->last_serialized_snapshot == InvalidXLogRecPtr || + builder->last_serialized_snapshot <= lsn); + + /* + * no point in serializing if we cannot continue to work immediately after + * restoring the snapshot + */ + if (builder->state < SNAPBUILD_CONSISTENT) + return; + + /* + * We identify snapshots by the LSN they are valid for. We don't need to + * include timelines in the name as each LSN maps to exactly one timeline + * unless the user used pg_resetxlog or similar. If a user did so, there's + * no hope continuing to decode anyway. + */ + sprintf(path, "pg_llog/snapshots/%X-%X.snap", + (uint32) (lsn >> 32), (uint32) lsn); + + /* + * first check whether some other backend already has written the snapshot + * for this LSN. It's perfectly fine if there's none, so we accept ENOENT + * as a valid state. Everything else is an unexpected error. + */ + ret = stat(path, &stat_buf); + + if (ret != 0 && errno != ENOENT) + ereport(ERROR, + (errmsg("could not stat file \"%s\": %m", path))); + + else if (ret == 0) + { + /* + * somebody else has already serialized to this point, don't overwrite + * but remember location, so we don't need to read old data again. + * + * To be sure it has been synced to disk after the rename() from the + * tempfile filename to the real filename, we just repeat the + * fsync. That ought to be cheap because in most scenarios it should + * already be safely on disk. + */ + fsync_fname(path, false); + fsync_fname("pg_llog/snapshots", true); + + builder->last_serialized_snapshot = lsn; + goto out; + } + + /* + * there is an obvious race condition here between the time we stat(2) the + * file and us writing the file. But we rename the file into place + * atomically and all files created need to contain the same data anyway, + * so this is perfectly fine, although a bit of a resource waste. Locking + * seems like pointless complication. + */ + elog(DEBUG1, "serializing snapshot to %s", path); + + /* to make sure only we will write to this tempfile, include pid */ + sprintf(tmppath, "pg_llog/snapshots/%X-%X.snap.%u.tmp", + (uint32) (lsn >> 32), (uint32) lsn, MyProcPid); + + /* + * Unlink temporary file if it already exists, needs to have been before a + * crash/error since we won't enter this function twice from within a + * single decoding slot/backend and the temporary file contains the pid of + * the current process. + */ + if (unlink(tmppath) != 0 && errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not unlink file \"%s\": %m", path))); + + needed_length = sizeof(SnapBuildOnDisk) + + sizeof(TransactionId) * builder->running.xcnt_space + + sizeof(TransactionId) * builder->committed.xcnt; + + ondisk_c = MemoryContextAllocZero(builder->context, needed_length); + ondisk = (SnapBuildOnDisk *) ondisk_c; + ondisk->magic = SNAPBUILD_MAGIC; + ondisk->version = SNAPBUILD_VERSION; + ondisk->length = needed_length; + INIT_CRC32(ondisk->checksum); + COMP_CRC32(ondisk->checksum, + ((char *) ondisk) + SnapBuildOnDiskNotChecksummedSize, + SnapBuildOnDiskConstantSize - SnapBuildOnDiskNotChecksummedSize); + ondisk_c += sizeof(SnapBuildOnDisk); + + memcpy(&ondisk->builder, builder, sizeof(SnapBuild)); + /* NULL-ify memory-only data */ + ondisk->builder.context = NULL; + ondisk->builder.snapshot = NULL; + ondisk->builder.reorder = NULL; + ondisk->builder.running.xip = NULL; + ondisk->builder.committed.xip = NULL; + + COMP_CRC32(ondisk->checksum, + &ondisk->builder, + sizeof(SnapBuild)); + + /* copy running xacts */ + sz = sizeof(TransactionId) * builder->running.xcnt_space; + memcpy(ondisk_c, builder->running.xip, sz); + COMP_CRC32(ondisk->checksum, ondisk_c, sz); + ondisk_c += sz; + + /* copy committed xacts */ + sz = sizeof(TransactionId) * builder->committed.xcnt; + memcpy(ondisk_c, builder->committed.xip, sz); + COMP_CRC32(ondisk->checksum, ondisk_c, sz); + ondisk_c += sz; + + /* we have valid data now, open tempfile and write it there */ + fd = OpenTransientFile(tmppath, + O_CREAT | O_EXCL | O_WRONLY | PG_BINARY, + S_IRUSR | S_IWUSR); + if (fd < 0) + ereport(ERROR, + (errmsg("could not open file \"%s\": %m", path))); + + if ((write(fd, ondisk, needed_length)) != needed_length) + { + CloseTransientFile(fd); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tmppath))); + } + + /* + * fsync the file before renaming so that even if we crash after this we + * have either a fully valid file or nothing. + * + * TODO: Do the fsync() via checkpoints/restartpoints, doing it here has + * some noticeable overhead since it's performed synchronously during + * decoding? + */ + if (pg_fsync(fd) != 0) + { + CloseTransientFile(fd); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", tmppath))); + } + CloseTransientFile(fd); + + fsync_fname("pg_llog/snapshots", true); + + /* + * We may overwrite the work from some other backend, but that's ok, our + * snapshot is valid as well, we'll just have done some superflous work. + */ + if (rename(tmppath, path) != 0) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not rename file \"%s\" to \"%s\": %m", + tmppath, path))); + } + + /* make sure we persist */ + fsync_fname(path, false); + fsync_fname("pg_llog/snapshots", true); + + /* + * Now there's no way we can loose the dumped state anymore, remember + * this as a serialization point. + */ + builder->last_serialized_snapshot = lsn; + +out: + ReorderBufferSetRestartPoint(builder->reorder, + builder->last_serialized_snapshot); +} + +/* + * Restore a snapshot into 'builder' if previously one has been stored at the + * location indicated by 'lsn'. Returns true if successful, false otherwise. + */ +static bool +SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn) +{ + SnapBuildOnDisk ondisk; + int fd; + char path[MAXPGPATH]; + Size sz; + int readBytes; + pg_crc32 checksum; + + /* no point in loading a snapshot if we're already there */ + if (builder->state == SNAPBUILD_CONSISTENT) + return false; + + sprintf(path, "pg_llog/snapshots/%X-%X.snap", + (uint32) (lsn >> 32), (uint32) lsn); + + fd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0); + + if (fd < 0 && errno == ENOENT) + return false; + else if (fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", path))); + + /* ---- + * Make sure the snapshot had been stored safely to disk, that's normally + * cheap. + * Note that we do not need PANIC here, nobody will be able to use the + * slot without fsyncing, and saving it won't suceed without an fsync() + * either... + * ---- + */ + fsync_fname(path, false); + fsync_fname("pg_llog/snapshots", true); + + + /* read statically sized portion of snapshot */ + readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize); + if (readBytes != SnapBuildOnDiskConstantSize) + { + CloseTransientFile(fd); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\", read %d of %d: %m", + path, readBytes, (int) SnapBuildOnDiskConstantSize))); + } + + if (ondisk.magic != SNAPBUILD_MAGIC) + ereport(ERROR, + (errmsg("snapbuild state file \"%s\" has wrong magic %u instead of %u", + path, ondisk.magic, SNAPBUILD_MAGIC))); + + if (ondisk.version != SNAPBUILD_VERSION) + ereport(ERROR, + (errmsg("snapbuild state file \"%s\" has unsupported version %u instead of %u", + path, ondisk.version, SNAPBUILD_VERSION))); + + INIT_CRC32(checksum); + COMP_CRC32(checksum, + ((char *) &ondisk) + SnapBuildOnDiskNotChecksummedSize, + SnapBuildOnDiskConstantSize - SnapBuildOnDiskNotChecksummedSize); + + /* read SnapBuild */ + readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild)); + if (readBytes != sizeof(SnapBuild)) + { + CloseTransientFile(fd); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\", read %d of %d: %m", + path, readBytes, (int) sizeof(SnapBuild)))); + } + COMP_CRC32(checksum, &ondisk.builder, sizeof(SnapBuild)); + + /* restore running xacts information */ + sz = sizeof(TransactionId) * ondisk.builder.running.xcnt_space; + ondisk.builder.running.xip = MemoryContextAlloc(builder->context, sz); + readBytes = read(fd, ondisk.builder.running.xip, sz); + if (readBytes != sz) + { + CloseTransientFile(fd); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\", read %d of %d: %m", + path, readBytes, (int) sz))); + } + COMP_CRC32(checksum, ondisk.builder.running.xip, sz); + + /* restore committed xacts information */ + sz = sizeof(TransactionId) * ondisk.builder.committed.xcnt; + ondisk.builder.committed.xip = MemoryContextAlloc(builder->context, sz); + readBytes = read(fd, ondisk.builder.committed.xip, sz); + if (readBytes != sz) + { + CloseTransientFile(fd); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\", read %d of %d: %m", + path, readBytes, (int) sz))); + } + COMP_CRC32(checksum, ondisk.builder.committed.xip, sz); + + CloseTransientFile(fd); + + /* verify checksum of what we've read */ + if (!EQ_CRC32(checksum, ondisk.checksum)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("snapbuild state file %s: checksum mismatch, is %u, should be %u", + path, checksum, ondisk.checksum))); + + /* + * ok, we now have a sensible snapshot here, figure out if it has more + * information than we have. + */ + + /* + * We are only interested in consistent snapshots for now, comparing + * whether one imcomplete snapshot is more "advanced" seems to be + * unnecessarily complex. + */ + if (ondisk.builder.state < SNAPBUILD_CONSISTENT) + goto snapshot_not_interesting; + + /* + * Don't use a snapshot that requires an xmin that we cannot guarantee to + * be available. + */ + if (TransactionIdPrecedes(ondisk.builder.xmin, builder->initial_xmin_horizon)) + goto snapshot_not_interesting; + + + /* ok, we think the snapshot is sensible, copy over everything important */ + builder->xmin = ondisk.builder.xmin; + builder->xmax = ondisk.builder.xmax; + builder->state = ondisk.builder.state; + + builder->committed.xcnt = ondisk.builder.committed.xcnt; + /* We only allocated/stored xcnt, not xcnt_space xids ! */ + /* don't overwrite preallocated xip, if we don't have anything here */ + if (builder->committed.xcnt > 0) + { + pfree(builder->committed.xip); + builder->committed.xcnt_space = ondisk.builder.committed.xcnt; + builder->committed.xip = ondisk.builder.committed.xip; + } + ondisk.builder.committed.xip = NULL; + + builder->running.xcnt = ondisk.builder.committed.xcnt; + if (builder->running.xip) + pfree(builder->running.xip); + builder->running.xcnt_space = ondisk.builder.committed.xcnt_space; + builder->running.xip = ondisk.builder.running.xip; + + /* our snapshot is not interesting anymore, build a new one */ + if (builder->snapshot != NULL) + { + SnapBuildSnapDecRefcount(builder->snapshot); + } + builder->snapshot = SnapBuildBuildSnapshot(builder, InvalidTransactionId); + SnapBuildSnapIncRefcount(builder->snapshot); + + ReorderBufferSetRestartPoint(builder->reorder, lsn); + + Assert(builder->state == SNAPBUILD_CONSISTENT); + + ereport(LOG, + (errmsg("logical decoding found consistent point at %X/%X", + (uint32)(lsn >> 32), (uint32)lsn), + errdetail("found initial snapshot in snapbuild file"))); + return true; + +snapshot_not_interesting: + if (ondisk.builder.running.xip != NULL) + pfree(ondisk.builder.running.xip); + if (ondisk.builder.committed.xip != NULL) + pfree(ondisk.builder.committed.xip); + return false; +} + +/* + * Remove all serialized snapshots that are not required anymore because no + * slot can need them. This doesn't actually have to run during a checkpoint, + * but it's a convenient point to schedule this. + * + * NB: We run this during checkpoints even if logical decoding is disabled so + * we cleanup old slots at some point after it got disabled. + */ +void +CheckPointSnapBuild(void) +{ + XLogRecPtr cutoff; + XLogRecPtr redo; + DIR *snap_dir; + struct dirent *snap_de; + char path[MAXPGPATH]; + + /* + * We start of with a minimum of the last redo pointer. No new replication + * slot will start before that, so that's a safe upper bound for removal. + */ + redo = GetRedoRecPtr(); + + /* now check for the restart ptrs from existing slots */ + cutoff = ReplicationSlotsComputeLogicalRestartLSN(); + + /* don't start earlier than the restart lsn */ + if (redo < cutoff) + cutoff = redo; + + snap_dir = AllocateDir("pg_llog/snapshots"); + while ((snap_de = ReadDir(snap_dir, "pg_llog/snapshots")) != NULL) + { + uint32 hi; + uint32 lo; + XLogRecPtr lsn; + struct stat statbuf; + + if (strcmp(snap_de->d_name, ".") == 0 || + strcmp(snap_de->d_name, "..") == 0) + continue; + + snprintf(path, MAXPGPATH, "pg_llog/snapshots/%s", snap_de->d_name); + + if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode)) + { + elog(DEBUG1, "only regular files expected: %s", path); + continue; + } + + /* + * temporary filenames from SnapBuildSerialize() include the LSN and + * everything but are postfixed by .$pid.tmp. We can just remove them + * the same as other files because there can be none that are currently + * being written that are older than cutoff. + * + * We just log a message if a file doesn't fit the pattern, it's + * probably some editors lock/state file or similar... + */ + if (sscanf(snap_de->d_name, "%X-%X.snap", &hi, &lo) != 2) + { + ereport(LOG, + (errmsg("could not parse filename \"%s\"", path))); + continue; + } + + lsn = ((uint64) hi) << 32 | lo; + + /* check whether we still need it */ + if (lsn < cutoff || cutoff == InvalidXLogRecPtr) + { + elog(DEBUG1, "removing snapbuild snapshot %s", path); + + /* + * It's not particularly harmful, though strange, if we can't + * remove the file here. Don't prevent the checkpoint from + * completing, that'd be cure worse than the disease. + */ + if (unlink(path) < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not unlink file \"%s\": %m", + path))); + continue; + } + } + } + FreeDir(snap_dir); +} diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 826c7f027e..45ed7e40e8 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -43,6 +43,7 @@ #include "miscadmin.h" #include "replication/slot.h" #include "storage/fd.h" +#include "storage/proc.h" #include "storage/procarray.h" /* @@ -82,6 +83,8 @@ ReplicationSlot *MyReplicationSlot = NULL; /* GUCs */ int max_replication_slots = 0; /* the maximum number of replication slots */ +static void ReplicationSlotDropAcquired(void); + /* internal persistency functions */ static void RestoreSlotFromDisk(const char *name); static void CreateSlotOnDisk(ReplicationSlot *slot); @@ -190,11 +193,12 @@ ReplicationSlotValidateName(const char *name, int elevel) * Create a new replication slot and mark it as used by this backend. * * name: Name of the slot - * db_specific: changeset extraction is db specific, if the slot is going to + * db_specific: logical decoding is db specific; if the slot is going to * be used for that pass true, otherwise false. */ void -ReplicationSlotCreate(const char *name, bool db_specific) +ReplicationSlotCreate(const char *name, bool db_specific, + ReplicationSlotPersistency persistency) { ReplicationSlot *slot = NULL; int i; @@ -246,6 +250,7 @@ ReplicationSlotCreate(const char *name, bool db_specific) */ Assert(!slot->in_use); Assert(!slot->active); + slot->data.persistency = persistency; slot->data.xmin = InvalidTransactionId; slot->effective_xmin = InvalidTransactionId; strncpy(NameStr(slot->data.name), name, NAMEDATALEN); @@ -348,14 +353,30 @@ ReplicationSlotRelease(void) Assert(slot != NULL && slot->active); - /* Mark slot inactive. We're not freeing it, just disconnecting. */ + if (slot->data.persistency == RS_EPHEMERAL) { + /* + * Delete the slot. There is no !PANIC case where this is allowed to + * fail, all that may happen is an incomplete cleanup of the on-disk + * data. + */ + ReplicationSlotDropAcquired(); + } + else + { + /* Mark slot inactive. We're not freeing it, just disconnecting. */ volatile ReplicationSlot *vslot = slot; SpinLockAcquire(&slot->mutex); vslot->active = false; SpinLockRelease(&slot->mutex); - MyReplicationSlot = NULL; } + + MyReplicationSlot = NULL; + + /* might not have been set when we've been a plain slot */ + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + MyPgXact->vacuumFlags &= ~PROC_IN_LOGICAL_DECODING; + LWLockRelease(ProcArrayLock); } /* @@ -364,52 +385,36 @@ ReplicationSlotRelease(void) void ReplicationSlotDrop(const char *name) { - ReplicationSlot *slot = NULL; - int i; - bool active; + Assert(MyReplicationSlot == NULL); + + ReplicationSlotAcquire(name); + + ReplicationSlotDropAcquired(); +} + +/* + * Permanently drop the currently acquired replication slot which will be + * released by the point this function returns. + */ +static void +ReplicationSlotDropAcquired(void) +{ char path[MAXPGPATH]; char tmppath[MAXPGPATH]; + ReplicationSlot *slot = MyReplicationSlot; - ReplicationSlotValidateName(name, ERROR); + Assert(MyReplicationSlot != NULL); + + /* slot isn't acquired anymore */ + MyReplicationSlot = NULL; /* - * If some other backend ran this code currently with us, we might both - * try to free the same slot at the same time. Or we might try to delete - * a slot with a certain name while someone else was trying to create a - * slot with the same name. + * If some other backend ran this code concurrently with us, we might try + * to delete a slot with a certain name while someone else was trying to + * create a slot with the same name. */ LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE); - /* Search for the named slot and mark it active if we find it. */ - LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); - for (i = 0; i < max_replication_slots; i++) - { - ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i]; - - if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0) - { - volatile ReplicationSlot *vslot = s; - - SpinLockAcquire(&s->mutex); - active = vslot->active; - vslot->active = true; - SpinLockRelease(&s->mutex); - slot = s; - break; - } - } - LWLockRelease(ReplicationSlotControlLock); - - /* If we did not find the slot or it was already active, error out. */ - if (slot == NULL) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_OBJECT), - errmsg("replication slot \"%s\" does not exist", name))); - if (active) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_IN_USE), - errmsg("replication slot \"%s\" is already active", name))); - /* Generate pathnames. */ sprintf(path, "pg_replslot/%s", NameStr(slot->data.name)); sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name)); @@ -417,34 +422,40 @@ ReplicationSlotDrop(const char *name) /* * Rename the slot directory on disk, so that we'll no longer recognize * this as a valid slot. Note that if this fails, we've got to mark the - * slot inactive again before bailing out. + * slot inactive before bailing out. If we're dropping a ephemeral slot, + * we better never fail hard as the caller won't expect the slot to + * survive and this might get called during error handling. */ - if (rename(path, tmppath) != 0) + if (rename(path, tmppath) == 0) + { + /* + * We need to fsync() the directory we just renamed and its parent to + * make sure that our changes are on disk in a crash-safe fashion. If + * fsync() fails, we can't be sure whether the changes are on disk or + * not. For now, we handle that by panicking; + * StartupReplicationSlots() will try to straighten it out after + * restart. + */ + START_CRIT_SECTION(); + fsync_fname(tmppath, true); + fsync_fname("pg_replslot", true); + END_CRIT_SECTION(); + } + else { volatile ReplicationSlot *vslot = slot; + bool fail_softly = slot->data.persistency == RS_EPHEMERAL; SpinLockAcquire(&slot->mutex); vslot->active = false; SpinLockRelease(&slot->mutex); - ereport(ERROR, + ereport(fail_softly ? WARNING : ERROR, (errcode_for_file_access(), errmsg("could not rename \"%s\" to \"%s\": %m", path, tmppath))); } - /* - * We need to fsync() the directory we just renamed and its parent to make - * sure that our changes are on disk in a crash-safe fashion. If fsync() - * fails, we can't be sure whether the changes are on disk or not. For - * now, we handle that by panicking; StartupReplicationSlots() will - * try to straighten it out after restart. - */ - START_CRIT_SECTION(); - fsync_fname(tmppath, true); - fsync_fname("pg_replslot", true); - END_CRIT_SECTION(); - /* * The slot is definitely gone. Lock out concurrent scans of the array * long enough to kill it. It's OK to clear the active flag here without @@ -461,7 +472,7 @@ ReplicationSlotDrop(const char *name) * Slot is dead and doesn't prevent resource removal anymore, recompute * limits. */ - ReplicationSlotsComputeRequiredXmin(); + ReplicationSlotsComputeRequiredXmin(false); ReplicationSlotsComputeRequiredLSN(); /* @@ -518,22 +529,50 @@ ReplicationSlotMarkDirty(void) } } +/* + * Convert a slot that's marked as RS_DROP_ON_ERROR to a RS_PERSISTENT slot, + * guaranteeing it will be there after a eventual crash. + */ +void +ReplicationSlotPersist(void) +{ + ReplicationSlot *slot = MyReplicationSlot; + + Assert(slot != NULL); + Assert(slot->data.persistency != RS_PERSISTENT); + + { + volatile ReplicationSlot *vslot = slot; + + SpinLockAcquire(&slot->mutex); + vslot->data.persistency = RS_PERSISTENT; + SpinLockRelease(&slot->mutex); + } + + ReplicationSlotMarkDirty(); + ReplicationSlotSave(); +} + /* * Compute the oldest xmin across all slots and store it in the ProcArray. */ void -ReplicationSlotsComputeRequiredXmin(void) +ReplicationSlotsComputeRequiredXmin(bool already_locked) { int i; TransactionId agg_xmin = InvalidTransactionId; + TransactionId agg_catalog_xmin = InvalidTransactionId; Assert(ReplicationSlotCtl != NULL); - LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); + if (!already_locked) + LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); + for (i = 0; i < max_replication_slots; i++) { ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i]; TransactionId effective_xmin; + TransactionId effective_catalog_xmin; if (!s->in_use) continue; @@ -543,6 +582,7 @@ ReplicationSlotsComputeRequiredXmin(void) SpinLockAcquire(&s->mutex); effective_xmin = vslot->effective_xmin; + effective_catalog_xmin = vslot->effective_catalog_xmin; SpinLockRelease(&s->mutex); } @@ -551,10 +591,18 @@ ReplicationSlotsComputeRequiredXmin(void) (!TransactionIdIsValid(agg_xmin) || TransactionIdPrecedes(effective_xmin, agg_xmin))) agg_xmin = effective_xmin; - } - LWLockRelease(ReplicationSlotControlLock); - ProcArraySetReplicationSlotXmin(agg_xmin); + /* check the catalog xmin */ + if (TransactionIdIsValid(effective_catalog_xmin) && + (!TransactionIdIsValid(agg_catalog_xmin) || + TransactionIdPrecedes(effective_catalog_xmin, agg_catalog_xmin))) + agg_catalog_xmin = effective_catalog_xmin; + } + + if (!already_locked) + LWLockRelease(ReplicationSlotControlLock); + + ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked); } /* @@ -595,6 +643,110 @@ ReplicationSlotsComputeRequiredLSN(void) XLogSetReplicationSlotMinimumLSN(min_required); } +/* + * Compute the oldest WAL LSN required by *logical* decoding slots.. + * + * Returns InvalidXLogRecPtr if logical decoding is disabled or no logicals + * slots exist. + * + * NB: this returns a value >= ReplicationSlotsComputeRequiredLSN(), since it + * ignores physical replication slots. + * + * The results aren't required frequently, so we don't maintain a precomputed + * value like we do for ComputeRequiredLSN() and ComputeRequiredXmin(). + */ +XLogRecPtr +ReplicationSlotsComputeLogicalRestartLSN(void) +{ + XLogRecPtr result = InvalidXLogRecPtr; + int i; + + if (max_replication_slots <= 0) + return InvalidXLogRecPtr; + + LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); + + for (i = 0; i < max_replication_slots; i++) + { + volatile ReplicationSlot *s; + XLogRecPtr restart_lsn; + + s = &ReplicationSlotCtl->replication_slots[i]; + + /* cannot change while ReplicationSlotCtlLock is held */ + if (!s->in_use) + continue; + + /* we're only interested in logical slots */ + if (s->data.database == InvalidOid) + continue; + + /* read once, it's ok if it increases while we're checking */ + SpinLockAcquire(&s->mutex); + restart_lsn = s->data.restart_lsn; + SpinLockRelease(&s->mutex); + + if (result == InvalidXLogRecPtr || + restart_lsn < result) + result = restart_lsn; + } + + LWLockRelease(ReplicationSlotControlLock); + + return result; +} + +/* + * ReplicationSlotsCountDBSlots -- count the number of slots that refer to the + * passed database oid. + * + * Returns true if there are any slots referencing the database. *nslots will + * be set to the absolute number of slots in the database, *nactive to ones + * currently active. + */ +bool +ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive) +{ + int i; + + *nslots = *nactive = 0; + + if (max_replication_slots <= 0) + return false; + + LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); + for (i = 0; i < max_replication_slots; i++) + { + volatile ReplicationSlot *s; + + s = &ReplicationSlotCtl->replication_slots[i]; + + /* cannot change while ReplicationSlotCtlLock is held */ + if (!s->in_use) + continue; + + /* not database specific, skip */ + if (s->data.database == InvalidOid) + + /* not our database, skip */ + if (s->data.database != dboid) + continue; + + /* count slots with spinlock held */ + SpinLockAcquire(&s->mutex); + (*nslots)++; + if (s->active) + (*nactive)++; + SpinLockRelease(&s->mutex); + } + LWLockRelease(ReplicationSlotControlLock); + + if (*nslots > 0) + return true; + return false; +} + + /* * Check whether the server's configuration supports using replication * slots. @@ -723,7 +875,7 @@ StartupReplicationSlots(XLogRecPtr checkPointRedo) return; /* Now that we have recovered all the data, compute replication xmin */ - ReplicationSlotsComputeRequiredXmin(); + ReplicationSlotsComputeRequiredXmin(false); ReplicationSlotsComputeRequiredLSN(); } @@ -1050,8 +1202,19 @@ RestoreSlotFromDisk(const char *name) memcpy(&slot->data, &cp.slotdata, sizeof(ReplicationSlotPersistentData)); + /* Don't restore the slot if it's not parked as persistent. */ + if (slot->data.persistency != RS_PERSISTENT) + return; + /* initialize in memory state */ slot->effective_xmin = cp.slotdata.xmin; + slot->effective_catalog_xmin = cp.slotdata.catalog_xmin; + + slot->candidate_catalog_xmin = InvalidTransactionId; + slot->candidate_xmin_lsn = InvalidXLogRecPtr; + slot->candidate_restart_lsn = InvalidXLogRecPtr; + slot->candidate_restart_valid = InvalidXLogRecPtr; + slot->in_use = true; slot->active = false; diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index 5acd2bae19..c9416b03ee 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -15,13 +15,13 @@ #include "funcapi.h" #include "miscadmin.h" + #include "access/htup_details.h" +#include "replication/slot.h" +#include "replication/logical.h" +#include "replication/logicalfuncs.h" #include "utils/builtins.h" #include "utils/pg_lsn.h" -#include "replication/slot.h" - -Datum pg_create_physical_replication_slot(PG_FUNCTION_ARGS); -Datum pg_drop_replication_slot(PG_FUNCTION_ARGS); static void check_permissions(void) @@ -54,7 +54,7 @@ pg_create_physical_replication_slot(PG_FUNCTION_ARGS) elog(ERROR, "return type must be a row type"); /* acquire replication slot, this will check for conflicting names*/ - ReplicationSlotCreate(NameStr(*name), false); + ReplicationSlotCreate(NameStr(*name), false, RS_PERSISTENT); values[0] = NameGetDatum(&MyReplicationSlot->data.name); @@ -69,6 +69,68 @@ pg_create_physical_replication_slot(PG_FUNCTION_ARGS) PG_RETURN_DATUM(result); } + +/* + * SQL function for creating a new logical replication slot. + */ +Datum +pg_create_logical_replication_slot(PG_FUNCTION_ARGS) +{ + Name name = PG_GETARG_NAME(0); + Name plugin = PG_GETARG_NAME(1); + + LogicalDecodingContext *ctx = NULL; + + TupleDesc tupdesc; + HeapTuple tuple; + Datum result; + Datum values[2]; + bool nulls[2]; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + check_permissions(); + + CheckLogicalDecodingRequirements(); + + Assert(!MyReplicationSlot); + + /* + * Acquire a logical decoding slot, this will check for conflicting + * names. + */ + ReplicationSlotCreate(NameStr(*name), true, RS_EPHEMERAL); + + /* + * Create logical decoding context, to build the initial snapshot. + */ + ctx = CreateInitDecodingContext( + NameStr(*plugin), NIL, + logical_read_local_xlog_page, NULL, NULL); + + /* build initial snapshot, might take a while */ + DecodingContextFindStartpoint(ctx); + + values[0] = CStringGetTextDatum(NameStr(MyReplicationSlot->data.name)); + values[1] = LSNGetDatum(MyReplicationSlot->data.confirmed_flush); + + /* don't need the decoding context anymore */ + FreeDecodingContext(ctx); + + memset(nulls, 0, sizeof(nulls)); + + tuple = heap_form_tuple(tupdesc, values, nulls); + result = HeapTupleGetDatum(tuple); + + /* ok, slot is now fully created, mark it as persistent */ + ReplicationSlotPersist(); + ReplicationSlotRelease(); + + PG_RETURN_DATUM(result); +} + + /* * SQL function for dropping a replication slot. */ @@ -92,7 +154,7 @@ pg_drop_replication_slot(PG_FUNCTION_ARGS) Datum pg_get_replication_slots(PG_FUNCTION_ARGS) { -#define PG_STAT_GET_REPLICATION_SLOTS_COLS 6 +#define PG_GET_REPLICATION_SLOTS_COLS 8 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; TupleDesc tupdesc; Tuplestorestate *tupstore; @@ -134,15 +196,16 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) for (slotno = 0; slotno < max_replication_slots; slotno++) { ReplicationSlot *slot = &ReplicationSlotCtl->replication_slots[slotno]; - Datum values[PG_STAT_GET_REPLICATION_SLOTS_COLS]; - bool nulls[PG_STAT_GET_REPLICATION_SLOTS_COLS]; + Datum values[PG_GET_REPLICATION_SLOTS_COLS]; + bool nulls[PG_GET_REPLICATION_SLOTS_COLS]; TransactionId xmin; + TransactionId catalog_xmin; XLogRecPtr restart_lsn; bool active; Oid database; NameData slot_name; - + NameData plugin; int i; SpinLockAcquire(&slot->mutex); @@ -154,9 +217,11 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) else { xmin = slot->data.xmin; + catalog_xmin = slot->data.catalog_xmin; database = slot->data.database; restart_lsn = slot->data.restart_lsn; namecpy(&slot_name, &slot->data.name); + namecpy(&plugin, &slot->data.plugin); active = slot->active; } @@ -166,19 +231,34 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) i = 0; values[i++] = NameGetDatum(&slot_name); + + if (database == InvalidOid) + nulls[i++] = true; + else + values[i++] = NameGetDatum(&plugin); + if (database == InvalidOid) values[i++] = CStringGetTextDatum("physical"); else values[i++] = CStringGetTextDatum("logical"); + if (database == InvalidOid) nulls[i++] = true; else values[i++] = database; + values[i++] = BoolGetDatum(active); + if (xmin != InvalidTransactionId) values[i++] = TransactionIdGetDatum(xmin); else nulls[i++] = true; + + if (catalog_xmin != InvalidTransactionId) + values[i++] = TransactionIdGetDatum(catalog_xmin); + else + nulls[i++] = true; + if (restart_lsn != InvalidTransactionId) values[i++] = LSNGetDatum(restart_lsn); else diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index e31977eee0..43db10851c 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -1147,7 +1147,7 @@ XLogWalRcvSendHSFeedback(bool immed) * everything else has been checked. */ if (hot_standby_feedback) - xmin = GetOldestXmin(true, false); + xmin = GetOldestXmin(NULL, false); else xmin = InvalidTransactionId; diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 048367af29..5227eab414 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -55,6 +55,7 @@ #include "replication/basebackup.h" #include "replication/slot.h" #include "replication/syncrep.h" +#include "replication/slot.h" #include "replication/walreceiver.h" #include "replication/walsender.h" #include "replication/walsender_private.h" @@ -434,7 +435,7 @@ StartReplication(StartReplicationCmd *cmd) if (MyReplicationSlot->data.database != InvalidOid) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - (errmsg("cannot use a replication slot created for changeset extraction for streaming replication")))); + (errmsg("cannot use a logical replication slot for physical replication")))); } /* @@ -656,7 +657,9 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) sendTimeLineIsHistoric = false; sendTimeLine = ThisTimeLineID; - ReplicationSlotCreate(cmd->slotname, cmd->kind == REPLICATION_KIND_LOGICAL); + ReplicationSlotCreate(cmd->slotname, + cmd->kind == REPLICATION_KIND_LOGICAL, + RS_PERSISTENT); initStringInfo(&output_message); @@ -766,7 +769,7 @@ exec_replication_command(const char *cmd_string) if (cmd->kind == REPLICATION_KIND_PHYSICAL) StartReplication(cmd); else - elog(ERROR, "cannot handle changeset extraction yet"); + elog(ERROR, "cannot handle logical decoding yet"); break; } @@ -1017,7 +1020,7 @@ ProcessStandbyReplyMessage(void) if (MyReplicationSlot && flushPtr != InvalidXLogRecPtr) { if (MyReplicationSlot->data.database != InvalidOid) - elog(ERROR, "cannot handle changeset extraction yet"); + elog(ERROR, "cannot handle logical decoding yet"); else PhysicalConfirmReceivedLocation(flushPtr); } @@ -1050,7 +1053,7 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin) if (changed) { ReplicationSlotMarkDirty(); - ReplicationSlotsComputeRequiredXmin(); + ReplicationSlotsComputeRequiredXmin(false); } } diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index eac418442d..3376a353a4 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -50,11 +50,13 @@ #include "access/transam.h" #include "access/xact.h" #include "access/twophase.h" +#include "catalog/catalog.h" #include "miscadmin.h" #include "storage/proc.h" #include "storage/procarray.h" #include "storage/spin.h" #include "utils/builtins.h" +#include "utils/rel.h" #include "utils/snapmgr.h" @@ -84,6 +86,8 @@ typedef struct ProcArrayStruct /* oldest xmin of any replication slot */ TransactionId replication_slot_xmin; + /* oldest catalog xmin of any replication slot */ + TransactionId replication_slot_catalog_xmin; /* * We declare pgprocnos[] as 1 entry because C wants a fixed-size array, @@ -1108,21 +1112,22 @@ TransactionIdIsActive(TransactionId xid) * GetOldestXmin -- returns oldest transaction that was running * when any current transaction was started. * - * If allDbs is TRUE then all backends are considered; if allDbs is FALSE - * then only backends running in my own database are considered. + * If rel is NULL or a shared relation, all backends are considered, otherwise + * only backends running in this database are considered. * * If ignoreVacuum is TRUE then backends with the PROC_IN_VACUUM flag set are * ignored. * - * This is used by VACUUM to decide which deleted tuples must be preserved - * in a table. allDbs = TRUE is needed for shared relations, but allDbs = - * FALSE is sufficient for non-shared relations, since only backends in my - * own database could ever see the tuples in them. Also, we can ignore - * concurrently running lazy VACUUMs because (a) they must be working on other - * tables, and (b) they don't need to do snapshot-based lookups. + * This is used by VACUUM to decide which deleted tuples must be preserved in + * the passed in table. For shared relations backends in all databases must be + * considered, but for non-shared relations that's not required, since only + * backends in my own database could ever see the tuples in them. Also, we can + * ignore concurrently running lazy VACUUMs because (a) they must be working + * on other tables, and (b) they don't need to do snapshot-based lookups. * - * This is also used to determine where to truncate pg_subtrans. allDbs - * must be TRUE for that case, and ignoreVacuum FALSE. + * This is also used to determine where to truncate pg_subtrans. For that + * backends in all databases have to be considered, so rel = NULL has to be + * passed in. * * Note: we include all currently running xids in the set of considered xids. * This ensures that if a just-started xact has not yet set its snapshot, @@ -1133,7 +1138,7 @@ TransactionIdIsActive(TransactionId xid) * backwards on repeated calls. The calculated value is conservative, so that * anything older is definitely not considered as running by anyone anymore, * but the exact value calculated depends on a number of things. For example, - * if allDbs is FALSE and there are no transactions running in the current + * if rel = NULL and there are no transactions running in the current * database, GetOldestXmin() returns latestCompletedXid. If a transaction * begins after that, its xmin will include in-progress transactions in other * databases that started earlier, so another call will return a lower value. @@ -1152,12 +1157,22 @@ TransactionIdIsActive(TransactionId xid) * GetOldestXmin() move backwards, with no consequences for data integrity. */ TransactionId -GetOldestXmin(bool allDbs, bool ignoreVacuum) +GetOldestXmin(Relation rel, bool ignoreVacuum) { ProcArrayStruct *arrayP = procArray; TransactionId result; int index; + bool allDbs; + volatile TransactionId replication_slot_xmin = InvalidTransactionId; + volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId; + + /* + * If we're not computing a relation specific limit, or if a shared + * relation has been passed in, backends in all databases have to be + * considered. + */ + allDbs = rel == NULL || rel->rd_rel->relisshared; /* Cannot look for individual databases during recovery */ Assert(allDbs || !RecoveryInProgress()); @@ -1180,6 +1195,13 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) volatile PGPROC *proc = &allProcs[pgprocno]; volatile PGXACT *pgxact = &allPgXact[pgprocno]; + /* + * Backend is doing logical decoding which manages xmin separately, + * check below. + */ + if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING) + continue; + if (ignoreVacuum && (pgxact->vacuumFlags & PROC_IN_VACUUM)) continue; @@ -1211,6 +1233,7 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) /* fetch into volatile var while ProcArrayLock is held */ replication_slot_xmin = procArray->replication_slot_xmin; + replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin; if (RecoveryInProgress()) { @@ -1259,6 +1282,18 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum) NormalTransactionIdPrecedes(replication_slot_xmin, result)) result = replication_slot_xmin; + /* + * After locks have been released and defer_cleanup_age has been applied, + * check whether we need to back up further to make logical decoding + * possible. We need to do so if we're computing the global limit (rel = + * NULL) or if the passed relation is a catalog relation of some kind. + */ + if ((rel == NULL || + RelationIsAccessibleInLogicalDecoding(rel)) && + TransactionIdIsValid(replication_slot_catalog_xmin) && + NormalTransactionIdPrecedes(replication_slot_catalog_xmin, result)) + result = replication_slot_catalog_xmin; + return result; } @@ -1313,6 +1348,8 @@ GetMaxSnapshotSubxidCount(void) * RecentGlobalXmin: the global xmin (oldest TransactionXmin across all * running transactions, except those running LAZY VACUUM). This is * the same computation done by GetOldestXmin(true, true). + * RecentGlobalDataXmin: the global xmin for non-catalog tables + * >= RecentGlobalXmin * * Note: this function should probably not be called with an argument that's * not statically allocated (see xip allocation below). @@ -1329,6 +1366,7 @@ GetSnapshotData(Snapshot snapshot) int subcount = 0; bool suboverflowed = false; volatile TransactionId replication_slot_xmin = InvalidTransactionId; + volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId; Assert(snapshot != NULL); @@ -1397,6 +1435,13 @@ GetSnapshotData(Snapshot snapshot) volatile PGXACT *pgxact = &allPgXact[pgprocno]; TransactionId xid; + /* + * Backend is doing logical decoding which manages xmin + * separately, check below. + */ + if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING) + continue; + /* Ignore procs running LAZY VACUUM */ if (pgxact->vacuumFlags & PROC_IN_VACUUM) continue; @@ -1509,6 +1554,7 @@ GetSnapshotData(Snapshot snapshot) /* fetch into volatile var while ProcArrayLock is held */ replication_slot_xmin = procArray->replication_slot_xmin; + replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin; if (!TransactionIdIsValid(MyPgXact->xmin)) MyPgXact->xmin = TransactionXmin = xmin; @@ -1533,6 +1579,17 @@ GetSnapshotData(Snapshot snapshot) NormalTransactionIdPrecedes(replication_slot_xmin, RecentGlobalXmin)) RecentGlobalXmin = replication_slot_xmin; + /* Non-catalog tables can be vacuumed if older than this xid */ + RecentGlobalDataXmin = RecentGlobalXmin; + + /* + * Check whether there's a replication slot requiring an older catalog + * xmin. + */ + if (TransactionIdIsNormal(replication_slot_catalog_xmin) && + NormalTransactionIdPrecedes(replication_slot_catalog_xmin, RecentGlobalXmin)) + RecentGlobalXmin = replication_slot_catalog_xmin; + RecentXmin = xmin; snapshot->xmin = xmin; @@ -1633,9 +1690,11 @@ ProcArrayInstallImportedXmin(TransactionId xmin, TransactionId sourcexid) * Similar to GetSnapshotData but returns more information. We include * all PGXACTs with an assigned TransactionId, even VACUUM processes. * - * We acquire XidGenLock, but the caller is responsible for releasing it. - * This ensures that no new XIDs enter the proc array until the caller has - * WAL-logged this snapshot, and releases the lock. + * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for + * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc + * array until the caller has WAL-logged this snapshot, and releases the + * lock. Acquiring ProcArrayLock ensures that no transactions commit until the + * lock is released. * * The returned data structure is statically allocated; caller should not * modify it, and must not assume it is valid past the next call. @@ -1770,6 +1829,15 @@ GetRunningTransactionData(void) } } + /* + * It's important *not* to include the limits set by slots here because + * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those + * were to be included here the initial value could never increase because + * of a circular dependency where slots only increase their limits when + * running xacts increases oldestRunningXid and running xacts only + * increases if slots do. + */ + CurrentRunningXacts->xcnt = count - subcount; CurrentRunningXacts->subxcnt = subcount; CurrentRunningXacts->subxid_overflow = suboverflowed; @@ -1777,13 +1845,12 @@ GetRunningTransactionData(void) CurrentRunningXacts->oldestRunningXid = oldestRunningXid; CurrentRunningXacts->latestCompletedXid = latestCompletedXid; - /* We don't release XidGenLock here, the caller is responsible for that */ - LWLockRelease(ProcArrayLock); - Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid)); Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid)); Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid)); + /* We don't release the locks here, the caller is responsible for that */ + return CurrentRunningXacts; } @@ -1852,6 +1919,92 @@ GetOldestActiveTransactionId(void) return oldestRunningXid; } +/* + * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum + * + * Returns the oldest xid that we can guarantee not to have been affected by + * vacuum, i.e. no rows >= that xid have been vacuumed away unless the + * transaction aborted. Note that the value can (and most of the time will) be + * much more conservative than what really has been affected by vacuum, but we + * currently don't have better data available. + * + * This is useful to initalize the cutoff xid after which a new changeset + * extraction replication slot can start decoding changes. + * + * Must be called with ProcArrayLock held either shared or exclusively, + * although most callers will want to use exclusive mode since it is expected + * that the caller will immediately use the xid to peg the xmin horizon. + */ +TransactionId +GetOldestSafeDecodingTransactionId(void) +{ + ProcArrayStruct *arrayP = procArray; + TransactionId oldestSafeXid; + int index; + bool recovery_in_progress = RecoveryInProgress(); + + Assert(LWLockHeldByMe(ProcArrayLock)); + + /* + * Acquire XidGenLock, so no transactions can acquire an xid while we're + * running. If no transaction with xid were running concurrently a new xid + * could influence the the RecentXmin et al. + * + * We initialize the computation to nextXid since that's guaranteed to be + * a safe, albeit pessimal, value. + */ + LWLockAcquire(XidGenLock, LW_SHARED); + oldestSafeXid = ShmemVariableCache->nextXid; + + /* + * If there's already a slot pegging the xmin horizon, we can start with + * that value, it's guaranteed to be safe since it's computed by this + * routine initally and has been enforced since. + */ + if (TransactionIdIsValid(procArray->replication_slot_catalog_xmin) && + TransactionIdPrecedes(procArray->replication_slot_catalog_xmin, + oldestSafeXid)) + oldestSafeXid = procArray->replication_slot_catalog_xmin; + + /* + * If we're not in recovery, we walk over the procarray and collect the + * lowest xid. Since we're called with ProcArrayLock held and have + * acquired XidGenLock, no entries can vanish concurrently, since + * PGXACT->xid is only set with XidGenLock held and only cleared with + * ProcArrayLock held. + * + * In recovery we can't lower the safe value besides what we've computed + * above, so we'll have to wait a bit longer there. We unfortunately can + * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids + * machinery can miss values and return an older value than is safe. + */ + if (!recovery_in_progress) + { + /* + * Spin over procArray collecting all min(PGXACT->xid) + */ + for (index = 0; index < arrayP->numProcs; index++) + { + int pgprocno = arrayP->pgprocnos[index]; + volatile PGXACT *pgxact = &allPgXact[pgprocno]; + TransactionId xid; + + /* Fetch xid just once - see GetNewTransactionId */ + xid = pgxact->xid; + + if (!TransactionIdIsNormal(xid)) + continue; + + if (TransactionIdPrecedes(xid, oldestSafeXid)) + oldestSafeXid = xid; + } + } + + LWLockRelease(XidGenLock); + + return oldestSafeXid; +} + /* * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are * delaying checkpoint because they have critical actions in progress. @@ -2523,10 +2676,39 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) * replicaton slots. */ void -ProcArraySetReplicationSlotXmin(TransactionId xmin) +ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin, + bool already_locked) { - LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + Assert(!already_locked || LWLockHeldByMe(ProcArrayLock)); + + if (!already_locked) + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + procArray->replication_slot_xmin = xmin; + procArray->replication_slot_catalog_xmin = catalog_xmin; + + if (!already_locked) + LWLockRelease(ProcArrayLock); +} + +/* + * ProcArrayGetReplicationSlotXmin + * + * Return the current slot xmin limits. That's useful to be able to remove + * data that's older than those limits. + */ +void +ProcArrayGetReplicationSlotXmin(TransactionId *xmin, + TransactionId *catalog_xmin) +{ + LWLockAcquire(ProcArrayLock, LW_SHARED); + + if (xmin != NULL) + *xmin = procArray->replication_slot_xmin; + + if (catalog_xmin != NULL) + *catalog_xmin = procArray->replication_slot_catalog_xmin; + LWLockRelease(ProcArrayLock); } diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index fb5f18edfc..aa8bea5538 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -800,7 +800,9 @@ standby_redo(XLogRecPtr lsn, XLogRecord *record) /* * Log details of the current snapshot to WAL. This allows the snapshot state - * to be reconstructed on the standby. + * to be reconstructed on the standby and for logical decoding. + * + * This is used for Hot Standby as follows: * * We can move directly to STANDBY_SNAPSHOT_READY at startup if we * start from a shutdown checkpoint because we know nothing was running @@ -854,6 +856,12 @@ standby_redo(XLogRecPtr lsn, XLogRecord *record) * Zero xids should no longer be possible, but we may be replaying WAL * from a time when they were possible. * + * For logical decoding only the running xacts information is needed; + * there's no need to look at the locking information, but it's logged anyway, + * as there's no independent knob to just enable logical decoding. For + * details of how this is used, check snapbuild.c's introductory comment. + * + * * Returns the RecPtr of the last inserted record. */ XLogRecPtr @@ -879,8 +887,28 @@ LogStandbySnapshot(void) * record we write, because standby will open up when it sees this. */ running = GetRunningTransactionData(); + + /* + * GetRunningTransactionData() acquired ProcArrayLock, we must release + * it. For Hot Standby this can be done before inserting the WAL record + * because ProcArrayApplyRecoveryInfo() rechecks the commit status using + * the clog. For logical decoding, though, the lock can't be released + * early becuase the clog might be "in the future" from the POV of the + * historic snapshot. This would allow for situations where we're waiting + * for the end of a transaction listed in the xl_running_xacts record + * which, according to the WAL, have commit before the xl_running_xacts + * record. Fortunately this routine isn't executed frequently, and it's + * only a shared lock. + */ + if (wal_level < WAL_LEVEL_LOGICAL) + LWLockRelease(ProcArrayLock); + recptr = LogCurrentRunningXacts(running); + /* Release lock if we kept it longer ... */ + if (wal_level >= WAL_LEVEL_LOGICAL) + LWLockRelease(ProcArrayLock); + /* GetRunningTransactionData() acquired XidGenLock, we must release it */ LWLockRelease(XidGenLock); diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index fa460ca82e..f595a0747c 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -781,10 +781,6 @@ ProcKill(int code, Datum arg) /* Make sure we're out of the sync rep lists */ SyncRepCleanupAtProcExit(); - /* Make sure active replication slots are released */ - if (MyReplicationSlot != NULL) - ReplicationSlotRelease(); - #ifdef USE_ASSERT_CHECKING if (assert_enabled) { @@ -803,6 +799,10 @@ ProcKill(int code, Datum arg) */ LWLockReleaseAll(); + /* Make sure active replication slots are released */ + if (MyReplicationSlot != NULL) + ReplicationSlotRelease(); + /* * Clear MyProc first; then disown the process latch. This is so that * signal handlers won't try to clear the process latch after it's no diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index a230d7eda6..be961017d6 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -55,6 +55,7 @@ #include "pg_getopt.h" #include "postmaster/autovacuum.h" #include "postmaster/postmaster.h" +#include "replication/slot.h" #include "replication/walsender.h" #include "rewrite/rewriteHandler.h" #include "storage/bufmgr.h" @@ -3853,6 +3854,16 @@ PostgresMain(int argc, char *argv[], if (am_walsender) WalSndErrorCleanup(); + /* + * We can't release replication slots inside AbortTransaction() as we + * need to be able to start and abort transactions while having a slot + * acquired. But we never need to hold them across top level errors, + * so releasing here is fine. There's another cleanup in ProcKill() + * ensuring we'll correctly cleanup on FATAL errors as well. + */ + if (MyReplicationSlot != NULL) + ReplicationSlotRelease(); + /* * Now return to normal top-level context and clear ErrorContext for * next time. diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 4423fe01bd..115bcac5d2 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -512,7 +512,7 @@ RegisterSnapshotInvalidation(Oid dbId, Oid relId) * Only the local caches are flushed; this does not transmit the message * to other backends. */ -static void +void LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) { if (msg->id >= 0) @@ -596,7 +596,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) * since that tells us we've lost some shared-inval messages and hence * don't know what needs to be invalidated. */ -static void +void InvalidateSystemCaches(void) { int i; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 2810b35eea..32313244ad 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -73,6 +73,7 @@ #include "utils/memutils.h" #include "utils/relmapper.h" #include "utils/resowner_private.h" +#include "utils/snapmgr.h" #include "utils/syscache.h" #include "utils/tqual.h" @@ -235,7 +236,7 @@ static void formrdesc(const char *relationName, Oid relationReltype, bool isshared, bool hasoids, int natts, const FormData_pg_attribute *attrs); -static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK); +static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic); static Relation AllocateRelationDesc(Form_pg_class relp); static void RelationParseRelOptions(Relation relation, HeapTuple tuple); static void RelationBuildTupleDesc(Relation relation); @@ -274,12 +275,13 @@ static void unlink_initfile(const char *initfilename); * and must eventually be freed with heap_freetuple. */ static HeapTuple -ScanPgRelation(Oid targetRelId, bool indexOK) +ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_historic) { HeapTuple pg_class_tuple; Relation pg_class_desc; SysScanDesc pg_class_scan; ScanKeyData key[1]; + Snapshot snapshot; /* * If something goes wrong during backend startup, we might find ourselves @@ -305,9 +307,20 @@ ScanPgRelation(Oid targetRelId, bool indexOK) * scan by setting indexOK == false. */ pg_class_desc = heap_open(RelationRelationId, AccessShareLock); + + /* + * The caller might need a tuple that's newer than the one the historic + * snapshot; currently the only case requiring to do so is looking up the + * relfilenode of non mapped system relations during decoding. + */ + if (force_non_historic) + snapshot = GetNonHistoricCatalogSnapshot(RelationRelationId); + else + snapshot = GetCatalogSnapshot(RelationRelationId); + pg_class_scan = systable_beginscan(pg_class_desc, ClassOidIndexId, indexOK && criticalRelcachesBuilt, - NULL, + snapshot, 1, key); pg_class_tuple = systable_getnext(pg_class_scan); @@ -836,7 +849,7 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) /* * find the tuple in pg_class corresponding to the given relation id */ - pg_class_tuple = ScanPgRelation(targetRelId, true); + pg_class_tuple = ScanPgRelation(targetRelId, true, false); /* * if no such tuple exists, return NULL @@ -989,8 +1002,42 @@ RelationInitPhysicalAddr(Relation relation) relation->rd_node.dbNode = InvalidOid; else relation->rd_node.dbNode = MyDatabaseId; + if (relation->rd_rel->relfilenode) + { + /* + * Even if we are using a decoding snapshot that doesn't represent + * the current state of the catalog we need to make sure the + * filenode points to the current file since the older file will + * be gone (or truncated). The new file will still contain older + * rows so lookups in them will work correctly. This wouldn't work + * correctly if rewrites were allowed to change the schema in a + * noncompatible way, but those are prevented both on catalog + * tables and on user tables declared as additional catalog + * tables. + */ + if (HistoricSnapshotActive() + && RelationIsAccessibleInLogicalDecoding(relation) + && IsTransactionState()) + { + HeapTuple phys_tuple; + Form_pg_class physrel; + + phys_tuple = ScanPgRelation(RelationGetRelid(relation), + RelationGetRelid(relation) != ClassOidIndexId, + true); + if (!HeapTupleIsValid(phys_tuple)) + elog(ERROR, "could not find pg_class entry for %u", + RelationGetRelid(relation)); + physrel = (Form_pg_class) GETSTRUCT(phys_tuple); + + relation->rd_rel->reltablespace = physrel->reltablespace; + relation->rd_rel->relfilenode = physrel->relfilenode; + heap_freetuple(phys_tuple); + } + relation->rd_node.relNode = relation->rd_rel->relfilenode; + } else { /* Consult the relation mapper */ @@ -1742,7 +1789,7 @@ RelationReloadIndexInfo(Relation relation) * for pg_class_oid_index ... */ indexOK = (RelationGetRelid(relation) != ClassOidIndexId); - pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK); + pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), indexOK, false); if (!HeapTupleIsValid(pg_class_tuple)) elog(ERROR, "could not find pg_class tuple for index %u", RelationGetRelid(relation)); diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 4c0e0accc1..4146527d2f 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -19,6 +19,10 @@ * have regd_count = 1 and are counted in RegisteredSnapshots, but are not * tracked by any resource owner. * + * The same is true for historic snapshots used during logical decoding, + * their lifetime is managed separately (as they life longer as one xact.c + * transaction). + * * These arrangements let us reset MyPgXact->xmin when there are no snapshots * referenced by this transaction. (One possible improvement would be to be * able to advance Xmin when the snapshot with the earliest Xmin is no longer @@ -69,12 +73,13 @@ */ static SnapshotData CurrentSnapshotData = {HeapTupleSatisfiesMVCC}; static SnapshotData SecondarySnapshotData = {HeapTupleSatisfiesMVCC}; -static SnapshotData CatalogSnapshotData = {HeapTupleSatisfiesMVCC}; +SnapshotData CatalogSnapshotData = {HeapTupleSatisfiesMVCC}; /* Pointers to valid snapshots */ static Snapshot CurrentSnapshot = NULL; static Snapshot SecondarySnapshot = NULL; static Snapshot CatalogSnapshot = NULL; +static Snapshot HistoricSnapshot = NULL; /* * Staleness detection for CatalogSnapshot. @@ -86,13 +91,18 @@ static bool CatalogSnapshotStale = true; * for the convenience of TransactionIdIsInProgress: even in bootstrap * mode, we don't want it to say that BootstrapTransactionId is in progress. * - * RecentGlobalXmin is initialized to InvalidTransactionId, to ensure that no - * one tries to use a stale value. Readers should ensure that it has been set - * to something else before using it. + * RecentGlobalXmin and RecentGlobalDataXmin are initialized to + * InvalidTransactionId, to ensure that no one tries to use a stale + * value. Readers should ensure that it has been set to something else + * before using it. */ TransactionId TransactionXmin = FirstNormalTransactionId; TransactionId RecentXmin = FirstNormalTransactionId; TransactionId RecentGlobalXmin = InvalidTransactionId; +TransactionId RecentGlobalDataXmin = InvalidTransactionId; + +/* (table, ctid) => (cmin, cmax) mapping during timetravel */ +static HTAB *tuplecid_data = NULL; /* * Elements of the active snapshot stack. @@ -158,6 +168,18 @@ static void SnapshotResetXmin(void); Snapshot GetTransactionSnapshot(void) { + /* + * Return historic snapshot if doing logical decoding. We'll never + * need a non-historic transaction snapshot in this (sub-)transaction, so + * there's no need to be careful to set one up for later calls to + * GetTransactionSnapshot(). + */ + if (HistoricSnapshotActive()) + { + Assert(!FirstSnapshotSet); + return HistoricSnapshot; + } + /* First call in transaction? */ if (!FirstSnapshotSet) { @@ -214,6 +236,13 @@ GetTransactionSnapshot(void) Snapshot GetLatestSnapshot(void) { + /* + * So far there are no cases requiring support for GetLatestSnapshot() + * during logical decoding, but it wouldn't be hard to add if + * required. + */ + Assert(!HistoricSnapshotActive()); + /* If first call in transaction, go ahead and set the xact snapshot */ if (!FirstSnapshotSet) return GetTransactionSnapshot(); @@ -230,6 +259,26 @@ GetLatestSnapshot(void) */ Snapshot GetCatalogSnapshot(Oid relid) +{ + /* + * Return historic snapshot if we're doing logical decoding, but + * return a non-historic, snapshot if we temporarily are doing up2date + * lookups. + */ + if (HistoricSnapshotActive()) + return HistoricSnapshot; + + return GetNonHistoricCatalogSnapshot(relid); +} + +/* + * GetNonHistoricCatalogSnapshot + * Get a snapshot that is sufficiently up-to-date for scan of the system + * catalog with the specified OID, even while historic snapshots are set + * up. + */ +Snapshot +GetNonHistoricCatalogSnapshot(Oid relid) { /* * If the caller is trying to scan a relation that has no syscache, @@ -303,6 +352,7 @@ SetTransactionSnapshot(Snapshot sourcesnap, TransactionId sourcexid) Assert(RegisteredSnapshots == 0); Assert(FirstXactSnapshot == NULL); + Assert(HistoricSnapshotActive()); /* * Even though we are not going to use the snapshot it computes, we must @@ -796,7 +846,7 @@ AtEOXact_Snapshot(bool isCommit) * Returns the token (the file name) that can be used to import this * snapshot. */ -static char * +char * ExportSnapshot(Snapshot snapshot) { TransactionId topXid; @@ -1258,3 +1308,45 @@ ThereAreNoPriorRegisteredSnapshots(void) return false; } + +/* + * Setup a snapshot that replaces normal catalog snapshots that allows catalog + * access to behave just like it did at a certain point in the past. + * + * Needed for logical decoding. + */ +void +SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids) +{ + Assert(historic_snapshot != NULL); + + /* setup the timetravel snapshot */ + HistoricSnapshot = historic_snapshot; + + /* setup (cmin, cmax) lookup hash */ + tuplecid_data = tuplecids; +} + + +/* + * Make catalog snapshots behave normally again. + */ +void +TeardownHistoricSnapshot(bool is_error) +{ + HistoricSnapshot = NULL; + tuplecid_data = NULL; +} + +bool +HistoricSnapshotActive(void) +{ + return HistoricSnapshot != NULL; +} + +HTAB * +HistoricSnapshotGetTupleCids(void) +{ + Assert(HistoricSnapshotActive()); + return tuplecid_data; +} diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index f626755257..c4732ed311 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -62,6 +62,9 @@ #include "access/xact.h" #include "storage/bufmgr.h" #include "storage/procarray.h" +#include "utils/builtins.h" +#include "utils/combocid.h" +#include "utils/snapmgr.h" #include "utils/tqual.h" @@ -73,7 +76,6 @@ SnapshotData SnapshotToastData = {HeapTupleSatisfiesToast}; /* local functions */ static bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot); - /* * SetHintBits() * @@ -1545,3 +1547,163 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) */ return true; } + +/* + * check whether the transaciont id 'xid' in in the pre-sorted array 'xip'. + */ +static bool +TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num) +{ + return bsearch(&xid, xip, num, + sizeof(TransactionId), xidComparator) != NULL; +} + +/* + * See the comments for HeapTupleSatisfiesMVCC for the semantics this function + * obeys. + * + * Only usable on tuples from catalog tables! + * + * We don't need to support HEAP_MOVED_(IN|OFF) for now because we only support + * reading catalog pages which couldn't have been created in an older version. + * + * We don't set any hint bits in here as it seems unlikely to be beneficial as + * those should already be set by normal access and it seems to be too + * dangerous to do so as the semantics of doing so during timetravel are more + * complicated than when dealing "only" with the present. + */ +bool +HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, + Buffer buffer) +{ + HeapTupleHeader tuple = htup->t_data; + TransactionId xmin = HeapTupleHeaderGetXmin(tuple); + TransactionId xmax = HeapTupleHeaderGetRawXmax(tuple); + + Assert(ItemPointerIsValid(&htup->t_self)); + Assert(htup->t_tableOid != InvalidOid); + + /* inserting transaction aborted */ + if (HeapTupleHeaderXminInvalid(tuple)) + { + Assert(!TransactionIdDidCommit(xmin)); + return false; + } + /* check if its one of our txids, toplevel is also in there */ + else if (TransactionIdInArray(xmin, snapshot->subxip, snapshot->subxcnt)) + { + bool resolved; + CommandId cmin = HeapTupleHeaderGetRawCommandId(tuple); + CommandId cmax = InvalidCommandId; + + /* + * another transaction might have (tried to) delete this tuple or + * cmin/cmax was stored in a combocid. S we need to to lookup the + * actual values externally. + */ + resolved = ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(), snapshot, + htup, buffer, + &cmin, &cmax); + + if (!resolved) + elog(ERROR, "could not resolve cmin/cmax of catalog tuple"); + + Assert(cmin != InvalidCommandId); + + if (cmin >= snapshot->curcid) + return false; /* inserted after scan started */ + /* fall through */ + } + /* committed before our xmin horizon. Do a normal visibility check. */ + else if (TransactionIdPrecedes(xmin, snapshot->xmin)) + { + Assert(!(HeapTupleHeaderXminCommitted(tuple) && + !TransactionIdDidCommit(xmin))); + + /* check for hint bit first, consult clog afterwards */ + if (!HeapTupleHeaderXminCommitted(tuple) && + !TransactionIdDidCommit(xmin)) + return false; + /* fall through */ + } + /* beyond our xmax horizon, i.e. invisible */ + else if (TransactionIdFollowsOrEquals(xmin, snapshot->xmax)) + { + return false; + } + /* check if it's a committed transaction in [xmin, xmax) */ + else if(TransactionIdInArray(xmin, snapshot->xip, snapshot->xcnt)) + { + /* fall through */ + } + /* + * none of the above, i.e. between [xmin, xmax) but hasn't + * committed. I.e. invisible. + */ + else + { + return false; + } + + /* at this point we know xmin is visible, go on to check xmax */ + + /* xid invalid or aborted */ + if (tuple->t_infomask & HEAP_XMAX_INVALID) + return true; + /* locked tuples are always visible */ + else if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) + return true; + /* + * We can see multis here if we're looking at user tables or if + * somebody SELECT ... FOR SHARE/UPDATE a system table. + */ + else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) + { + xmax = HeapTupleGetUpdateXid(tuple); + } + + /* check if its one of our txids, toplevel is also in there */ + if (TransactionIdInArray(xmax, snapshot->subxip, snapshot->subxcnt)) + { + bool resolved; + CommandId cmin; + CommandId cmax = HeapTupleHeaderGetRawCommandId(tuple); + + /* Lookup actual cmin/cmax values */ + resolved = ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(), snapshot, + htup, buffer, + &cmin, &cmax); + + if (!resolved) + elog(ERROR, "could not resolve combocid to cmax"); + + Assert(cmax != InvalidCommandId); + + if (cmax >= snapshot->curcid) + return true; /* deleted after scan started */ + else + return false; /* deleted before scan started */ + } + /* below xmin horizon, normal transaction state is valid */ + else if (TransactionIdPrecedes(xmax, snapshot->xmin)) + { + Assert(!(tuple->t_infomask & HEAP_XMAX_COMMITTED && + !TransactionIdDidCommit(xmax))); + + /* check hint bit first */ + if (tuple->t_infomask & HEAP_XMAX_COMMITTED) + return false; + + /* check clog */ + return !TransactionIdDidCommit(xmax); + } + /* above xmax horizon, we cannot possibly see the deleting transaction */ + else if (TransactionIdFollowsOrEquals(xmax, snapshot->xmax)) + return true; + /* xmax is between [xmin, xmax), check known committed array */ + else if (TransactionIdInArray(xmax, snapshot->xip, snapshot->xcnt)) + return false; + /* xmax is between [xmin, xmax), but known not to have committed yet */ + else + return true; +} diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 71248ee1bc..a7d5f7a153 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -198,7 +198,10 @@ static const char *subdirs[] = { "pg_replslot", "pg_tblspc", "pg_stat", - "pg_stat_tmp" + "pg_stat_tmp", + "pg_llog", + "pg_llog/snapshots", + "pg_llog/mappings" }; diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index bfdadc3d5b..0f802577c7 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -164,8 +164,7 @@ extern void heap_restrpos(HeapScanDesc scan); extern void heap_sync(Relation relation); /* in heap/pruneheap.c */ -extern void heap_page_prune_opt(Relation relation, Buffer buffer, - TransactionId OldestXmin); +extern void heap_page_prune_opt(Relation relation, Buffer buffer); extern int heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, bool report_stats, TransactionId *latestRemovedXid); diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index d4383ab2cb..194635952c 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -48,7 +48,7 @@ * the ones above associated with RM_HEAP_ID. XLOG_HEAP_OPMASK applies to * these, too. */ -/* 0x00 is free, was XLOG_HEAP2_FREEZE */ +#define XLOG_HEAP2_REWRITE 0x00 #define XLOG_HEAP2_CLEAN 0x10 #define XLOG_HEAP2_FREEZE_PAGE 0x20 #define XLOG_HEAP2_CLEANUP_INFO 0x30 @@ -332,6 +332,17 @@ typedef struct xl_heap_new_cid xl_heaptid target; } xl_heap_new_cid; +/* logical rewrite xlog record header */ +typedef struct xl_heap_rewrite_mapping +{ + TransactionId mapped_xid; /* xid that might need to see the row */ + Oid mapped_db; /* DbOid or InvalidOid for shared rels */ + Oid mapped_rel; /* Oid of the mapped relation */ + off_t offset; /* How far have we written so far */ + uint32 num_mappings; /* Number of in-memory mappings */ + XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */ +} xl_heap_rewrite_mapping; + #define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target) + SizeOfHeapTid) extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, @@ -341,6 +352,7 @@ extern void heap_redo(XLogRecPtr lsn, XLogRecord *rptr); extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec); extern void heap2_redo(XLogRecPtr lsn, XLogRecord *rptr); extern void heap2_desc(StringInfo buf, uint8 xl_info, char *rec); +extern void heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r); extern XLogRecPtr log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid); diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h index d098a0b171..07df3b4f2b 100644 --- a/src/include/access/rewriteheap.h +++ b/src/include/access/rewriteheap.h @@ -14,12 +14,14 @@ #define REWRITE_HEAP_H #include "access/htup.h" +#include "storage/itemptr.h" +#include "storage/relfilenode.h" #include "utils/relcache.h" /* struct definition is private to rewriteheap.c */ typedef struct RewriteStateData *RewriteState; -extern RewriteState begin_heap_rewrite(Relation NewHeap, +extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap, TransactionId OldestXmin, TransactionId FreezeXid, MultiXactId MultiXactCutoff, bool use_wal); extern void end_heap_rewrite(RewriteState state); @@ -27,4 +29,29 @@ extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple, HeapTuple newTuple); extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple); +/* + * On-Disk data format for an individual logical rewrite mapping. + */ +typedef struct LogicalRewriteMappingData +{ + RelFileNode old_node; + RelFileNode new_node; + ItemPointerData old_tid; + ItemPointerData new_tid; +} LogicalRewriteMappingData; + +/* --- + * The filename consists out of the following, dash separated, + * components: + * 1) database oid or InvalidOid for shared relations + * 2) the oid of the relation + * 3) xid we are mapping for + * 4) upper 32bit of the LSN at which a rewrite started + * 5) lower 32bit of the LSN at which a rewrite started + * 6) xid of the xact performing the mapping + * --- + */ +#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x-%x" +void CheckPointLogicalRewriteHeap(void); + #endif /* REWRITE_HEAP_H */ diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 8376dfd669..a9774e9f59 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -63,6 +63,11 @@ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ (int32) ((id1) - (id2)) < 0) +/* compare two XIDs already known to be normal; this is a macro for speed */ +#define NormalTransactionIdFollows(id1, id2) \ + (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ + (int32) ((id1) - (id2)) > 0) + /* ---------- * Object ID (OID) zero is InvalidOid. * diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h index 5adf4f2816..296d016c9f 100644 --- a/src/include/access/tuptoaster.h +++ b/src/include/access/tuptoaster.h @@ -98,9 +98,34 @@ /* Size of an EXTERNAL datum that contains a standard TOAST pointer */ #define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_external)) -/* Size of an indirect datum that contains an indirect TOAST pointer */ +/* Size of an indirect datum that contains a standard TOAST pointer */ #define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(struct varatt_indirect)) +/* + * Testing whether an externally-stored value is compressed now requires + * comparing extsize (the actual length of the external data) to rawsize + * (the original uncompressed datum's size). The latter includes VARHDRSZ + * overhead, the former doesn't. We never use compression unless it actually + * saves space, so we expect either equality or less-than. + */ +#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \ + ((toast_pointer).va_extsize < (toast_pointer).va_rawsize - VARHDRSZ) + +/* + * Macro to fetch the possibly-unaligned contents of an EXTERNAL datum + * into a local "struct varatt_external" toast pointer. This should be + * just a memcpy, but some versions of gcc seem to produce broken code + * that assumes the datum contents are aligned. Introducing an explicit + * intermediate "varattrib_1b_e *" variable seems to fix it. + */ +#define VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr) \ +do { \ + varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \ + Assert(VARATT_IS_EXTERNAL(attre)); \ + Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \ + memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \ +} while (0) + /* ---------- * toast_insert_or_update - * diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 11ab277199..a238292b76 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -288,6 +288,7 @@ extern int XLogFileOpen(XLogSegNo segno); extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std); extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli); +extern XLogSegNo XLogGetLastRemovedSegno(void); extern void XLogSetAsyncXactLSN(XLogRecPtr record); extern void XLogSetReplicationSlotMinimumLSN(XLogRecPtr lsn); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 80560574bf..22dd0fc58e 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201403031 +#define CATALOG_VERSION_NO 201403032 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 7a11721ba4..c570651872 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4804,8 +4804,18 @@ DATA(insert OID = 3779 ( pg_create_physical_replication_slot PGNSP PGUID 12 1 0 DESCR("create a physical replication slot"); DATA(insert OID = 3780 ( pg_drop_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f f f v 1 0 2278 "19" _null_ _null_ _null_ _null_ pg_drop_replication_slot _null_ _null_ _null_ )); DESCR("drop a replication slot"); -DATA(insert OID = 3781 ( pg_get_replication_slots PGNSP PGUID 12 1 10 0 0 f f f f f t s 0 0 2249 "" "{19,25,26,16,28,3220}" "{o,o,o,o,o,o}" "{slot_name,slot_type,datoid,active,xmin,restart_lsn}" _null_ pg_get_replication_slots _null_ _null_ _null_ )); +DATA(insert OID = 3781 ( pg_get_replication_slots PGNSP PGUID 12 1 10 0 0 f f f f f t s 0 0 2249 "" "{19,19,25,26,16,28,28,3220}" "{o,o,o,o,o,o,o,o}" "{slot_name,plugin,slot_type,datoid,active,xmin,catalog_xmin,restart_lsn}" _null_ pg_get_replication_slots _null_ _null_ _null_ )); DESCR("information about replication slots currently in use"); +DATA(insert OID = 3786 ( pg_create_logical_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f f f v 2 0 2249 "19 19" "{19,19,25,3220}" "{i,i,o,o}" "{slotname,plugin,slotname,xlog_position}" _null_ pg_create_logical_replication_slot _null_ _null_ _null_ )); +DESCR("set up a logical replication slot"); +DATA(insert OID = 3782 ( pg_logical_slot_get_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,25}" "{i,i,i,v,o,o,o}" "{slotname,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ pg_logical_slot_get_changes _null_ _null_ _null_ )); +DESCR("get changes from replication slot"); +DATA(insert OID = 3783 ( pg_logical_slot_get_binary_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,17}" "{i,i,i,v,o,o,o}" "{slotname,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ pg_logical_slot_get_binary_changes _null_ _null_ _null_ )); +DESCR("get binary changes from replication slot"); +DATA(insert OID = 3784 ( pg_logical_slot_peek_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,25}" "{i,i,i,v,o,o,o}" "{slotname,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ pg_logical_slot_peek_changes _null_ _null_ _null_ )); +DESCR("peek at changes from replication slot"); +DATA(insert OID = 3785 ( pg_logical_slot_peek_binary_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,17}" "{i,i,i,v,o,o,o}" "{slotname,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ pg_logical_slot_peek_binary_changes _null_ _null_ _null_ )); +DESCR("peek at binary changes from replication slot"); /* event triggers */ DATA(insert OID = 3566 ( pg_event_trigger_dropped_objects PGNSP PGUID 12 10 100 0 0 f f f f t t s 0 0 2249 "" "{26,26,23,25,25,25,25}" "{o,o,o,o,o,o,o}" "{classid, objid, objsubid, object_type, schema_name, object_name, object_identity}" _null_ pg_event_trigger_dropped_objects _null_ _null_ _null_ )); diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 70350e02cb..058dc5f667 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -157,10 +157,10 @@ extern void vac_update_relstats(Relation relation, bool hasindex, TransactionId frozenxid, MultiXactId minmulti); -extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age, +extern void vacuum_set_xid_limits(Relation rel, + int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, - bool sharedRel, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, diff --git a/src/include/replication/decode.h b/src/include/replication/decode.h new file mode 100644 index 0000000000..7f55d789a2 --- /dev/null +++ b/src/include/replication/decode.h @@ -0,0 +1,19 @@ +/*------------------------------------------------------------------------- + * decode.h + * PostgreSQL WAL to logical transformation + * + * Portions Copyright (c) 2012-2014, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ +#ifndef DECODE_H +#define DECODE_H + +#include "access/xlogreader.h" +#include "replication/reorderbuffer.h" +#include "replication/logical.h" + +void LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, + XLogRecord *record); + +#endif diff --git a/src/include/replication/logical.h b/src/include/replication/logical.h new file mode 100644 index 0000000000..e65c8b8075 --- /dev/null +++ b/src/include/replication/logical.h @@ -0,0 +1,100 @@ +/*------------------------------------------------------------------------- + * logical.h + * PostgreSQL logical decoding coordination + * + * Copyright (c) 2012-2014, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ +#ifndef LOGICAL_H +#define LOGICAL_H + +#include "replication/slot.h" + +#include "access/xlog.h" +#include "access/xlogreader.h" +#include "replication/output_plugin.h" + +struct LogicalDecodingContext; + +typedef void (*LogicalOutputPluginWriterWrite) ( + struct LogicalDecodingContext *lr, + XLogRecPtr Ptr, + TransactionId xid, + bool last_write +); + +typedef LogicalOutputPluginWriterWrite LogicalOutputPluginWriterPrepareWrite; + +typedef struct LogicalDecodingContext +{ + /* memory context this is all allocated in */ + MemoryContext context; + + /* infrastructure pieces */ + XLogReaderState *reader; + ReplicationSlot *slot; + struct ReorderBuffer *reorder; + struct SnapBuild *snapshot_builder; + + OutputPluginCallbacks callbacks; + OutputPluginOptions options; + + /* + * User specified options + */ + List *output_plugin_options; + + /* + * User-Provided callback for writing/streaming out data. + */ + LogicalOutputPluginWriterPrepareWrite prepare_write; + LogicalOutputPluginWriterWrite write; + + /* + * Output buffer. + */ + StringInfo out; + + /* + * Private data pointer of the output plugin. + */ + void *output_plugin_private; + + /* + * Private data pointer for the data writer. + */ + void *output_writer_private; + + /* + * State for writing output. + */ + bool accept_writes; + bool prepared_write; + XLogRecPtr write_location; + TransactionId write_xid; +} LogicalDecodingContext; + +extern void CheckLogicalDecodingRequirements(void); + +extern LogicalDecodingContext *CreateInitDecodingContext(char *plugin, + List *output_plugin_options, + XLogPageReadCB read_page, + LogicalOutputPluginWriterPrepareWrite prepare_write, + LogicalOutputPluginWriterWrite do_write); +extern LogicalDecodingContext *CreateDecodingContext( + XLogRecPtr start_lsn, + List *output_plugin_options, + XLogPageReadCB read_page, + LogicalOutputPluginWriterPrepareWrite prepare_write, + LogicalOutputPluginWriterWrite do_write); +extern void DecodingContextFindStartpoint(LogicalDecodingContext *ctx); +extern bool DecodingContextReady(LogicalDecodingContext *ctx); +extern void FreeDecodingContext(LogicalDecodingContext *ctx); + +extern void LogicalIncreaseXminForSlot(XLogRecPtr lsn, TransactionId xmin); +extern void LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, + XLogRecPtr restart_lsn); +extern void LogicalConfirmReceivedLocation(XLogRecPtr lsn); + +#endif diff --git a/src/include/replication/logicalfuncs.h b/src/include/replication/logicalfuncs.h new file mode 100644 index 0000000000..21bf44ec4b --- /dev/null +++ b/src/include/replication/logicalfuncs.h @@ -0,0 +1,24 @@ +/*------------------------------------------------------------------------- + * logicalfuncs.h + * PostgreSQL WAL to logical transformation support functions + * + * Copyright (c) 2012-2014, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ +#ifndef LOGICALFUNCS_H +#define LOGICALFUNCS_H + +#include "replication/logical.h" + +extern int logical_read_local_xlog_page(XLogReaderState *state, + XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, + char *cur_page, TimeLineID *pageTLI); + +extern Datum pg_logical_slot_get_changes(PG_FUNCTION_ARGS); +extern Datum pg_logical_slot_get_binary_changes(PG_FUNCTION_ARGS); +extern Datum pg_logical_slot_peek_changes(PG_FUNCTION_ARGS); +extern Datum pg_logical_slot_peek_binary_changes(PG_FUNCTION_ARGS); + +#endif diff --git a/src/include/replication/output_plugin.h b/src/include/replication/output_plugin.h new file mode 100644 index 0000000000..c47c24c8db --- /dev/null +++ b/src/include/replication/output_plugin.h @@ -0,0 +1,98 @@ +/*------------------------------------------------------------------------- + * output_plugin.h + * PostgreSQL Logical Decode Plugin Interface + * + * Copyright (c) 2012-2014, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ +#ifndef OUTPUT_PLUGIN_H +#define OUTPUT_PLUGIN_H + +#include "replication/reorderbuffer.h" + +struct LogicalDecodingContext; +struct OutputPluginCallbacks; + +typedef enum OutputPluginOutputType +{ + OUTPUT_PLUGIN_BINARY_OUTPUT, + OUTPUT_PLUGIN_TEXTUAL_OUTPUT +} OutputPluginOutputType; + +/* + * Options set by the output plugin, in the startup callback. + */ +typedef struct OutputPluginOptions +{ + OutputPluginOutputType output_type; +} OutputPluginOptions; + +/* + * Type of the shared library symbol _PG_output_plugin_init that is looked up + * when loading an output plugin shared library. + */ +typedef void (*LogicalOutputPluginInit)(struct OutputPluginCallbacks *cb); + +/* + * Callback that gets called in a user-defined plugin. ctx->private_data can + * be set to some private data. + * + * "is_init" will be set to "true" if the decoding slot just got defined. When + * the same slot is used from there one, it will be "false". + */ +typedef void (*LogicalDecodeStartupCB) ( + struct LogicalDecodingContext *ctx, + OutputPluginOptions *options, + bool is_init +); + +/* + * Callback called for every (explicit or implicit) BEGIN of a successful + * transaction. + */ +typedef void (*LogicalDecodeBeginCB) ( + struct LogicalDecodingContext *, + ReorderBufferTXN *txn); + +/* + * Callback for every individual change in a successful transaction. + */ +typedef void (*LogicalDecodeChangeCB) ( + struct LogicalDecodingContext *, + ReorderBufferTXN *txn, + Relation relation, + ReorderBufferChange *change +); + +/* + * Called for every (explicit or implicit) COMMIT of a successful transaction. + */ +typedef void (*LogicalDecodeCommitCB) ( + struct LogicalDecodingContext *, + ReorderBufferTXN *txn, + XLogRecPtr commit_lsn); + +/* + * Called to shutdown an output plugin. + */ +typedef void (*LogicalDecodeShutdownCB) ( + struct LogicalDecodingContext * +); + +/* + * Output plugin callbacks + */ +typedef struct OutputPluginCallbacks +{ + LogicalDecodeStartupCB startup_cb; + LogicalDecodeBeginCB begin_cb; + LogicalDecodeChangeCB change_cb; + LogicalDecodeCommitCB commit_cb; + LogicalDecodeShutdownCB shutdown_cb; +} OutputPluginCallbacks; + +void OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write); +void OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write); + +#endif /* OUTPUT_PLUGIN_H */ diff --git a/src/include/replication/reorderbuffer.h b/src/include/replication/reorderbuffer.h new file mode 100644 index 0000000000..01eabfb7be --- /dev/null +++ b/src/include/replication/reorderbuffer.h @@ -0,0 +1,351 @@ +/* + * reorderbuffer.h + * PostgreSQL logical replay/reorder buffer management. + * + * Copyright (c) 2012-2014, PostgreSQL Global Development Group + * + * src/include/replication/reorderbuffer.h + */ +#ifndef REORDERBUFFER_H +#define REORDERBUFFER_H + +#include "access/htup_details.h" + +#include "lib/ilist.h" + +#include "storage/sinval.h" + +#include "utils/hsearch.h" +#include "utils/rel.h" +#include "utils/snapshot.h" +#include "utils/timestamp.h" + +/* an individual tuple, stored in one chunk of memory */ +typedef struct ReorderBufferTupleBuf +{ + /* position in preallocated list */ + slist_node node; + + /* tuple, stored sequentially */ + HeapTupleData tuple; + HeapTupleHeaderData header; + char data[MaxHeapTupleSize]; +} ReorderBufferTupleBuf; + +/* types of the change passed to a 'change' callback */ +enum ReorderBufferChangeType +{ + REORDER_BUFFER_CHANGE_INSERT, + REORDER_BUFFER_CHANGE_UPDATE, + REORDER_BUFFER_CHANGE_DELETE +}; + +/* + * a single 'change', can be an insert (with one tuple), an update (old, new), + * or a delete (old). + * + * The same struct is also used internally for other purposes but that should + * never be visible outside reorderbuffer.c. + */ +typedef struct ReorderBufferChange +{ + XLogRecPtr lsn; + + /* type of change */ + union + { + enum ReorderBufferChangeType action; + /* do not leak internal enum values to the outside */ + int action_internal; + }; + + /* + * Context data for the change, which part of the union is valid depends + * on action/action_internal. + */ + union + { + /* old, new tuples when action == *_INSERT|UPDATE|DELETE */ + struct + { + /* relation that has been changed */ + RelFileNode relnode; + /* valid for DELETE || UPDATE */ + ReorderBufferTupleBuf *oldtuple; + /* valid for INSERT || UPDATE */ + ReorderBufferTupleBuf *newtuple; + } tp; + + /* new snapshot */ + Snapshot snapshot; + + /* new command id for existing snapshot in a catalog changing tx */ + CommandId command_id; + + /* new cid mapping for catalog changing transaction */ + struct + { + RelFileNode node; + ItemPointerData tid; + CommandId cmin; + CommandId cmax; + CommandId combocid; + } tuplecid; + }; + + /* + * While in use this is how a change is linked into a transactions, + * otherwise it's the preallocated list. + */ + dlist_node node; +} ReorderBufferChange; + +typedef struct ReorderBufferTXN +{ + /* + * The transactions transaction id, can be a toplevel or sub xid. + */ + TransactionId xid; + + /* did the TX have catalog changes */ + bool has_catalog_changes; + + /* + * Do we know this is a subxact? + */ + bool is_known_as_subxact; + + /* + * LSN of the first data carrying, WAL record with knowledge about this + * xid. This is allowed to *not* be first record adorned with this xid, if + * the previous records aren't relevant for logical decoding. + */ + XLogRecPtr first_lsn; + + /* ---- + * LSN of the record that lead to this xact to be committed or + * aborted. This can be a + * * plain commit record + * * plain commit record, of a parent transaction + * * prepared transaction commit + * * plain abort record + * * prepared transaction abort + * * error during decoding + * ---- + */ + XLogRecPtr final_lsn; + + /* + * LSN pointing to the end of the commit record + 1. + */ + XLogRecPtr end_lsn; + + /* + * LSN of the last lsn at which snapshot information reside, so we can + * restart decoding from there and fully recover this transaction from + * WAL. + */ + XLogRecPtr restart_decoding_lsn; + + /* + * Commit time, only known when we read the actual commit record. + */ + TimestampTz commit_time; + + /* + * Base snapshot or NULL. + */ + Snapshot base_snapshot; + XLogRecPtr base_snapshot_lsn; + + /* + * How many ReorderBufferChange's do we have in this txn. + * + * Changes in subtransactions are *not* included but tracked separately. + */ + uint64 nentries; + + /* + * How many of the above entries are stored in memory in contrast to being + * spilled to disk. + */ + uint64 nentries_mem; + + /* + * List of ReorderBufferChange structs, including new Snapshots and new + * CommandIds + */ + dlist_head changes; + + /* + * List of (relation, ctid) => (cmin, cmax) mappings for catalog tuples. + * Those are always assigned to the toplevel transaction. (Keep track of + * #entries to create a hash of the right size) + */ + dlist_head tuplecids; + uint64 ntuplecids; + + /* + * On-demand built hash for looking up the above values. + */ + HTAB *tuplecid_hash; + + /* + * Hash containing (potentially partial) toast entries. NULL if no toast + * tuples have been found for the current change. + */ + HTAB *toast_hash; + + /* + * non-hierarchical list of subtransactions that are *not* aborted. Only + * used in toplevel transactions. + */ + dlist_head subtxns; + uint32 nsubtxns; + + /* + * Stored cache invalidations. This is not a linked list because we get + * all the invalidations at once. + */ + uint32 ninvalidations; + SharedInvalidationMessage *invalidations; + + /* --- + * Position in one of three lists: + * * list of subtransactions if we are *known* to be subxact + * * list of toplevel xacts (can be a as-yet unknown subxact) + * * list of preallocated ReorderBufferTXNs + * --- + */ + dlist_node node; + +} ReorderBufferTXN; + +/* so we can define the callbacks used inside struct ReorderBuffer itself */ +typedef struct ReorderBuffer ReorderBuffer; + +/* change callback signature */ +typedef void (*ReorderBufferApplyChangeCB) ( + ReorderBuffer *rb, + ReorderBufferTXN *txn, + Relation relation, + ReorderBufferChange *change); + +/* begin callback signature */ +typedef void (*ReorderBufferBeginCB) ( + ReorderBuffer *rb, + ReorderBufferTXN *txn); + +/* commit callback signature */ +typedef void (*ReorderBufferCommitCB) ( + ReorderBuffer *rb, + ReorderBufferTXN *txn, + XLogRecPtr commit_lsn); + +struct ReorderBuffer +{ + /* + * xid => ReorderBufferTXN lookup table + */ + HTAB *by_txn; + + /* + * Transactions that could be a toplevel xact, ordered by LSN of the first + * record bearing that xid.. + */ + dlist_head toplevel_by_lsn; + + /* + * one-entry sized cache for by_txn. Very frequently the same txn gets + * looked up over and over again. + */ + TransactionId by_txn_last_xid; + ReorderBufferTXN *by_txn_last_txn; + + /* + * Callacks to be called when a transactions commits. + */ + ReorderBufferBeginCB begin; + ReorderBufferApplyChangeCB apply_change; + ReorderBufferCommitCB commit; + + /* + * Pointer that will be passed untouched to the callbacks. + */ + void *private_data; + + /* + * Private memory context. + */ + MemoryContext context; + + /* + * Data structure slab cache. + * + * We allocate/deallocate some structures very frequently, to avoid bigger + * overhead we cache some unused ones here. + * + * The maximum number of cached entries is controlled by const variables + * ontop of reorderbuffer.c + */ + + /* cached ReorderBufferTXNs */ + dlist_head cached_transactions; + Size nr_cached_transactions; + + /* cached ReorderBufferChanges */ + dlist_head cached_changes; + Size nr_cached_changes; + + /* cached ReorderBufferTupleBufs */ + slist_head cached_tuplebufs; + Size nr_cached_tuplebufs; + + XLogRecPtr current_restart_decoding_lsn; + + /* buffer for disk<->memory conversions */ + char *outbuf; + Size outbufsize; +}; + + +ReorderBuffer *ReorderBufferAllocate(void); +void ReorderBufferFree(ReorderBuffer *); + +ReorderBufferTupleBuf *ReorderBufferGetTupleBuf(ReorderBuffer *); +void ReorderBufferReturnTupleBuf(ReorderBuffer *, ReorderBufferTupleBuf *tuple); +ReorderBufferChange *ReorderBufferGetChange(ReorderBuffer *); +void ReorderBufferReturnChange(ReorderBuffer *, ReorderBufferChange *); + +void ReorderBufferQueueChange(ReorderBuffer *, TransactionId, XLogRecPtr lsn, ReorderBufferChange *); +void ReorderBufferCommit(ReorderBuffer *, TransactionId, + XLogRecPtr commit_lsn, XLogRecPtr end_lsn, + TimestampTz commit_time); +void ReorderBufferAssignChild(ReorderBuffer *, TransactionId, TransactionId, XLogRecPtr commit_lsn); +void ReorderBufferCommitChild(ReorderBuffer *, TransactionId, TransactionId, + XLogRecPtr commit_lsn, XLogRecPtr end_lsn); +void ReorderBufferAbort(ReorderBuffer *, TransactionId, XLogRecPtr lsn); +void ReorderBufferAbortOld(ReorderBuffer *, TransactionId xid); +void ReorderBufferForget(ReorderBuffer *, TransactionId, XLogRecPtr lsn); + +void ReorderBufferSetBaseSnapshot(ReorderBuffer *, TransactionId, XLogRecPtr lsn, struct SnapshotData *snap); +void ReorderBufferAddSnapshot(ReorderBuffer *, TransactionId, XLogRecPtr lsn, struct SnapshotData *snap); +void ReorderBufferAddNewCommandId(ReorderBuffer *, TransactionId, XLogRecPtr lsn, + CommandId cid); +void ReorderBufferAddNewTupleCids(ReorderBuffer *, TransactionId, XLogRecPtr lsn, + RelFileNode node, ItemPointerData pt, + CommandId cmin, CommandId cmax, CommandId combocid); +void ReorderBufferAddInvalidations(ReorderBuffer *, TransactionId, XLogRecPtr lsn, + Size nmsgs, SharedInvalidationMessage *msgs); +bool ReorderBufferIsXidKnown(ReorderBuffer *, TransactionId xid); +void ReorderBufferXidSetCatalogChanges(ReorderBuffer *, TransactionId xid, XLogRecPtr lsn); +bool ReorderBufferXidHasCatalogChanges(ReorderBuffer *, TransactionId xid); +bool ReorderBufferXidHasBaseSnapshot(ReorderBuffer *, TransactionId xid); + +ReorderBufferTXN *ReorderBufferGetOldestTXN(ReorderBuffer *); + +void ReorderBufferSetRestartPoint(ReorderBuffer *, XLogRecPtr ptr); + +void StartupReorderBuffer(void); + +#endif diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index 089b0f4b70..c354c9133b 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -16,6 +16,24 @@ #include "storage/shmem.h" #include "storage/spin.h" +/* + * Behaviour of replication slots, upon release or crash. + * + * Slots marked as PERSISTENT are crashsafe and will not be dropped when + * released. Slots marked as EPHEMERAL will be dropped when released or after + * restarts. + * + * EPHEMERAL slots can be made PERSISTENT by calling ReplicationSlotPersist(). + */ +typedef enum ReplicationSlotPersistency +{ + RS_PERSISTENT, + RS_EPHEMERAL +} ReplicationSlotPersistency; + +/* + * On-Disk data of a replication slot, preserved across restarts. + */ typedef struct ReplicationSlotPersistentData { /* The slot's identifier */ @@ -24,6 +42,11 @@ typedef struct ReplicationSlotPersistentData /* database the slot is active on */ Oid database; + /* + * The slot's behaviour when being dropped (or restored after a crash). + */ + ReplicationSlotPersistency persistency; + /* * xmin horizon for data * @@ -32,9 +55,22 @@ typedef struct ReplicationSlotPersistentData */ TransactionId xmin; + /* + * xmin horizon for catalog tuples + * + * NB: This may represent a value that hasn't been written to disk yet; + * see notes for effective_xmin, below. + */ + TransactionId catalog_xmin; + /* oldest LSN that might be required by this replication slot */ XLogRecPtr restart_lsn; + /* oldest LSN that the client has acked receipt for */ + XLogRecPtr confirmed_flush; + + /* plugin name */ + NameData plugin; } ReplicationSlotPersistentData; /* @@ -67,12 +103,26 @@ typedef struct ReplicationSlot * same as the persistent value (data.xmin). */ TransactionId effective_xmin; + TransactionId effective_catalog_xmin; /* data surviving shutdowns and crashes */ ReplicationSlotPersistentData data; /* is somebody performing io on this slot? */ LWLock *io_in_progress_lock; + + /* all the remaining data is only used for logical slots */ + + /* ---- + * When the client has confirmed flushes >= candidate_xmin_lsn we can + * advance the catalog xmin, when restart_valid has been passed, + * restart_lsn can be increased. + * ---- + */ + TransactionId candidate_catalog_xmin; + XLogRecPtr candidate_xmin_lsn; + XLogRecPtr candidate_restart_valid; + XLogRecPtr candidate_restart_lsn; } ReplicationSlot; /* @@ -97,8 +147,11 @@ extern Size ReplicationSlotsShmemSize(void); extern void ReplicationSlotsShmemInit(void); /* management of individual slots */ -extern void ReplicationSlotCreate(const char *name, bool db_specific); +extern void ReplicationSlotCreate(const char *name, bool db_specific, + ReplicationSlotPersistency p); +extern void ReplicationSlotPersist(void); extern void ReplicationSlotDrop(const char *name); + extern void ReplicationSlotAcquire(const char *name); extern void ReplicationSlotRelease(void); extern void ReplicationSlotSave(void); @@ -106,15 +159,20 @@ extern void ReplicationSlotMarkDirty(void); /* misc stuff */ extern bool ReplicationSlotValidateName(const char *name, int elevel); -extern void ReplicationSlotsComputeRequiredXmin(void); +extern void ReplicationSlotsComputeRequiredXmin(bool already_locked); extern void ReplicationSlotsComputeRequiredLSN(void); +extern XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void); +extern bool ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive); + extern void StartupReplicationSlots(XLogRecPtr checkPointRedo); extern void CheckPointReplicationSlots(void); extern void CheckSlotRequirements(void); -extern void ReplicationSlotAtProcExit(void); /* SQL callable functions */ +extern Datum pg_create_physical_replication_slot(PG_FUNCTION_ARGS); +extern Datum pg_create_logical_replication_slot(PG_FUNCTION_ARGS); +extern Datum pg_drop_replication_slot(PG_FUNCTION_ARGS); extern Datum pg_get_replication_slots(PG_FUNCTION_ARGS); #endif /* SLOT_H */ diff --git a/src/include/replication/snapbuild.h b/src/include/replication/snapbuild.h new file mode 100644 index 0000000000..087c0e510d --- /dev/null +++ b/src/include/replication/snapbuild.h @@ -0,0 +1,83 @@ +/*------------------------------------------------------------------------- + * + * snapbuild.h + * Exports from replication/logical/snapbuild.c. + * + * Copyright (c) 2012-2014, PostgreSQL Global Development Group + * + * src/include/replication/snapbuild.h + * + *------------------------------------------------------------------------- + */ +#ifndef SNAPBUILD_H +#define SNAPBUILD_H + +#include "access/xlogdefs.h" +#include "utils/snapmgr.h" + +typedef enum +{ + /* + * Initial state, we can't do much yet. + */ + SNAPBUILD_START, + + /* + * We have collected enough information to decode tuples in transactions + * that started after this. + * + * Once we reached this we start to collect changes. We cannot apply them + * yet because the might be based on transactions that were still running + * when we reached them yet. + */ + SNAPBUILD_FULL_SNAPSHOT, + + /* + * Found a point after hitting built_full_snapshot where all transactions + * that were running at that point finished. Till we reach that we hold + * off calling any commit callbacks. + */ + SNAPBUILD_CONSISTENT +} SnapBuildState; + +/* forward declare so we don't have to expose the struct to the public */ +struct SnapBuild; +typedef struct SnapBuild SnapBuild; + +/* forward declare so we don't have to include reorderbuffer.h */ +struct ReorderBuffer; + +/* forward declare so we don't have to include heapam_xlog.h */ +struct xl_heap_new_cid; +struct xl_running_xacts; + +extern void CheckPointSnapBuild(void); + +extern SnapBuild *AllocateSnapshotBuilder(struct ReorderBuffer *cache, + TransactionId xmin_horizon, XLogRecPtr start_lsn); +extern void FreeSnapshotBuilder(SnapBuild *cache); + +extern void SnapBuildSnapDecRefcount(Snapshot snap); + +extern const char *SnapBuildExportSnapshot(SnapBuild *snapstate); +extern void SnapBuildClearExportedSnapshot(void); + +extern SnapBuildState SnapBuildCurrentState(SnapBuild *snapstate); + +extern bool SnapBuildXactNeedsSkip(SnapBuild *snapstate, XLogRecPtr ptr); + +extern void SnapBuildCommitTxn(SnapBuild *builder, XLogRecPtr lsn, + TransactionId xid, int nsubxacts, + TransactionId *subxacts); +extern void SnapBuildAbortTxn(SnapBuild *builder, XLogRecPtr lsn, + TransactionId xid, int nsubxacts, + TransactionId *subxacts); +extern bool SnapBuildProcessChange(SnapBuild *builder, TransactionId xid, + XLogRecPtr lsn); +extern void SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid, + XLogRecPtr lsn, struct xl_heap_new_cid *cid); +extern void SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, + struct xl_running_xacts *running); +extern void SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn); + +#endif /* SNAPBUILD_H */ diff --git a/src/include/storage/itemptr.h b/src/include/storage/itemptr.h index 67bbdbb988..0b81d53f5f 100644 --- a/src/include/storage/itemptr.h +++ b/src/include/storage/itemptr.h @@ -116,6 +116,9 @@ typedef ItemPointerData *ItemPointer; /* * ItemPointerCopy * Copies the contents of one disk item pointer to another. + * + * Should there ever be padding in an ItemPointer this would need to be handled + * differently as it's used as hash key. */ #define ItemPointerCopy(fromPointer, toPointer) \ ( \ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index a3cadd9a01..5218b448cd 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -41,10 +41,12 @@ struct XidCache #define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? */ #define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */ #define PROC_IN_ANALYZE 0x04 /* currently running analyze */ -#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */ +#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */ +#define PROC_IN_LOGICAL_DECODING 0x10 /* currently doing logical decoding */ /* flags reset at EOXact */ -#define PROC_VACUUM_STATE_MASK (0x0E) +#define PROC_VACUUM_STATE_MASK \ + (PROC_IN_VACUUM | PROC_IN_ANALYZE | PROC_VACUUM_FOR_WRAPAROUND) /* * We allow a small number of "weak" relation locks (AccesShareLock, diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index d1a58a3661..d0b4103a09 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -15,6 +15,7 @@ #define PROCARRAY_H #include "storage/standby.h" +#include "utils/relcache.h" #include "utils/snapshot.h" @@ -50,8 +51,9 @@ extern RunningTransactions GetRunningTransactionData(void); extern bool TransactionIdIsInProgress(TransactionId xid); extern bool TransactionIdIsActive(TransactionId xid); -extern TransactionId GetOldestXmin(bool allDbs, bool ignoreVacuum); +extern TransactionId GetOldestXmin(Relation rel, bool ignoreVacuum); extern TransactionId GetOldestActiveTransactionId(void); +extern TransactionId GetOldestSafeDecodingTransactionId(void); extern VirtualTransactionId *GetVirtualXIDsDelayingChkpt(int *nvxids); extern bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids); @@ -77,6 +79,10 @@ extern void XidCacheRemoveRunningXids(TransactionId xid, int nxids, const TransactionId *xids, TransactionId latestXid); -extern void ProcArraySetReplicationSlotXmin(TransactionId xmin); +extern void ProcArraySetReplicationSlotXmin(TransactionId xmin, + TransactionId catalog_xmin, bool already_locked); + +extern void ProcArrayGetReplicationSlotXmin(TransactionId *xmin, + TransactionId *catalog_xmin); #endif /* PROCARRAY_H */ diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index 0cae810f49..d5bb850337 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -147,4 +147,6 @@ extern void ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs int nmsgs, bool RelcacheInitFileInval, Oid dbid, Oid tsid); +extern void LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg); + #endif /* SINVAL_H */ diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index c1409c863d..6156e0219d 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -64,4 +64,5 @@ extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func, extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue); +extern void InvalidateSystemCaches(void); #endif /* INVAL_H */ diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h index c601770ec9..abe7016d04 100644 --- a/src/include/utils/snapmgr.h +++ b/src/include/utils/snapmgr.h @@ -23,12 +23,14 @@ extern bool FirstSnapshotSet; extern TransactionId TransactionXmin; extern TransactionId RecentXmin; extern TransactionId RecentGlobalXmin; +extern TransactionId RecentGlobalDataXmin; extern Snapshot GetTransactionSnapshot(void); extern Snapshot GetLatestSnapshot(void); extern void SnapshotSetCommandId(CommandId curcid); extern Snapshot GetCatalogSnapshot(Oid relid); +extern Snapshot GetNonHistoricCatalogSnapshot(Oid relid); extern void InvalidateCatalogSnapshot(void); extern void PushActiveSnapshot(Snapshot snapshot); @@ -53,4 +55,13 @@ extern bool XactHasExportedSnapshots(void); extern void DeleteAllExportedSnapshotFiles(void); extern bool ThereAreNoPriorRegisteredSnapshots(void); +extern char *ExportSnapshot(Snapshot snapshot); + +/* Support for catalog timetravel for logical decoding */ +struct HTAB; +extern struct HTAB *HistoricSnapshotGetTupleCids(void); +extern void SetupHistoricSnapshot(Snapshot snapshot_now, struct HTAB *tuplecids); +extern void TeardownHistoricSnapshot(bool is_error); +extern bool HistoricSnapshotActive(void); + #endif /* SNAPMGR_H */ diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h index c542238825..4b256074b0 100644 --- a/src/include/utils/snapshot.h +++ b/src/include/utils/snapshot.h @@ -30,6 +30,22 @@ typedef struct SnapshotData *Snapshot; typedef bool (*SnapshotSatisfiesFunc) (HeapTuple htup, Snapshot snapshot, Buffer buffer); +/* + * Struct representing all kind of possible snapshots. + * + * There are several different kinds of snapshots: + * * Normal MVCC snapshots + * * MVCC snapshots taken during recovery (in Hot-Standby mode) + * * Historic MVCC snapshots used during logical decoding + * * snapshots passed to HeapTupleSatisfiesDirty() + * * snapshots used for SatisfiesAny, Toast, Self where no members are + * accessed. + * + * TODO: It's probably a good idea to split this struct using a NodeTag + * similar to how parser and executor nodes are handled, with one type for + * each different kind of snapshot to avoid overloading the meaning of + * individual fields. + */ typedef struct SnapshotData { SnapshotSatisfiesFunc satisfies; /* tuple test function */ @@ -46,11 +62,23 @@ typedef struct SnapshotData */ TransactionId xmin; /* all XID < xmin are visible to me */ TransactionId xmax; /* all XID >= xmax are invisible to me */ - TransactionId *xip; /* array of xact IDs in progress */ + /* + * For normal MVCC snapshot this contains the all xact IDs that are in + * progress, unless the snapshot was taken during recovery in which case + * it's empty. For historic MVCC snapshots, the meaning is inverted, + * i.e. it contains *committed* transactions between xmin and xmax. + */ + TransactionId *xip; uint32 xcnt; /* # of xact ids in xip[] */ /* note: all ids in xip[] satisfy xmin <= xip[i] < xmax */ int32 subxcnt; /* # of xact ids in subxip[] */ - TransactionId *subxip; /* array of subxact IDs in progress */ + /* + * For non-historic MVCC snapshots, this contains subxact IDs that are in + * progress (and other transactions that are in progress if taken during + * recovery). For historic snapshot it contains *all* xids assigned to the + * replayed transaction, including the toplevel xid. + */ + TransactionId *subxip; bool suboverflowed; /* has the subxip array overflowed? */ bool takenDuringRecovery; /* recovery-shaped snapshot? */ bool copied; /* false if it's a static snapshot */ diff --git a/src/include/utils/tqual.h b/src/include/utils/tqual.h index e34c28a4f7..48abe62983 100644 --- a/src/include/utils/tqual.h +++ b/src/include/utils/tqual.h @@ -22,6 +22,7 @@ extern PGDLLIMPORT SnapshotData SnapshotSelfData; extern PGDLLIMPORT SnapshotData SnapshotAnyData; extern PGDLLIMPORT SnapshotData SnapshotToastData; +extern PGDLLIMPORT SnapshotData CatalogSnapshotData; #define SnapshotSelf (&SnapshotSelfData) #define SnapshotAny (&SnapshotAnyData) @@ -37,7 +38,8 @@ extern PGDLLIMPORT SnapshotData SnapshotToastData; /* This macro encodes the knowledge of which snapshots are MVCC-safe */ #define IsMVCCSnapshot(snapshot) \ - ((snapshot)->satisfies == HeapTupleSatisfiesMVCC) + ((snapshot)->satisfies == HeapTupleSatisfiesMVCC || \ + (snapshot)->satisfies == HeapTupleSatisfiesHistoricMVCC) /* * HeapTupleSatisfiesVisibility @@ -73,6 +75,8 @@ extern bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer); extern bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer); +extern bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, + Snapshot snapshot, Buffer buffer); /* Special "satisfies" routines with different APIs */ extern HTSU_Result HeapTupleSatisfiesUpdate(HeapTuple htup, @@ -86,4 +90,13 @@ extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid); extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple); +/* + * To avoid leaking to much knowledge about reorderbuffer implementation + * details this is implemented in reorderbuffer.c not tqual.c. + */ +extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data, + Snapshot snapshot, + HeapTuple htup, + Buffer buffer, + CommandId *cmin, CommandId *cmax); #endif /* TQUAL_H */ diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index ef50f4da21..b0b6e27d8a 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1368,13 +1368,15 @@ pg_prepared_xacts| SELECT p.transaction, LEFT JOIN pg_authid u ON ((p.ownerid = u.oid))) LEFT JOIN pg_database d ON ((p.dbid = d.oid))); pg_replication_slots| SELECT l.slot_name, + l.plugin, l.slot_type, l.datoid, d.datname AS database, l.active, l.xmin, + l.catalog_xmin, l.restart_lsn - FROM (pg_get_replication_slots() l(slot_name, slot_type, datoid, active, xmin, restart_lsn) + FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, active, xmin, catalog_xmin, restart_lsn) LEFT JOIN pg_database d ON ((l.datoid = d.oid))); pg_roles| SELECT pg_authid.rolname, pg_authid.rolsuper, diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 3b7f61ef20..f9604541c7 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -940,6 +940,17 @@ LockTupleMode LockingClause LogOpts LogStmtLevel +LogicalDecodeBeginCB +LogicalDecodeChangeCB +LogicalDecodeCleanupCB +LogicalDecodeCommitCB +LogicalDecodeInitCB +LogicalDecodingCheckpointData +LogicalDecodingContext +LogicalDecodingCtlData +LogicalDecodingSlot +LogicalOutputPluginWriterPrepareWrite +LogicalOutputPluginWriterWrite LogicalTape LogicalTapeSet MAGIC @@ -1053,6 +1064,7 @@ OprInfo OprProofCacheEntry OprProofCacheKey OutputContext +OutputPluginCallbacks OverrideSearchPath OverrideStackEntry PACE_HEADER @@ -1468,6 +1480,21 @@ Relids RelocationBufferInfo RenameStmt ReopenPtr +ReorderBuffer +ReorderBufferApplyChangeCB +ReorderBufferBeginCB +ReorderBufferChange +ReorderBufferChangeTypeInternal +ReorderBufferCommitCB +ReorderBufferDiskChange +ReorderBufferIterTXNEntry +ReorderBufferIterTXNState +ReorderBufferToastEnt +ReorderBufferTupleBuf +ReorderBufferTupleCidEnt +ReorderBufferTupleCidKey +ReorderBufferTXN +ReorderBufferTXNByIdEnt ReplaceVarsFromTargetList_context ReplaceVarsNoMatchOption ResTarget @@ -1522,6 +1549,8 @@ SID_NAME_USE SISeg SMgrRelation SMgrRelationData +SnapBuildAction +SnapBuildState SOCKADDR SOCKET SPELL @@ -1613,6 +1642,8 @@ SlruSharedData Snapshot SnapshotData SnapshotSatisfiesFunc +Snapstate +SnapstateOnDisk SockAddr Sort SortBy @@ -1929,6 +1960,7 @@ XLogReaderState XLogRecData XLogRecPtr XLogRecord +XLogRecordBuffer XLogSegNo XLogSource XLogwrtResult @@ -2351,6 +2383,7 @@ symbol tablespaceinfo teReqs teSection +TestDecodingData temp_tablespaces_extra text timeKEY