/*-------------------------------------------------------------------------
 * worker.c
 *	   PostgreSQL logical replication worker (apply)
 *
 * Copyright (c) 2016-2022, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/replication/logical/worker.c
 *
 * NOTES
 *	  This file contains the worker which applies logical changes as they
 *	  come from the remote logical replication stream.
 *
 *	  The main worker (apply) is started by the logical replication worker
 *	  launcher for every enabled subscription in a database. It uses the
 *	  walsender protocol to communicate with the publisher.
 *
 *	  This module includes the server-facing code and shares the
 *	  libpqwalreceiver module with walreceiver to provide the libpq-specific
 *	  functionality.
 *
 * STREAMED TRANSACTIONS
 * ---------------------
 * Streamed transactions (large transactions exceeding a memory limit on the
 * upstream) are not applied immediately; instead, the data is written to
 * temporary files and then applied at once when the final commit arrives.
 *
 * Unlike the regular (non-streamed) case, handling streamed transactions
 * requires handling aborts of both the toplevel transaction and of
 * subtransactions. This is achieved by tracking offsets for subtransactions,
 * which are then used to truncate the file with the serialized changes.
 *
 * The files are placed in the tmp file directory by default, and the
 * filenames include both the XID of the toplevel transaction and the OID of
 * the subscription. This is necessary so that different workers processing a
 * remote transaction with the same XID don't interfere.
 *
 * We use BufFiles instead of normal temporary files because (a) the BufFile
 * infrastructure supports temporary files that exceed the OS file size
 * limit, (b) it provides a way for automatic clean up on error, and (c) it
 * provides a way for these files to survive across local transactions,
 * allowing them to be opened at stream start and closed at stream stop. We
 * decided to use the FileSet infrastructure because without it the files
 * would be deleted as soon as they are closed, and if we instead kept the
 * stream files open across start/stop stream, that would consume a lot of
 * memory (more than 8K for each BufFile, and there could be multiple such
 * BufFiles, as the subscriber could receive multiple start/stop streams for
 * different transactions before getting the commit). Moreover, without
 * FileSet we would also need to invent a new way to pass filenames to the
 * BufFile APIs so that the desired file can be reopened across multiple
 * stream-open calls for the same transaction.
 *
 * TWO_PHASE TRANSACTIONS
 * ----------------------
 * Two phase transactions are replayed at prepare and then committed or
 * rolled back at commit prepared and rollback prepared respectively. It is
 * possible to have a prepared transaction that arrives at the apply worker
 * when the tablesync is busy doing the initial copy. In this case, the apply
 * worker skips all the prepared operations [e.g. inserts] while the
 * tablesync is still busy (see the condition of
 * should_apply_changes_for_rel). The tablesync worker might not get such a
 * prepared transaction because, say, it was prior to the initial consistent
 * point but it might have gotten some later commits. Now, the tablesync
 * worker will exit without doing anything for the prepared transaction
 * skipped by the apply worker, as the sync location for it will already be
 * ahead of the apply worker's current location. This would lead to an
 * "empty prepare", because later when the apply worker does the commit
 * prepare, there is nothing in it (the inserts were skipped earlier).
 *
 * To avoid this and similar prepare confusions, the subscription's two_phase
 * commit is enabled only after the initial sync is over. The two_phase
 * option has been implemented as a tri-state with values DISABLED, PENDING,
 * and ENABLED.
 *
 * Even if the user specifies they want a subscription with two_phase = on,
 * internally it will start with a tri-state of PENDING, which only becomes
 * ENABLED after all tablesync initializations are completed - i.e. when all
 * tablesync workers have reached their READY state. In other words, the
 * value PENDING is only a temporary state for subscription start-up.
 *
 * Until the two_phase is properly available (ENABLED) the subscription will
 * behave as if two_phase = off. When the apply worker detects that all
 * tablesyncs have become READY (while the tri-state was PENDING) it will
 * restart the apply worker process. This happens in
 * process_syncing_tables_for_apply.
 *
 * When the (re-started) apply worker finds that all tablesyncs are READY for
 * a two_phase tri-state of PENDING, it starts streaming messages with the
 * two_phase option, which in turn enables the decoding of two-phase commits
 * at the publisher. Then, it updates the tri-state value from PENDING to
 * ENABLED. Now, it is possible that during the time we had not enabled
 * two_phase, the publisher (replication server) would have skipped some
 * prepares, but we ensure that such prepares are sent along with the commit
 * prepared (see ReorderBufferFinishPrepared).
 *
 * If the subscription has no tables then a two_phase tri-state PENDING is
 * left unchanged. This lets the user still do an ALTER SUBSCRIPTION REFRESH
 * PUBLICATION which might otherwise be disallowed (see below).
 *
 * If ever a user needs to be aware of the tri-state value, they can fetch it
 * from the pg_subscription catalog (see column subtwophasestate).
 *
 * We don't allow toggling the two_phase option of a subscription because it
 * can lead to an inconsistent replica. Consider: initially it was on and we
 * have received some prepare, then we turn it off; now at commit time the
 * server will send the entire transaction data along with the commit. With
 * some more analysis we could allow changing this option from off to on,
 * but it is not clear whether that alone would be useful.
 *
 * Finally, to avoid the problems mentioned in the previous paragraphs from
 * any subsequent (not READY) tablesyncs (which would need to toggle the
 * two_phase option from 'on' to 'off' and then again back to 'on') there is
 * a restriction for ALTER SUBSCRIPTION REFRESH PUBLICATION. This command is
 * not permitted when the two_phase tri-state is ENABLED, except when
 * copy_data = false.
 *
 * We can get a prepare for the same GID more than once in the genuine case
 * where we have defined multiple subscriptions for publications on the same
 * server and the prepared transaction has operations on tables subscribed
 * to by those subscriptions. For such cases, if we used the GID sent by the
 * publisher, one of the prepares would be successful and the others would
 * fail, in which case the server would send them again. Now, this can lead
 * to a deadlock if the user has set synchronous_standby_names for all the
 * subscriptions on the subscriber. To avoid such deadlocks, we generate a
 * unique GID (consisting of the subscription oid and the xid of the
 * prepared transaction) for each prepared transaction on the subscriber.
 *-------------------------------------------------------------------------
 */
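
/*
 * A rough sketch of the streamed-transaction flow on the apply side, in
 * terms of the helpers declared later in this file (a simplified outline of
 * the mechanism described above, not the exact call sequence):
 *
 *	stream_open_file(subid, xid, first);		on STREAM START
 *	stream_write_change(action, s);				for each spooled change
 *	stream_close_file();						on STREAM STOP
 *	apply_spooled_messages(xid, lsn);			on STREAM COMMIT (replay)
 *	stream_cleanup_files(subid, xid);			remove the spill files
 */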

#include "postgres.h"

#include <sys/stat.h>
#include <unistd.h>

#include "access/table.h"
#include "access/tableam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "catalog/catalog.h"
#include "catalog/namespace.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_subscription_rel.h"
#include "catalog/pg_tablespace.h"
#include "commands/tablecmds.h"
#include "commands/tablespace.h"
#include "commands/trigger.h"
#include "executor/executor.h"
#include "executor/execPartition.h"
#include "executor/nodeModifyTable.h"
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "libpq/pqsignal.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "optimizer/optimizer.h"
#include "pgstat.h"
#include "postmaster/bgworker.h"
#include "postmaster/interrupt.h"
#include "postmaster/postmaster.h"
#include "postmaster/walwriter.h"
#include "replication/decode.h"
#include "replication/logical.h"
#include "replication/logicalproto.h"
#include "replication/logicalrelation.h"
#include "replication/logicalworker.h"
#include "replication/origin.h"
#include "replication/reorderbuffer.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/worker_internal.h"
#include "rewrite/rewriteHandler.h"
#include "storage/buffile.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "tcop/tcopprot.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/catcache.h"
#include "utils/dynahash.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/rls.h"
#include "utils/syscache.h"
#include "utils/timeout.h"

#define NAPTIME_PER_CYCLE 1000	/* max sleep time between cycles (1s) */

typedef struct FlushPosition
{
	dlist_node	node;
	XLogRecPtr	local_end;
	XLogRecPtr	remote_end;
} FlushPosition;

static dlist_head lsn_mapping = DLIST_STATIC_INIT(lsn_mapping);

typedef struct ApplyExecutionData
{
	EState	   *estate;			/* executor state, used to track resources */

	LogicalRepRelMapEntry *targetRel;	/* replication target rel */
	ResultRelInfo *targetRelInfo;	/* ResultRelInfo for same */

	/* These fields are used when the target relation is partitioned: */
	ModifyTableState *mtstate;	/* dummy ModifyTable state */
	PartitionTupleRouting *proute;	/* partition routing info */
} ApplyExecutionData;

/* Struct for saving and restoring apply errcontext information */
typedef struct ApplyErrorCallbackArg
{
	LogicalRepMsgType command;	/* 0 if invalid */
	LogicalRepRelMapEntry *rel;

	/* Remote node information */
	int			remote_attnum;	/* -1 if invalid */
	TransactionId remote_xid;
	TimestampTz ts;				/* commit, rollback, or prepare timestamp */
} ApplyErrorCallbackArg;

static ApplyErrorCallbackArg apply_error_callback_arg =
{
	.command = 0,
	.rel = NULL,
	.remote_attnum = -1,
	.remote_xid = InvalidTransactionId,
	.ts = 0,
};

static MemoryContext ApplyMessageContext = NULL;
MemoryContext ApplyContext = NULL;

/* per stream context for streaming transactions */
static MemoryContext LogicalStreamingContext = NULL;

WalReceiverConn *LogRepWorkerWalRcvConn = NULL;

Subscription *MySubscription = NULL;
bool		MySubscriptionValid = false;

bool		in_remote_transaction = false;
static XLogRecPtr remote_final_lsn = InvalidXLogRecPtr;

/* fields valid only when processing streamed transaction */
static bool in_streamed_transaction = false;

static TransactionId stream_xid = InvalidTransactionId;

/* BufFile handle of the current streaming file */
static BufFile *stream_fd = NULL;

typedef struct SubXactInfo
{
	TransactionId xid;			/* XID of the subxact */
	int			fileno;			/* file number in the buffile */
	off_t		offset;			/* offset in the file */
} SubXactInfo;

/* Sub-transaction data for the current streaming transaction */
typedef struct ApplySubXactData
{
	uint32		nsubxacts;		/* number of sub-transactions */
	uint32		nsubxacts_max;	/* current capacity of subxacts */
	TransactionId subxact_last; /* xid of the last sub-transaction */
	SubXactInfo *subxacts;		/* sub-xact offset in changes file */
} ApplySubXactData;

static ApplySubXactData subxact_data = {0, 0, InvalidTransactionId, NULL};
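
/*
 * Sketch of how the offsets above are used: if a streamed subtransaction
 * aborts, its changes can be discarded by truncating the changes file back
 * to the position recorded when the subxact started, along the lines of
 * (simplified, error handling omitted):
 *
 *	SubXactInfo *subxact = &subxact_data.subxacts[i];
 *
 *	BufFileTruncateFileSet(stream_fd, subxact->fileno, subxact->offset);
 */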

static inline void subxact_filename(char *path, Oid subid, TransactionId xid);
static inline void changes_filename(char *path, Oid subid, TransactionId xid);
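
/*
 * The two helpers above are assumed to build names like
 * "<subid>-<xid>.changes" and "<subid>-<xid>.subxacts", combining the
 * subscription OID and the toplevel XID so that workers applying different
 * remote transactions never collide on a filename.  A minimal hypothetical
 * version (illustrative only, not necessarily the exact format):
 */
#ifdef NOT_USED
static inline void
example_changes_filename(char *path, Oid subid, TransactionId xid)
{
	snprintf(path, MAXPGPATH, "%u-%u.changes", subid, xid);
}
#endif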

/*
 * Information about subtransactions of a given toplevel transaction.
 */
static void subxact_info_write(Oid subid, TransactionId xid);
static void subxact_info_read(Oid subid, TransactionId xid);
static void subxact_info_add(TransactionId xid);
static inline void cleanup_subxact_info(void);

/*
 * Serialize and deserialize changes for a toplevel transaction.
 */
static void stream_cleanup_files(Oid subid, TransactionId xid);
static void stream_open_file(Oid subid, TransactionId xid, bool first);
static void stream_write_change(char action, StringInfo s);
static void stream_close_file(void);

static void send_feedback(XLogRecPtr recvpos, bool force, bool requestReply);

static void store_flush_position(XLogRecPtr remote_lsn);

static void maybe_reread_subscription(void);

/* prototype needed because of stream_commit */
static void apply_dispatch(StringInfo s);

static void apply_handle_commit_internal(LogicalRepCommitData *commit_data);
static void apply_handle_insert_internal(ApplyExecutionData *edata,
										 ResultRelInfo *relinfo,
										 TupleTableSlot *remoteslot);
static void apply_handle_update_internal(ApplyExecutionData *edata,
										 ResultRelInfo *relinfo,
										 TupleTableSlot *remoteslot,
										 LogicalRepTupleData *newtup);
static void apply_handle_delete_internal(ApplyExecutionData *edata,
										 ResultRelInfo *relinfo,
										 TupleTableSlot *remoteslot);
static bool FindReplTupleInLocalRel(EState *estate, Relation localrel,
									LogicalRepRelation *remoterel,
									TupleTableSlot *remoteslot,
									TupleTableSlot **localslot);
static void apply_handle_tuple_routing(ApplyExecutionData *edata,
									   TupleTableSlot *remoteslot,
									   LogicalRepTupleData *newtup,
									   CmdType operation);

/* Compute GID for two_phase transactions */
static void TwoPhaseTransactionGid(Oid subid, TransactionId xid, char *gid, int szgid);
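
/*
 * As described in the file header, the GID is presumably derived from the
 * subscription OID and the prepared transaction's XID, which is what makes
 * it unique across subscriptions on the same server.  A minimal sketch of
 * such a scheme (illustrative, not necessarily the exact format):
 *
 *	snprintf(gid, szgid, "pg_gid_%u_%u", subid, xid);
 */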

/* Common streaming function to apply all the spooled messages */
static void apply_spooled_messages(TransactionId xid, XLogRecPtr lsn);

/* Functions for apply error callback */
static void apply_error_callback(void *arg);
static inline void set_apply_error_context_xact(TransactionId xid, TimestampTz ts);
static inline void reset_apply_error_context_info(void);

/*
 * Should this worker apply changes for the given relation?
 *
 * This is mainly needed for the initial relation data sync, as that runs in
 * a separate worker process running in parallel and we need some way to skip
 * changes coming to the main apply worker during the sync of a table.
 *
 * Note we need to do a less-than-or-equal comparison for the SYNCDONE state
 * because its statelsn might hold the position of the end of the initial
 * slot consistent point WAL record + 1 (i.e. the start of the next record),
 * and the next record can be the COMMIT of the transaction we are now
 * processing (which is what we set remote_final_lsn to in
 * apply_handle_begin).
 */
static bool
should_apply_changes_for_rel(LogicalRepRelMapEntry *rel)
{
	if (am_tablesync_worker())
		return MyLogicalRepWorker->relid == rel->localreloid;
	else
		return (rel->state == SUBREL_STATE_READY ||
				(rel->state == SUBREL_STATE_SYNCDONE &&
				 rel->statelsn <= remote_final_lsn));
}

/*
 * Begin one step (one INSERT, UPDATE, etc) of a replication transaction.
 *
 * Start a transaction, if this is the first step (else we keep using the
 * existing transaction).
 * Also provide a global snapshot and ensure we run in ApplyMessageContext.
 */
static void
begin_replication_step(void)
{
	SetCurrentStatementStartTimestamp();

	if (!IsTransactionState())
	{
		StartTransactionCommand();
		maybe_reread_subscription();
	}

	PushActiveSnapshot(GetTransactionSnapshot());

	MemoryContextSwitchTo(ApplyMessageContext);
}

/*
 * Finish up one step of a replication transaction.
 * Callers of begin_replication_step() must also call this.
 *
 * We don't close out the transaction here, but we should increment
 * the command counter to make the effects of this step visible.
 */
static void
end_replication_step(void)
{
	PopActiveSnapshot();

	CommandCounterIncrement();
}
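
/*
 * Typical usage of the pair above (sketch): each replicated change is
 * applied as one bracketed step, along the lines of:
 *
 *	begin_replication_step();
 *	... apply one INSERT/UPDATE/DELETE ...
 *	end_replication_step();
 */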

/*
 * Handle streamed transactions.
 *
 * If in streaming mode (receiving a block of a streamed transaction), we
 * simply redirect it to a file for the proper toplevel transaction.
 *
 * Returns true for streamed transactions, false otherwise (regular mode).
 */
static bool
handle_streamed_transaction(LogicalRepMsgType action, StringInfo s)
{
	TransactionId xid;

	/* not in streaming mode */
	if (!in_streamed_transaction)
		return false;

	Assert(stream_fd != NULL);
	Assert(TransactionIdIsValid(stream_xid));

	/*
	 * We should have received the XID of the subxact as the first part of
	 * the message, so extract it.
	 */
	xid = pq_getmsgint(s, 4);

	if (!TransactionIdIsValid(xid))
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg_internal("invalid transaction ID in streamed replication transaction")));

	/* Add the new subxact to the array (unless already there). */
	subxact_info_add(xid);

	/* write the change to the current file */
	stream_write_change(action, s);

	return true;
}
|
2017-01-19 18:00:00 +01:00
|
|
|
|
|
|
|
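/*
 * Illustrative sketch, not part of upstream worker.c: each data-message
 * handler is expected to call handle_streamed_transaction() first and
 * return early when the change was merely spooled to the stream file.
 * The handler name below is hypothetical; the real handlers follow the
 * same pattern:
 *
 *	static void
 *	example_handle_change(StringInfo s)
 *	{
 *		if (handle_streamed_transaction(LOGICAL_REP_MSG_INSERT, s))
 *			return;
 *		...apply the change immediately...
 *	}
 */
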
/*
 * Executor state preparation for evaluation of constraint expressions,
 * indexes and triggers for the specified relation.
 *
 * Note that the caller must open and close any indexes to be updated.
 */
static ApplyExecutionData *
create_edata_for_relation(LogicalRepRelMapEntry *rel)
{
    ApplyExecutionData *edata;
    EState     *estate;
    RangeTblEntry *rte;
    ResultRelInfo *resultRelInfo;

    edata = (ApplyExecutionData *) palloc0(sizeof(ApplyExecutionData));
    edata->targetRel = rel;

    edata->estate = estate = CreateExecutorState();

    rte = makeNode(RangeTblEntry);
    rte->rtekind = RTE_RELATION;
    rte->relid = RelationGetRelid(rel->localrel);
    rte->relkind = rel->localrel->rd_rel->relkind;
    rte->rellockmode = AccessShareLock;
    ExecInitRangeTable(estate, list_make1(rte));

    edata->targetRelInfo = resultRelInfo = makeNode(ResultRelInfo);

    /*
     * Use the Relation opened by logicalrep_rel_open() instead of opening it
     * again.
     */
    InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0);

    /*
     * We put the ResultRelInfo in the es_opened_result_relations list, even
     * though we don't populate the es_result_relations array.  That's a bit
     * bogus, but it's enough to make ExecGetTriggerResultRel() find them.
     *
     * ExecOpenIndices() is not called here either; each execution path doing
     * an apply operation is responsible for that.
     */
    estate->es_opened_result_relations =
        lappend(estate->es_opened_result_relations, resultRelInfo);

    estate->es_output_cid = GetCurrentCommandId(true);

    /* Prepare to catch AFTER triggers. */
    AfterTriggerBeginQuery();

    /* other fields of edata remain NULL for now */

    return edata;
}

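/*
 * Illustrative sketch, not part of upstream worker.c: this helper is
 * paired with finish_edata() below, bracketing the actual apply work:
 *
 *	edata = create_edata_for_relation(rel);
 *	...locate and modify tuples in rel->localrel...
 *	finish_edata(edata);
 */
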
/*
 * Finish any operations related to the executor state created by
 * create_edata_for_relation().
 */
static void
finish_edata(ApplyExecutionData *edata)
{
    EState     *estate = edata->estate;

    /* Handle any queued AFTER triggers. */
    AfterTriggerEndQuery(estate);

    /* Shut down tuple routing, if any was done. */
    if (edata->proute)
        ExecCleanupTupleRouting(edata->mtstate, edata->proute);

    /*
     * Cleanup.  It might seem that we should call ExecCloseResultRelations()
     * here, but we intentionally don't.  It would close the rel we added to
     * es_opened_result_relations above, which is wrong because we took no
     * corresponding refcount.  We rely on ExecCleanupTupleRouting() to close
     * any other relations opened during execution.
     */
    ExecResetTupleTable(estate->es_tupleTable, false);
    FreeExecutorState(estate);
    pfree(edata);
}

/*
 * Evaluates default values for columns for which we can't map to remote
 * relation columns.
 *
 * This allows us to support tables which have more columns on the downstream
 * than on the upstream.
 */
static void
slot_fill_defaults(LogicalRepRelMapEntry *rel, EState *estate,
                   TupleTableSlot *slot)
{
    TupleDesc   desc = RelationGetDescr(rel->localrel);
    int         num_phys_attrs = desc->natts;
    int         i;
    int         attnum,
                num_defaults = 0;
    int        *defmap;
    ExprState **defexprs;
    ExprContext *econtext;

    econtext = GetPerTupleExprContext(estate);

    /* We got all the data via replication, no need to evaluate anything. */
    if (num_phys_attrs == rel->remoterel.natts)
        return;

    defmap = (int *) palloc(num_phys_attrs * sizeof(int));
    defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));

    Assert(rel->attrmap->maplen == num_phys_attrs);
    for (attnum = 0; attnum < num_phys_attrs; attnum++)
    {
        Expr       *defexpr;

        if (TupleDescAttr(desc, attnum)->attisdropped ||
            TupleDescAttr(desc, attnum)->attgenerated)
            continue;

        if (rel->attrmap->attnums[attnum] >= 0)
            continue;

        defexpr = (Expr *) build_column_default(rel->localrel, attnum + 1);

        if (defexpr != NULL)
        {
            /* Run the expression through planner */
            defexpr = expression_planner(defexpr);

            /* Initialize executable expression */
            defexprs[num_defaults] = ExecInitExpr(defexpr, NULL);
            defmap[num_defaults] = attnum;
            num_defaults++;
        }
    }

    for (i = 0; i < num_defaults; i++)
        slot->tts_values[defmap[i]] =
            ExecEvalExpr(defexprs[i], econtext, &slot->tts_isnull[defmap[i]]);
}

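/*
 * Worked example, not part of upstream worker.c: suppose the local table
 * is (a int, b int, c int DEFAULT 0) while the remote relation only
 * publishes (a, b).  Then rel->attrmap->attnums maps "c" to -1, the loop
 * above plans and initializes the DEFAULT 0 expression for it, and the
 * final loop evaluates that expression into the slot position of "c".
 */
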
/*
 * Store tuple data into slot.
 *
 * Incoming data can be either text or binary format.
 */
static void
slot_store_data(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
                LogicalRepTupleData *tupleData)
{
    int         natts = slot->tts_tupleDescriptor->natts;
    int         i;

    ExecClearTuple(slot);

    /* Call the "in" function for each non-dropped, non-null attribute */
    Assert(natts == rel->attrmap->maplen);
    for (i = 0; i < natts; i++)
    {
        Form_pg_attribute att = TupleDescAttr(slot->tts_tupleDescriptor, i);
        int         remoteattnum = rel->attrmap->attnums[i];

        if (!att->attisdropped && remoteattnum >= 0)
        {
            StringInfo  colvalue = &tupleData->colvalues[remoteattnum];

            Assert(remoteattnum < tupleData->ncols);

            /* Set attnum for error callback */
            apply_error_callback_arg.remote_attnum = remoteattnum;

            if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_TEXT)
            {
                Oid         typinput;
                Oid         typioparam;

                getTypeInputInfo(att->atttypid, &typinput, &typioparam);
                slot->tts_values[i] =
                    OidInputFunctionCall(typinput, colvalue->data,
                                         typioparam, att->atttypmod);
                slot->tts_isnull[i] = false;
            }
            else if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_BINARY)
            {
                Oid         typreceive;
                Oid         typioparam;

                /*
                 * In some code paths we may be asked to re-parse the same
                 * tuple data.  Reset the StringInfo's cursor so that works.
                 */
                colvalue->cursor = 0;

                getTypeBinaryInputInfo(att->atttypid, &typreceive, &typioparam);
                slot->tts_values[i] =
                    OidReceiveFunctionCall(typreceive, colvalue,
                                           typioparam, att->atttypmod);

                /* Trouble if it didn't eat the whole buffer */
                if (colvalue->cursor != colvalue->len)
                    ereport(ERROR,
                            (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
                             errmsg("incorrect binary data format in logical replication column %d",
                                    remoteattnum + 1)));
                slot->tts_isnull[i] = false;
            }
            else
            {
                /*
                 * NULL value from remote.  (We don't expect to see
                 * LOGICALREP_COLUMN_UNCHANGED here, but if we do, treat it
                 * as NULL.)
                 */
                slot->tts_values[i] = (Datum) 0;
                slot->tts_isnull[i] = true;
            }

            /* Reset attnum for error callback */
            apply_error_callback_arg.remote_attnum = -1;
        }
        else
        {
            /*
             * We assign NULL to dropped attributes and missing values
             * (missing values should be filled later using
             * slot_fill_defaults).
             */
            slot->tts_values[i] = (Datum) 0;
            slot->tts_isnull[i] = true;
        }
    }

    ExecStoreVirtualTuple(slot);
}

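/*
 * Worked example, not part of upstream worker.c: for an int4 column sent
 * in text format, the LOGICALREP_COLUMN_TEXT branch above ends up calling
 * int4in() on the string (e.g. "42"); had the subscription been created
 * with (binary = true) and the publisher sent binary data, the
 * LOGICALREP_COLUMN_BINARY branch would call int4recv() on the same
 * column instead.
 */
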
/*
 * Replace updated columns with data from the LogicalRepTupleData struct.
 * This is somewhat similar to heap_modify_tuple but also calls the type
 * input functions on the user data.
 *
 * "slot" is filled with a copy of the tuple in "srcslot", replacing
 * columns provided in "tupleData" and leaving others as-is.
 *
 * Caution: unreplaced pass-by-ref columns in "slot" will point into the
 * storage for "srcslot".  This is OK for current usage, but someday we may
 * need to materialize "slot" at the end to make it independent of "srcslot".
 */
static void
slot_modify_data(TupleTableSlot *slot, TupleTableSlot *srcslot,
                 LogicalRepRelMapEntry *rel,
                 LogicalRepTupleData *tupleData)
{
    int         natts = slot->tts_tupleDescriptor->natts;
    int         i;

    /* We'll fill "slot" with a virtual tuple, so we must start with ... */
    ExecClearTuple(slot);

    /*
     * Copy all the column data from srcslot, so that we'll have valid values
     * for unreplaced columns.
     */
    Assert(natts == srcslot->tts_tupleDescriptor->natts);
    slot_getallattrs(srcslot);
    memcpy(slot->tts_values, srcslot->tts_values, natts * sizeof(Datum));
    memcpy(slot->tts_isnull, srcslot->tts_isnull, natts * sizeof(bool));

    /* Call the "in" function for each replaced attribute */
    Assert(natts == rel->attrmap->maplen);
    for (i = 0; i < natts; i++)
    {
        Form_pg_attribute att = TupleDescAttr(slot->tts_tupleDescriptor, i);
        int         remoteattnum = rel->attrmap->attnums[i];

        if (remoteattnum < 0)
            continue;

        Assert(remoteattnum < tupleData->ncols);

        if (tupleData->colstatus[remoteattnum] != LOGICALREP_COLUMN_UNCHANGED)
        {
            StringInfo  colvalue = &tupleData->colvalues[remoteattnum];

            /* Set attnum for error callback */
            apply_error_callback_arg.remote_attnum = remoteattnum;

            if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_TEXT)
            {
                Oid         typinput;
                Oid         typioparam;

                getTypeInputInfo(att->atttypid, &typinput, &typioparam);
                slot->tts_values[i] =
                    OidInputFunctionCall(typinput, colvalue->data,
                                         typioparam, att->atttypmod);
                slot->tts_isnull[i] = false;
            }
            else if (tupleData->colstatus[remoteattnum] == LOGICALREP_COLUMN_BINARY)
            {
                Oid         typreceive;
                Oid         typioparam;

                /*
                 * In some code paths we may be asked to re-parse the same
                 * tuple data.  Reset the StringInfo's cursor so that works.
                 */
                colvalue->cursor = 0;

                getTypeBinaryInputInfo(att->atttypid, &typreceive, &typioparam);
                slot->tts_values[i] =
                    OidReceiveFunctionCall(typreceive, colvalue,
                                           typioparam, att->atttypmod);

                /* Trouble if it didn't eat the whole buffer */
                if (colvalue->cursor != colvalue->len)
                    ereport(ERROR,
                            (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
                             errmsg("incorrect binary data format in logical replication column %d",
                                    remoteattnum + 1)));
                slot->tts_isnull[i] = false;
            }
            else
            {
                /* must be LOGICALREP_COLUMN_NULL */
                slot->tts_values[i] = (Datum) 0;
                slot->tts_isnull[i] = true;
            }

            /* Reset attnum for error callback */
            apply_error_callback_arg.remote_attnum = -1;
        }
    }

    /* And finally, declare that "slot" contains a valid virtual tuple */
    ExecStoreVirtualTuple(slot);
}

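/*
 * Worked example, not part of upstream worker.c: for an UPDATE that does
 * not touch a TOASTed column, the publisher may send that column as
 * LOGICALREP_COLUMN_UNCHANGED; the test above then skips it, so the value
 * copied from "srcslot" (the old tuple) is kept as-is.
 */
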
/*
 * Handle BEGIN message.
 */
static void
apply_handle_begin(StringInfo s)
{
    LogicalRepBeginData begin_data;

    logicalrep_read_begin(s, &begin_data);
    set_apply_error_context_xact(begin_data.xid, begin_data.committime);

    remote_final_lsn = begin_data.final_lsn;

    in_remote_transaction = true;

    pgstat_report_activity(STATE_RUNNING, NULL);
}

/*
 * Handle COMMIT message.
 *
 * TODO: support tracking of multiple origins
 */
static void
apply_handle_commit(StringInfo s)
{
    LogicalRepCommitData commit_data;

    logicalrep_read_commit(s, &commit_data);

    if (commit_data.commit_lsn != remote_final_lsn)
        ereport(ERROR,
                (errcode(ERRCODE_PROTOCOL_VIOLATION),
                 errmsg_internal("incorrect commit LSN %X/%X in commit message (expected %X/%X)",
                                 LSN_FORMAT_ARGS(commit_data.commit_lsn),
                                 LSN_FORMAT_ARGS(remote_final_lsn))));

    apply_handle_commit_internal(&commit_data);

    /* Process any tables that are being synchronized in parallel. */
    process_syncing_tables(commit_data.end_lsn);

    pgstat_report_activity(STATE_IDLE, NULL);
    reset_apply_error_context_info();
}

/*
 * Handle BEGIN PREPARE message.
 */
static void
apply_handle_begin_prepare(StringInfo s)
{
    LogicalRepPreparedTxnData begin_data;

    /* Tablesync should never receive prepare. */
    if (am_tablesync_worker())
        ereport(ERROR,
                (errcode(ERRCODE_PROTOCOL_VIOLATION),
                 errmsg_internal("tablesync worker received a BEGIN PREPARE message")));

    logicalrep_read_begin_prepare(s, &begin_data);
    set_apply_error_context_xact(begin_data.xid, begin_data.prepare_time);

    remote_final_lsn = begin_data.prepare_lsn;

    in_remote_transaction = true;

    pgstat_report_activity(STATE_RUNNING, NULL);
}

/*
 * Common function to prepare the GID.
 */
static void
apply_handle_prepare_internal(LogicalRepPreparedTxnData *prepare_data)
{
    char        gid[GIDSIZE];

    /*
     * Compute a unique GID for two_phase transactions.  We don't use the GID
     * of the prepared transaction sent by the server, as that can lead to
     * deadlock when multiple subscriptions from the same node point to
     * publications on the same node.  See comments atop worker.c.
     */
    TwoPhaseTransactionGid(MySubscription->oid, prepare_data->xid,
                           gid, sizeof(gid));

    /*
     * BeginTransactionBlock is necessary to balance the EndTransactionBlock
     * called within the PrepareTransactionBlock below.
     */
    BeginTransactionBlock();
    CommitTransactionCommand(); /* Completes the preceding Begin command. */

    /*
     * Update origin state so we can restart streaming from the correct
     * position in case of a crash.
     */
    replorigin_session_origin_lsn = prepare_data->end_lsn;
    replorigin_session_origin_timestamp = prepare_data->prepare_time;

    PrepareTransactionBlock(gid);
}

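/*
 * Usage note, not part of upstream worker.c: both the plain PREPARE path
 * (apply_handle_prepare) and the streamed path (apply_handle_stream_prepare)
 * funnel through the helper above.  The COMMIT PREPARED and ROLLBACK
 * PREPARED handlers later recompute the same deterministic GID from the
 * (subscription OID, remote XID) pair, so the prepared transaction can be
 * found again even across a worker restart:
 *
 *	TwoPhaseTransactionGid(MySubscription->oid, prepare_data.xid,
 *						   gid, sizeof(gid));
 */
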
/*
 * Handle PREPARE message.
 */
static void
apply_handle_prepare(StringInfo s)
{
    LogicalRepPreparedTxnData prepare_data;

    logicalrep_read_prepare(s, &prepare_data);

    if (prepare_data.prepare_lsn != remote_final_lsn)
        ereport(ERROR,
                (errcode(ERRCODE_PROTOCOL_VIOLATION),
                 errmsg_internal("incorrect prepare LSN %X/%X in prepare message (expected %X/%X)",
                                 LSN_FORMAT_ARGS(prepare_data.prepare_lsn),
                                 LSN_FORMAT_ARGS(remote_final_lsn))));

    /*
     * Unlike commit, here we always prepare the transaction, even if it
     * contained no changes.  It is done this way because, at commit-prepared
     * time, we won't know whether we skipped preparing a transaction for
     * lack of changes.
     *
     * XXX We could optimize this so that, at commit-prepared time, we first
     * check whether we prepared the transaction, but that doesn't seem
     * worthwhile because such cases shouldn't be common.
     */
    begin_replication_step();

    apply_handle_prepare_internal(&prepare_data);

    end_replication_step();
    CommitTransactionCommand();
    pgstat_report_stat(false);

    store_flush_position(prepare_data.end_lsn);

    in_remote_transaction = false;

    /* Process any tables that are being synchronized in parallel. */
    process_syncing_tables(prepare_data.end_lsn);

    pgstat_report_activity(STATE_IDLE, NULL);
    reset_apply_error_context_info();
}

/*
 * Handle a COMMIT PREPARED of a previously PREPARED transaction.
 */
static void
apply_handle_commit_prepared(StringInfo s)
{
    LogicalRepCommitPreparedTxnData prepare_data;
    char        gid[GIDSIZE];

    logicalrep_read_commit_prepared(s, &prepare_data);
    set_apply_error_context_xact(prepare_data.xid, prepare_data.commit_time);

    /* Compute GID for two_phase transactions. */
    TwoPhaseTransactionGid(MySubscription->oid, prepare_data.xid,
                           gid, sizeof(gid));

    /* There is no transaction when COMMIT PREPARED is called */
    begin_replication_step();

    /*
     * Update origin state so we can restart streaming from the correct
     * position in case of a crash.
     */
    replorigin_session_origin_lsn = prepare_data.end_lsn;
    replorigin_session_origin_timestamp = prepare_data.commit_time;

    FinishPreparedTransaction(gid, true);
    end_replication_step();
    CommitTransactionCommand();
    pgstat_report_stat(false);

    store_flush_position(prepare_data.end_lsn);
    in_remote_transaction = false;

    /* Process any tables that are being synchronized in parallel. */
    process_syncing_tables(prepare_data.end_lsn);

    pgstat_report_activity(STATE_IDLE, NULL);
    reset_apply_error_context_info();
}

/*
 * Handle a ROLLBACK PREPARED of a previously PREPARED transaction.
 */
static void
apply_handle_rollback_prepared(StringInfo s)
{
    LogicalRepRollbackPreparedTxnData rollback_data;
    char        gid[GIDSIZE];

    logicalrep_read_rollback_prepared(s, &rollback_data);
    set_apply_error_context_xact(rollback_data.xid, rollback_data.rollback_time);

    /* Compute GID for two_phase transactions. */
    TwoPhaseTransactionGid(MySubscription->oid, rollback_data.xid,
                           gid, sizeof(gid));

    /*
     * It is possible that we haven't received the prepare, because it
     * occurred before the walsender reached a consistent point or because
     * two_phase was not yet enabled at that time.  In such cases, we need to
     * skip the rollback of the prepared transaction.
     */
    if (LookupGXact(gid, rollback_data.prepare_end_lsn,
                    rollback_data.prepare_time))
    {
        /*
         * Update origin state so we can restart streaming from the correct
         * position in case of a crash.
         */
        replorigin_session_origin_lsn = rollback_data.rollback_end_lsn;
        replorigin_session_origin_timestamp = rollback_data.rollback_time;

        /* There is no transaction when ABORT/ROLLBACK PREPARED is called */
        begin_replication_step();
        FinishPreparedTransaction(gid, false);
        end_replication_step();
        CommitTransactionCommand();
    }

    pgstat_report_stat(false);

    store_flush_position(rollback_data.rollback_end_lsn);
    in_remote_transaction = false;

    /* Process any tables that are being synchronized in parallel. */
    process_syncing_tables(rollback_data.rollback_end_lsn);

    pgstat_report_activity(STATE_IDLE, NULL);
    reset_apply_error_context_info();
}

/*
|
|
|
|
* Handle STREAM PREPARE.
|
|
|
|
*
|
|
|
|
* Logic is in two parts:
|
|
|
|
* 1. Replay all the spooled operations
|
|
|
|
* 2. Mark the transaction as prepared
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
apply_handle_stream_prepare(StringInfo s)
|
|
|
|
{
|
|
|
|
LogicalRepPreparedTxnData prepare_data;
|
|
|
|
|
|
|
|
if (in_streamed_transaction)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_PROTOCOL_VIOLATION),
|
|
|
|
errmsg_internal("STREAM PREPARE message without STREAM STOP")));
|
|
|
|
|
|
|
|
/* Tablesync should never receive prepare. */
|
|
|
|
if (am_tablesync_worker())
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_PROTOCOL_VIOLATION),
|
|
|
|
errmsg_internal("tablesync worker received a STREAM PREPARE message")));
|
|
|
|
|
|
|
|
logicalrep_read_stream_prepare(s, &prepare_data);
|
2021-08-27 05:00:23 +02:00
|
|
|
set_apply_error_context_xact(prepare_data.xid, prepare_data.prepare_time);
|
2021-08-04 04:17:06 +02:00
|
|
|
|
|
|
|
elog(DEBUG1, "received prepare for streamed transaction %u", prepare_data.xid);
|
|
|
|
|
|
|
|
/* Replay all the spooled operations. */
|
|
|
|
apply_spooled_messages(prepare_data.xid, prepare_data.prepare_lsn);
|
|
|
|
|
|
|
|
/* Mark the transaction as prepared. */
|
|
|
|
apply_handle_prepare_internal(&prepare_data);
|
|
|
|
|
|
|
|
CommitTransactionCommand();
|
|
|
|
|
|
|
|
pgstat_report_stat(false);
|
|
|
|
|
|
|
|
store_flush_position(prepare_data.end_lsn);
|
|
|
|
|
|
|
|
in_remote_transaction = false;
|
|
|
|
|
|
|
|
/* unlink the files with serialized changes and subxact info. */
|
|
|
|
stream_cleanup_files(MyLogicalRepWorker->subid, prepare_data.xid);
|
|
|
|
|
|
|
|
/* Process any tables that are being synchronized in parallel. */
|
|
|
|
process_syncing_tables(prepare_data.end_lsn);
|
|
|
|
|
|
|
|
pgstat_report_activity(STATE_IDLE, NULL);
|
2021-08-27 05:00:23 +02:00
|
|
|
|
|
|
|
reset_apply_error_context_info();
|
2021-08-04 04:17:06 +02:00
|
|
|
}
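
/*
 * For orientation, a sketch of the message sequence this handler expects
 * from the publisher for a streamed two-phase transaction (assuming both
 * the streaming and two_phase options are in effect):
 *
 *     STREAM START ... STREAM STOP   -- repeated; changes spooled to file
 *     STREAM PREPARE                 -- replay spooled changes, then prepare
 *     COMMIT PREPARED or ROLLBACK PREPARED
 */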

/*
 * Handle ORIGIN message.
 *
 * TODO: support tracking of multiple origins.
 */
static void
apply_handle_origin(StringInfo s)
{
	/*
	 * An ORIGIN message can only come inside a streaming transaction or
	 * inside a remote transaction, and before any actual writes.
	 */
	if (!in_streamed_transaction &&
		(!in_remote_transaction ||
		 (IsTransactionState() && !am_tablesync_worker())))
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg_internal("ORIGIN message sent out of order")));
}
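
/*
 * A concrete example of the ordering rule enforced above: BEGIN, ORIGIN,
 * INSERT ... is accepted because no local transaction state exists yet when
 * ORIGIN arrives, whereas BEGIN, INSERT, ORIGIN ... fails with the protocol
 * violation, since by then the worker has already started writing inside
 * the local transaction.
 */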

/*
 * Handle STREAM START message.
 */
static void
apply_handle_stream_start(StringInfo s)
{
	bool		first_segment;

	if (in_streamed_transaction)
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg_internal("duplicate STREAM START message")));

	/*
	 * Start a transaction on stream start; this transaction will be
	 * committed on stream stop, unless it is a tablesync worker, in which
	 * case it will be committed after processing all the messages. We need
	 * the transaction for handling the BufFile, used for serializing the
	 * streaming data and subxact info.
	 */
	begin_replication_step();

	/* Notify handle methods we're processing a remote transaction. */
	in_streamed_transaction = true;

	/* Extract XID of the top-level transaction. */
	stream_xid = logicalrep_read_stream_start(s, &first_segment);

	if (!TransactionIdIsValid(stream_xid))
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg_internal("invalid transaction ID in streamed replication transaction")));

	set_apply_error_context_xact(stream_xid, 0);

	/*
	 * Initialize the worker's stream_fileset if we haven't yet. This will be
	 * used for the entire duration of the worker, so create it in a
	 * permanent context. We create this on the very first streaming message
	 * from any transaction and then use it for this and other streaming
	 * transactions. We could instead create the fileset at worker start, but
	 * then we could not be sure it would ever be used.
	 */
	if (MyLogicalRepWorker->stream_fileset == NULL)
	{
		MemoryContext oldctx;

		oldctx = MemoryContextSwitchTo(ApplyContext);

		MyLogicalRepWorker->stream_fileset = palloc(sizeof(FileSet));
		FileSetInit(MyLogicalRepWorker->stream_fileset);

		MemoryContextSwitchTo(oldctx);
	}

	/* Open the spool file for this transaction. */
	stream_open_file(MyLogicalRepWorker->subid, stream_xid, first_segment);

	/* If this is not the first segment, open the existing subxact file. */
	if (!first_segment)
		subxact_info_read(MyLogicalRepWorker->subid, stream_xid);

	pgstat_report_activity(STATE_RUNNING, NULL);

	end_replication_step();
}
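
/*
 * A sketch of the spool file naming that stream_open_file() relies on (the
 * real helper is changes_filename elsewhere in this file; the format shown
 * is illustrative). Combining the subscription OID with the remote XID
 * yields a name that is unique across workers:
 *
 *     snprintf(path, MAXPGPATH, "%u-%u.changes", subid, xid);
 */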

/*
 * Handle STREAM STOP message.
 */
static void
apply_handle_stream_stop(StringInfo s)
{
	if (!in_streamed_transaction)
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg_internal("STREAM STOP message without STREAM START")));

	/*
	 * Close the file with serialized changes, and serialize information
	 * about subxacts for the toplevel transaction.
	 */
	subxact_info_write(MyLogicalRepWorker->subid, stream_xid);
	stream_close_file();

	/* We must be in a valid transaction state */
	Assert(IsTransactionState());

	/* Commit the per-stream transaction */
	CommitTransactionCommand();

	in_streamed_transaction = false;

	/* Reset per-stream context */
	MemoryContextReset(LogicalStreamingContext);

	pgstat_report_activity(STATE_IDLE, NULL);

	reset_apply_error_context_info();
}
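
/*
 * Putting START and STOP together, each streamed segment is bracketed
 * roughly like this (sketch):
 *
 *     STREAM START  ->  begin_replication_step(), open the spool file
 *     ...changes... ->  appended to the spool file
 *     STREAM STOP   ->  subxact_info_write(), CommitTransactionCommand()
 */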

/*
 * Handle STREAM ABORT message.
 */
static void
apply_handle_stream_abort(StringInfo s)
{
	TransactionId xid;
	TransactionId subxid;

	if (in_streamed_transaction)
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg_internal("STREAM ABORT message without STREAM STOP")));

	logicalrep_read_stream_abort(s, &xid, &subxid);

	/*
	 * If the two XIDs are the same, it's in fact an abort of the toplevel
	 * xact, so just delete the files with the serialized info.
	 */
	if (xid == subxid)
	{
		set_apply_error_context_xact(xid, 0);
		stream_cleanup_files(MyLogicalRepWorker->subid, xid);
	}
	else
	{
		/*
		 * OK, so it's a subxact. We need to read the subxact file for the
		 * toplevel transaction, determine the offset tracked for the
		 * subxact, and truncate the file with changes. We also remove the
		 * subxacts with higher offsets (or rather higher XIDs).
		 *
		 * We intentionally scan the array from the tail, because we're
		 * likely aborting a change for the most recent subtransactions.
		 *
		 * We can't use binary search here, as subxact XIDs won't necessarily
		 * arrive in sorted order; consider the case where we have released
		 * the savepoint for multiple subtransactions and then performed a
		 * rollback to savepoint for one of the earlier sub-transactions.
		 */
		int64		i;
		int64		subidx;
		BufFile    *fd;
		bool		found = false;
		char		path[MAXPGPATH];

		set_apply_error_context_xact(subxid, 0);

		subidx = -1;
		begin_replication_step();
		subxact_info_read(MyLogicalRepWorker->subid, xid);

		for (i = subxact_data.nsubxacts; i > 0; i--)
		{
			if (subxact_data.subxacts[i - 1].xid == subxid)
			{
				subidx = (i - 1);
				found = true;
				break;
			}
		}

		/*
		 * If it's an empty sub-transaction then we will not find the subxid
		 * here, so just clean up the subxact info and return.
		 */
		if (!found)
		{
			/* Cleanup the subxact info */
			cleanup_subxact_info();
			end_replication_step();
			CommitTransactionCommand();
			reset_apply_error_context_info();
			return;
		}

		/* Open the changes file. */
		changes_filename(path, MyLogicalRepWorker->subid, xid);
		fd = BufFileOpenFileSet(MyLogicalRepWorker->stream_fileset, path,
								O_RDWR, false);

		/* OK, truncate the file at the right offset. */
		BufFileTruncateFileSet(fd, subxact_data.subxacts[subidx].fileno,
							   subxact_data.subxacts[subidx].offset);
		BufFileClose(fd);

		/* Discard the subxacts added later. */
		subxact_data.nsubxacts = subidx;

		/* Write the updated subxact list. */
		subxact_info_write(MyLogicalRepWorker->subid, xid);

		end_replication_step();
		CommitTransactionCommand();
	}

	reset_apply_error_context_info();
}
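
/*
 * A worked example of the truncation logic above (values are made up):
 * suppose the subxact array for toplevel xid 700 holds {701, 703, 702} in
 * spool order, and STREAM ABORT arrives for subxid 703. The tail scan finds
 * 703 at index 1, the changes file is truncated at subxacts[1]'s
 * fileno/offset, and nsubxacts drops to 1; this discards 702's changes too,
 * since they were spooled after 703's first change, which is exactly the
 * unsorted case that rules out binary search.
 */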
|
|
|
|
|
|
|
|
/*
|
2021-07-29 12:21:45 +02:00
|
|
|
* Common spoolfile processing.
|
Add support for streaming to built-in logical replication.
To add support for streaming of in-progress transactions into the
built-in logical replication, we need to do three things:
* Extend the logical replication protocol, so identify in-progress
transactions, and allow adding additional bits of information (e.g.
XID of subtransactions).
* Modify the output plugin (pgoutput) to implement the new stream
API callbacks, by leveraging the extended replication protocol.
* Modify the replication apply worker, to properly handle streamed
in-progress transaction by spilling the data to disk and then
replaying them on commit.
We however must explicitly disable streaming replication during
replication slot creation, even if the plugin supports it. We
don't need to replicate the changes accumulated during this phase,
and moreover we don't have a replication connection open so we
don't have where to send the data anyway.
Author: Tomas Vondra, Dilip Kumar and Amit Kapila
Reviewed-by: Amit Kapila, Kuntal Ghosh and Ajin Cherian
Tested-by: Neha Sharma, Mahendra Singh Thalor and Ajin Cherian
Discussion: https://postgr.es/m/688b0b7f-2f6c-d827-c27b-216a8e3ea700@2ndquadrant.com
2020-09-03 04:24:07 +02:00
|
|
|
*/
|
|
|
|
static void
|
2021-07-29 12:21:45 +02:00
|
|
|
apply_spooled_messages(TransactionId xid, XLogRecPtr lsn)
|
Add support for streaming to built-in logical replication.
To add support for streaming of in-progress transactions into the
built-in logical replication, we need to do three things:
* Extend the logical replication protocol, so identify in-progress
transactions, and allow adding additional bits of information (e.g.
XID of subtransactions).
* Modify the output plugin (pgoutput) to implement the new stream
API callbacks, by leveraging the extended replication protocol.
* Modify the replication apply worker, to properly handle streamed
in-progress transaction by spilling the data to disk and then
replaying them on commit.
We however must explicitly disable streaming replication during
replication slot creation, even if the plugin supports it. We
don't need to replicate the changes accumulated during this phase,
and moreover we don't have a replication connection open so we
don't have where to send the data anyway.
Author: Tomas Vondra, Dilip Kumar and Amit Kapila
Reviewed-by: Amit Kapila, Kuntal Ghosh and Ajin Cherian
Tested-by: Neha Sharma, Mahendra Singh Thalor and Ajin Cherian
Discussion: https://postgr.es/m/688b0b7f-2f6c-d827-c27b-216a8e3ea700@2ndquadrant.com
2020-09-03 04:24:07 +02:00
|
|
|
{
|
|
|
|
StringInfoData s2;
|
|
|
|
int nchanges;
|
|
|
|
char path[MAXPGPATH];
|
|
|
|
char *buffer = NULL;
|
|
|
|
MemoryContext oldcxt;
|
|
|
|
BufFile *fd;
|
|
|
|
|
2021-06-10 18:27:27 +02:00
|
|
|
/* Make sure we have an open transaction */
|
|
|
|
begin_replication_step();
|
Add support for streaming to built-in logical replication.
To add support for streaming of in-progress transactions into the
built-in logical replication, we need to do three things:
* Extend the logical replication protocol, so identify in-progress
transactions, and allow adding additional bits of information (e.g.
XID of subtransactions).
* Modify the output plugin (pgoutput) to implement the new stream
API callbacks, by leveraging the extended replication protocol.
* Modify the replication apply worker, to properly handle streamed
in-progress transaction by spilling the data to disk and then
replaying them on commit.
We however must explicitly disable streaming replication during
replication slot creation, even if the plugin supports it. We
don't need to replicate the changes accumulated during this phase,
and moreover we don't have a replication connection open so we
don't have where to send the data anyway.
Author: Tomas Vondra, Dilip Kumar and Amit Kapila
Reviewed-by: Amit Kapila, Kuntal Ghosh and Ajin Cherian
Tested-by: Neha Sharma, Mahendra Singh Thalor and Ajin Cherian
Discussion: https://postgr.es/m/688b0b7f-2f6c-d827-c27b-216a8e3ea700@2ndquadrant.com
2020-09-03 04:24:07 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Allocate file handle and memory required to process all the messages in
|
|
|
|
* TopTransactionContext to avoid them getting reset after each message is
|
|
|
|
* processed.
|
|
|
|
*/
|
|
|
|
oldcxt = MemoryContextSwitchTo(TopTransactionContext);
|
|
|
|
|
2021-08-04 04:17:06 +02:00
|
|
|
/* Open the spool file for the committed/prepared transaction */
|
Add support for streaming to built-in logical replication.
To add support for streaming of in-progress transactions into the
built-in logical replication, we need to do three things:
* Extend the logical replication protocol, so identify in-progress
transactions, and allow adding additional bits of information (e.g.
XID of subtransactions).
* Modify the output plugin (pgoutput) to implement the new stream
API callbacks, by leveraging the extended replication protocol.
* Modify the replication apply worker, to properly handle streamed
in-progress transaction by spilling the data to disk and then
replaying them on commit.
We however must explicitly disable streaming replication during
replication slot creation, even if the plugin supports it. We
don't need to replicate the changes accumulated during this phase,
and moreover we don't have a replication connection open so we
don't have where to send the data anyway.
Author: Tomas Vondra, Dilip Kumar and Amit Kapila
Reviewed-by: Amit Kapila, Kuntal Ghosh and Ajin Cherian
Tested-by: Neha Sharma, Mahendra Singh Thalor and Ajin Cherian
Discussion: https://postgr.es/m/688b0b7f-2f6c-d827-c27b-216a8e3ea700@2ndquadrant.com
2020-09-03 04:24:07 +02:00
|
|
|
changes_filename(path, MyLogicalRepWorker->subid, xid);
|
|
|
|
elog(DEBUG1, "replaying changes from file \"%s\"", path);
|
2021-06-12 18:59:15 +02:00
|
|
|
|
2021-09-02 04:43:46 +02:00
|
|
|
fd = BufFileOpenFileSet(MyLogicalRepWorker->stream_fileset, path, O_RDONLY,
|
|
|
|
false);
|
Add support for streaming to built-in logical replication.
To add support for streaming of in-progress transactions into the
built-in logical replication, we need to do three things:
* Extend the logical replication protocol, so identify in-progress
transactions, and allow adding additional bits of information (e.g.
XID of subtransactions).
* Modify the output plugin (pgoutput) to implement the new stream
API callbacks, by leveraging the extended replication protocol.
* Modify the replication apply worker, to properly handle streamed
in-progress transaction by spilling the data to disk and then
replaying them on commit.
We however must explicitly disable streaming replication during
replication slot creation, even if the plugin supports it. We
don't need to replicate the changes accumulated during this phase,
and moreover we don't have a replication connection open so we
don't have where to send the data anyway.
Author: Tomas Vondra, Dilip Kumar and Amit Kapila
Reviewed-by: Amit Kapila, Kuntal Ghosh and Ajin Cherian
Tested-by: Neha Sharma, Mahendra Singh Thalor and Ajin Cherian
Discussion: https://postgr.es/m/688b0b7f-2f6c-d827-c27b-216a8e3ea700@2ndquadrant.com
2020-09-03 04:24:07 +02:00
|
|
|
|
|
|
|
buffer = palloc(BLCKSZ);
|
|
|
|
initStringInfo(&s2);
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcxt);
|
|
|
|
|
2021-07-29 12:21:45 +02:00
|
|
|
remote_final_lsn = lsn;
|
Add support for streaming to built-in logical replication.
To add support for streaming of in-progress transactions into the
built-in logical replication, we need to do three things:
* Extend the logical replication protocol, so identify in-progress
transactions, and allow adding additional bits of information (e.g.
XID of subtransactions).
* Modify the output plugin (pgoutput) to implement the new stream
API callbacks, by leveraging the extended replication protocol.
* Modify the replication apply worker, to properly handle streamed
in-progress transaction by spilling the data to disk and then
replaying them on commit.
We however must explicitly disable streaming replication during
replication slot creation, even if the plugin supports it. We
don't need to replicate the changes accumulated during this phase,
and moreover we don't have a replication connection open so we
don't have where to send the data anyway.
Author: Tomas Vondra, Dilip Kumar and Amit Kapila
Reviewed-by: Amit Kapila, Kuntal Ghosh and Ajin Cherian
Tested-by: Neha Sharma, Mahendra Singh Thalor and Ajin Cherian
Discussion: https://postgr.es/m/688b0b7f-2f6c-d827-c27b-216a8e3ea700@2ndquadrant.com
2020-09-03 04:24:07 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Make sure the handle apply_dispatch methods are aware we're in a remote
|
|
|
|
* transaction.
|
|
|
|
*/
|
|
|
|
in_remote_transaction = true;
|
|
|
|
pgstat_report_activity(STATE_RUNNING, NULL);
|
|
|
|
|
2021-06-10 18:27:27 +02:00
|
|
|
end_replication_step();
|
|
|
|
|
Add support for streaming to built-in logical replication.
To add support for streaming of in-progress transactions into the
built-in logical replication, we need to do three things:
* Extend the logical replication protocol, so identify in-progress
transactions, and allow adding additional bits of information (e.g.
XID of subtransactions).
* Modify the output plugin (pgoutput) to implement the new stream
API callbacks, by leveraging the extended replication protocol.
* Modify the replication apply worker, to properly handle streamed
in-progress transaction by spilling the data to disk and then
replaying them on commit.
We however must explicitly disable streaming replication during
replication slot creation, even if the plugin supports it. We
don't need to replicate the changes accumulated during this phase,
and moreover we don't have a replication connection open so we
don't have where to send the data anyway.
Author: Tomas Vondra, Dilip Kumar and Amit Kapila
Reviewed-by: Amit Kapila, Kuntal Ghosh and Ajin Cherian
Tested-by: Neha Sharma, Mahendra Singh Thalor and Ajin Cherian
Discussion: https://postgr.es/m/688b0b7f-2f6c-d827-c27b-216a8e3ea700@2ndquadrant.com
2020-09-03 04:24:07 +02:00
|
|
|
/*
|
|
|
|
* Read the entries one by one and pass them through the same logic as in
|
|
|
|
* apply_dispatch.
|
|
|
|
*/
|
|
|
|
nchanges = 0;
|
|
|
|
while (true)
|
|
|
|
{
|
|
|
|
int nbytes;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
|
|
|
|
/* read length of the on-disk record */
|
|
|
|
nbytes = BufFileRead(fd, &len, sizeof(len));
|
|
|
|
|
|
|
|
/* have we reached end of the file? */
|
|
|
|
if (nbytes == 0)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* do we have a correct length? */
|
|
|
|
if (nbytes != sizeof(len))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode_for_file_access(),
|
|
|
|
errmsg("could not read from streaming transaction's changes file \"%s\": %m",
|
|
|
|
path)));
|
|
|
|
|
2021-06-12 18:59:15 +02:00
|
|
|
if (len <= 0)
|
|
|
|
elog(ERROR, "incorrect length %d in streaming transaction's changes file \"%s\"",
|
|
|
|
len, path);
|

		/* make sure we have a sufficiently large buffer */
		buffer = repalloc(buffer, len);

		/* and finally read the data into the buffer */
		if (BufFileRead(fd, buffer, len) != len)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not read from streaming transaction's changes file \"%s\": %m",
							path)));

		/* copy the buffer to the stringinfo and call apply_dispatch */
		resetStringInfo(&s2);
		appendBinaryStringInfo(&s2, buffer, len);

		/* Ensure we are reading the data into our memory context. */
		oldcxt = MemoryContextSwitchTo(ApplyMessageContext);

		apply_dispatch(&s2);

		MemoryContextReset(ApplyMessageContext);

		MemoryContextSwitchTo(oldcxt);

		nchanges++;

		if (nchanges % 1000 == 0)
			elog(DEBUG1, "replayed %d changes from file \"%s\"",
				 nchanges, path);
	}

	BufFileClose(fd);

	pfree(buffer);
	pfree(s2.data);

	elog(DEBUG1, "replayed %d (all) changes from file \"%s\"",
		 nchanges, path);

	return;
}
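
/*
 * A minimal sketch (not part of the original file) of the writer side of
 * the framing consumed by the replay loop above: each spooled change is a
 * length prefix followed by that many bytes of the serialized message.
 * The helper name is hypothetical, and we assume the BufFileWrite()
 * signature of this vintage; the real writer elsewhere in this file also
 * validates the result.
 */
#ifdef NOT_USED
static void
stream_write_change_sketch(BufFile *fd, const char *data, int len)
{
	Assert(len > 0);

	/* length prefix first, then the payload, mirroring the reader above */
	BufFileWrite(fd, &len, sizeof(len));
	BufFileWrite(fd, (void *) data, len);
}
#endif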

/*
 * Handle STREAM COMMIT message.
 */
static void
apply_handle_stream_commit(StringInfo s)
{
	TransactionId xid;
	LogicalRepCommitData commit_data;

	if (in_streamed_transaction)
		ereport(ERROR,
				(errcode(ERRCODE_PROTOCOL_VIOLATION),
				 errmsg_internal("STREAM COMMIT message without STREAM STOP")));

	xid = logicalrep_read_stream_commit(s, &commit_data);
	set_apply_error_context_xact(xid, commit_data.committime);

	elog(DEBUG1, "received commit for streamed transaction %u", xid);

	apply_spooled_messages(xid, commit_data.commit_lsn);

	apply_handle_commit_internal(&commit_data);

	/* unlink the files with serialized changes and subxact info */
	stream_cleanup_files(MyLogicalRepWorker->subid, xid);

	/* Process any tables that are being synchronized in parallel. */
	process_syncing_tables(commit_data.end_lsn);

	pgstat_report_activity(STATE_IDLE, NULL);

	reset_apply_error_context_info();
}
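
/*
 * A minimal sketch (not part of the original file) of the message ordering
 * that the protocol check above enforces.  A streamed transaction arrives
 * as one or more STREAM START ... STREAM STOP chunks followed by a final
 * STREAM COMMIT (or abort), so a commit that arrives while
 * in_streamed_transaction is still set means the closing STREAM STOP was
 * never received.  The helper name is hypothetical.
 */
#ifdef NOT_USED
static bool
stream_commit_is_wellformed_sketch(bool in_streamed_xact)
{
	/* STREAM COMMIT is only legal between chunks, i.e. after STREAM STOP */
	return !in_streamed_xact;
}
#endif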

/*
 * Helper function for apply_handle_commit and apply_handle_stream_commit.
 */
static void
apply_handle_commit_internal(LogicalRepCommitData *commit_data)
{
	if (IsTransactionState())
	{
		/*
		 * Update origin state so we can restart streaming from the correct
		 * position in case of a crash.
		 */
		replorigin_session_origin_lsn = commit_data->end_lsn;
		replorigin_session_origin_timestamp = commit_data->committime;

		CommitTransactionCommand();
		pgstat_report_stat(false);

		store_flush_position(commit_data->end_lsn);
	}
	else
	{
		/* Process any invalidation messages that might have accumulated. */
		AcceptInvalidationMessages();
		maybe_reread_subscription();
	}

	in_remote_transaction = false;
}
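
/*
 * A minimal sketch (not part of the original file) of why the origin fields
 * are assigned before CommitTransactionCommand() above: the origin advance
 * rides in the same commit, so after a crash the worker can resume the
 * stream at the recorded end_lsn instead of re-applying the transaction.
 * The helper name is hypothetical.
 */
#ifdef NOT_USED
static void
record_origin_progress_sketch(LogicalRepCommitData *commit_data)
{
	/* must be set before the commit that makes the changes durable */
	replorigin_session_origin_lsn = commit_data->end_lsn;
	replorigin_session_origin_timestamp = commit_data->committime;
}
#endif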

/*
 * Handle RELATION message.
 *
 * Note we don't do validation against the local schema here.  That
 * validation is postponed until the first change for the relation arrives,
 * since we only care about it when applying changes for it anyway and we do
 * less locking this way.
 */
static void
apply_handle_relation(StringInfo s)
{
	LogicalRepRelation *rel;

	if (handle_streamed_transaction(LOGICAL_REP_MSG_RELATION, s))
		return;

	rel = logicalrep_read_rel(s);
	logicalrep_relmap_update(rel);
}

/*
 * Handle TYPE message.
 *
 * This implementation pays no attention to TYPE messages; we expect the user
 * to have set things up so that the incoming data is acceptable to the input
 * functions for the locally subscribed tables.  Hence, we just read and
 * discard the message.
 */
static void
apply_handle_type(StringInfo s)
{
	LogicalRepTyp typ;

	if (handle_streamed_transaction(LOGICAL_REP_MSG_TYPE, s))
		return;

	logicalrep_read_typ(s, &typ);
}

/*
 * Get the replica identity index, or if that is not defined, the primary
 * key.
 *
 * If neither is defined, returns InvalidOid.
 */
static Oid
GetRelationIdentityOrPK(Relation rel)
{
	Oid			idxoid;

	idxoid = RelationGetReplicaIndex(rel);

	if (!OidIsValid(idxoid))
		idxoid = RelationGetPrimaryKeyIndex(rel);

	return idxoid;
}
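
/*
 * A minimal sketch (not part of the original file) of how the result of
 * GetRelationIdentityOrPK() is consumed by the tuple-lookup code further
 * down: a valid index OID permits an index lookup, while InvalidOid forces
 * a sequential scan, which is only workable when the publisher sends whole
 * rows (REPLICA IDENTITY FULL).  The helper name is hypothetical.
 */
#ifdef NOT_USED
static bool
can_use_index_lookup_sketch(Relation localrel, LogicalRepRelation *remoterel)
{
	Oid			idxoid = GetRelationIdentityOrPK(localrel);

	if (OidIsValid(idxoid))
		return true;

	/* no identity index or PK: a seq scan needs full tuples to compare */
	Assert(remoterel->replident == REPLICA_IDENTITY_FULL);
	return false;
}
#endif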

/*
 * Check that we (the subscription owner) have sufficient privileges on the
 * target relation to perform the given operation.
 */
static void
TargetPrivilegesCheck(Relation rel, AclMode mode)
{
	Oid			relid;
	AclResult	aclresult;

	relid = RelationGetRelid(rel);
	aclresult = pg_class_aclcheck(relid, GetUserId(), mode);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult,
					   get_relkind_objtype(rel->rd_rel->relkind),
					   get_rel_name(relid));

	/*
	 * We lack the infrastructure to honor RLS policies.  It might be
	 * possible to add such infrastructure here, but tablesync workers lack
	 * it, too, so we don't bother.  RLS does not ordinarily apply to
	 * TRUNCATE commands, but it seems dangerous to replicate a TRUNCATE and
	 * then refuse to replicate subsequent INSERTs, so we forbid all commands
	 * the same.
	 */
	if (check_enable_rls(relid, InvalidOid, false) == RLS_ENABLED)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("\"%s\" cannot replicate into relation with row-level security enabled: \"%s\"",
						GetUserNameFromId(GetUserId(), true),
						RelationGetRelationName(rel))));
}
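
/*
 * A minimal sketch (not part of the original file) of how the apply
 * handlers below choose the AclMode they pass to TargetPrivilegesCheck():
 * the replicated operation maps one-to-one onto the matching ACL bit, and
 * tuple lookups additionally check ACL_SELECT.  The helper name is
 * hypothetical; the real callers pass the constants directly.
 */
#ifdef NOT_USED
static AclMode
required_acl_mode_sketch(CmdType operation)
{
	switch (operation)
	{
		case CMD_INSERT:
			return ACL_INSERT;
		case CMD_UPDATE:
			return ACL_UPDATE;
		case CMD_DELETE:
			return ACL_DELETE;
		default:
			elog(ERROR, "unexpected operation: %d", (int) operation);
			return 0;			/* keep compiler quiet */
	}
}
#endif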

/*
 * Handle INSERT message.
 */
static void
apply_handle_insert(StringInfo s)
{
	LogicalRepRelMapEntry *rel;
	LogicalRepTupleData newtup;
	LogicalRepRelId relid;
	ApplyExecutionData *edata;
	EState	   *estate;
	TupleTableSlot *remoteslot;
	MemoryContext oldctx;

	if (handle_streamed_transaction(LOGICAL_REP_MSG_INSERT, s))
		return;

	begin_replication_step();

	relid = logicalrep_read_insert(s, &newtup);
	rel = logicalrep_rel_open(relid, RowExclusiveLock);
	if (!should_apply_changes_for_rel(rel))
	{
		/*
		 * The relation can't become interesting in the middle of the
		 * transaction so it's safe to unlock it.
		 */
		logicalrep_rel_close(rel, RowExclusiveLock);
		end_replication_step();
		return;
	}

	/* Set relation for error callback */
	apply_error_callback_arg.rel = rel;

	/* Initialize the executor state. */
	edata = create_edata_for_relation(rel);
	estate = edata->estate;
	remoteslot = ExecInitExtraTupleSlot(estate,
										RelationGetDescr(rel->localrel),
										&TTSOpsVirtual);

	/* Process and store remote tuple in the slot */
	oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
	slot_store_data(remoteslot, rel, &newtup);
	slot_fill_defaults(rel, estate, remoteslot);
	MemoryContextSwitchTo(oldctx);

	/* For a partitioned table, insert the tuple into a partition. */
	if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		apply_handle_tuple_routing(edata,
								   remoteslot, NULL, CMD_INSERT);
	else
		apply_handle_insert_internal(edata, edata->targetRelInfo,
									 remoteslot);

	finish_edata(edata);

	/* Reset relation for error callback */
	apply_error_callback_arg.rel = NULL;

	logicalrep_rel_close(rel, NoLock);

	end_replication_step();
}
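
/*
 * A minimal sketch (not part of the original file) of the dispatch pattern
 * shared by the INSERT/UPDATE/DELETE handlers: partitioned targets go
 * through apply_handle_tuple_routing() so the tuple lands in the right
 * partition, while plain tables are applied directly via the *_internal
 * workhorse.  The helper name is hypothetical.
 */
#ifdef NOT_USED
static bool
needs_tuple_routing_sketch(Relation localrel)
{
	return localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
}
#endif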

/*
 * Workhorse for apply_handle_insert()
 *
 * relinfo is for the relation we're actually inserting into
 * (could be a child partition of edata->targetRelInfo).
 */
static void
apply_handle_insert_internal(ApplyExecutionData *edata,
							 ResultRelInfo *relinfo,
							 TupleTableSlot *remoteslot)
{
	EState	   *estate = edata->estate;

	/* We must open indexes here. */
	ExecOpenIndices(relinfo, false);

	/* Do the insert. */
	TargetPrivilegesCheck(relinfo->ri_RelationDesc, ACL_INSERT);
	ExecSimpleRelationInsert(relinfo, estate, remoteslot);

	/* Cleanup. */
	ExecCloseIndices(relinfo);
}

/*
 * Check if the logical replication relation is updatable and throw
 * appropriate error if it isn't.
 */
static void
check_relation_updatable(LogicalRepRelMapEntry *rel)
{
	/* Updatable, no error. */
	if (rel->updatable)
		return;

	/*
	 * We are in error mode, so it's fine that this is somewhat slow.  It's
	 * better to give the user a correct error.
	 */
	if (OidIsValid(GetRelationIdentityOrPK(rel->localrel)))
	{
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("publisher did not send replica identity column "
						"expected by the logical replication target relation \"%s.%s\"",
						rel->remoterel.nspname, rel->remoterel.relname)));
	}

	ereport(ERROR,
			(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
			 errmsg("logical replication target relation \"%s.%s\" has "
					"neither REPLICA IDENTITY index nor PRIMARY "
					"KEY and published relation does not have "
					"REPLICA IDENTITY FULL",
					rel->remoterel.nspname, rel->remoterel.relname)));
}

/*
 * Handle UPDATE message.
 *
 * TODO: FDW support
 */
static void
apply_handle_update(StringInfo s)
{
	LogicalRepRelMapEntry *rel;
	LogicalRepRelId relid;
	ApplyExecutionData *edata;
	EState	   *estate;
	LogicalRepTupleData oldtup;
	LogicalRepTupleData newtup;
	bool		has_oldtup;
	TupleTableSlot *remoteslot;
	RangeTblEntry *target_rte;
	MemoryContext oldctx;

	if (handle_streamed_transaction(LOGICAL_REP_MSG_UPDATE, s))
		return;

	begin_replication_step();

	relid = logicalrep_read_update(s, &has_oldtup, &oldtup,
								   &newtup);
	rel = logicalrep_rel_open(relid, RowExclusiveLock);
	if (!should_apply_changes_for_rel(rel))
	{
		/*
		 * The relation can't become interesting in the middle of the
		 * transaction so it's safe to unlock it.
		 */
		logicalrep_rel_close(rel, RowExclusiveLock);
		end_replication_step();
		return;
	}

	/* Set relation for error callback */
	apply_error_callback_arg.rel = rel;

	/* Check if we can do the update. */
	check_relation_updatable(rel);

	/* Initialize the executor state. */
	edata = create_edata_for_relation(rel);
	estate = edata->estate;
	remoteslot = ExecInitExtraTupleSlot(estate,
										RelationGetDescr(rel->localrel),
										&TTSOpsVirtual);

	/*
	 * Populate updatedCols so that per-column triggers can fire, and so the
	 * executor can correctly pass down the indexUnchanged hint.  This could
	 * include more columns than were actually changed on the publisher
	 * because the logical replication protocol doesn't contain that
	 * information.  But it would for example exclude columns that only
	 * exist on the subscriber, since we are not touching those.
	 */
	target_rte = list_nth(estate->es_range_table, 0);
	for (int i = 0; i < remoteslot->tts_tupleDescriptor->natts; i++)
	{
		Form_pg_attribute att = TupleDescAttr(remoteslot->tts_tupleDescriptor, i);
		int			remoteattnum = rel->attrmap->attnums[i];

		if (!att->attisdropped && remoteattnum >= 0)
		{
			Assert(remoteattnum < newtup.ncols);
			if (newtup.colstatus[remoteattnum] != LOGICALREP_COLUMN_UNCHANGED)
				target_rte->updatedCols =
					bms_add_member(target_rte->updatedCols,
								   i + 1 - FirstLowInvalidHeapAttributeNumber);
		}
	}

	/* Also populate extraUpdatedCols, in case we have generated columns */
	fill_extraUpdatedCols(target_rte, rel->localrel);

	/* Build the search tuple. */
	oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
	slot_store_data(remoteslot, rel,
					has_oldtup ? &oldtup : &newtup);
	MemoryContextSwitchTo(oldctx);

	/* For a partitioned table, apply update to correct partition. */
	if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		apply_handle_tuple_routing(edata,
								   remoteslot, &newtup, CMD_UPDATE);
	else
		apply_handle_update_internal(edata, edata->targetRelInfo,
									 remoteslot, &newtup);

	finish_edata(edata);

	/* Reset relation for error callback */
	apply_error_callback_arg.rel = NULL;

	logicalrep_rel_close(rel, NoLock);

	end_replication_step();
}
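
/*
 * A minimal sketch (not part of the original file) of the attribute-number
 * arithmetic used when building updatedCols above: bitmapsets cannot hold
 * negative or zero members, so attribute numbers are shifted by
 * FirstLowInvalidHeapAttributeNumber (a negative constant) before
 * insertion, and shifted back when read.  The helper name is hypothetical.
 */
#ifdef NOT_USED
static Bitmapset *
add_updated_col_sketch(Bitmapset *updatedCols, AttrNumber attno)
{
	/* attno is 1-based; the offset makes system attributes representable */
	return bms_add_member(updatedCols,
						  attno - FirstLowInvalidHeapAttributeNumber);
}
#endif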

/*
 * Workhorse for apply_handle_update()
 *
 * relinfo is for the relation we're actually updating in
 * (could be a child partition of edata->targetRelInfo).
 */
static void
apply_handle_update_internal(ApplyExecutionData *edata,
							 ResultRelInfo *relinfo,
							 TupleTableSlot *remoteslot,
							 LogicalRepTupleData *newtup)
{
	EState	   *estate = edata->estate;
	LogicalRepRelMapEntry *relmapentry = edata->targetRel;
	Relation	localrel = relinfo->ri_RelationDesc;
	EPQState	epqstate;
	TupleTableSlot *localslot;
	bool		found;
	MemoryContext oldctx;

	EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
	ExecOpenIndices(relinfo, false);

	found = FindReplTupleInLocalRel(estate, localrel,
									&relmapentry->remoterel,
									remoteslot, &localslot);
	ExecClearTuple(remoteslot);

	/*
	 * Tuple found.
	 *
	 * Note this will fail if there are other conflicting unique indexes.
	 */
	if (found)
	{
		/* Process and store remote tuple in the slot */
		oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
		slot_modify_data(remoteslot, localslot, relmapentry, newtup);
		MemoryContextSwitchTo(oldctx);

		EvalPlanQualSetSlot(&epqstate, remoteslot);

		/* Do the actual update. */
		TargetPrivilegesCheck(relinfo->ri_RelationDesc, ACL_UPDATE);
		ExecSimpleRelationUpdate(relinfo, estate, &epqstate, localslot,
								 remoteslot);
	}
	else
	{
		/*
		 * The tuple to be updated could not be found.  Do nothing except
		 * for emitting a log message.
		 *
		 * XXX should this be promoted to ereport(LOG) perhaps?
		 */
		elog(DEBUG1,
			 "logical replication did not find row to be updated "
			 "in replication target relation \"%s\"",
			 RelationGetRelationName(localrel));
	}

	/* Cleanup. */
	ExecCloseIndices(relinfo);
	EvalPlanQualEnd(&epqstate);
}

/*
 * Handle DELETE message.
 *
 * TODO: FDW support
 */
static void
apply_handle_delete(StringInfo s)
{
	LogicalRepRelMapEntry *rel;
	LogicalRepTupleData oldtup;
	LogicalRepRelId relid;
	ApplyExecutionData *edata;
	EState	   *estate;
	TupleTableSlot *remoteslot;
	MemoryContext oldctx;

	if (handle_streamed_transaction(LOGICAL_REP_MSG_DELETE, s))
		return;

	begin_replication_step();

	relid = logicalrep_read_delete(s, &oldtup);
	rel = logicalrep_rel_open(relid, RowExclusiveLock);
	if (!should_apply_changes_for_rel(rel))
	{
		/*
		 * The relation can't become interesting in the middle of the
		 * transaction so it's safe to unlock it.
		 */
		logicalrep_rel_close(rel, RowExclusiveLock);
		end_replication_step();
		return;
	}

	/* Set relation for error callback */
	apply_error_callback_arg.rel = rel;

	/* Check if we can do the delete. */
	check_relation_updatable(rel);

	/* Initialize the executor state. */
	edata = create_edata_for_relation(rel);
	estate = edata->estate;
	remoteslot = ExecInitExtraTupleSlot(estate,
										RelationGetDescr(rel->localrel),
										&TTSOpsVirtual);

	/* Build the search tuple. */
	oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
	slot_store_data(remoteslot, rel, &oldtup);
	MemoryContextSwitchTo(oldctx);

	/* For a partitioned table, apply delete to correct partition. */
	if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		apply_handle_tuple_routing(edata,
								   remoteslot, NULL, CMD_DELETE);
	else
		apply_handle_delete_internal(edata, edata->targetRelInfo,
									 remoteslot);

	finish_edata(edata);

	/* Reset relation for error callback */
	apply_error_callback_arg.rel = NULL;

	logicalrep_rel_close(rel, NoLock);

	end_replication_step();
}

/*
 * Workhorse for apply_handle_delete()
 *
 * relinfo is for the relation we're actually deleting from
 * (could be a child partition of edata->targetRelInfo).
 */
static void
apply_handle_delete_internal(ApplyExecutionData *edata,
							 ResultRelInfo *relinfo,
							 TupleTableSlot *remoteslot)
{
	EState	   *estate = edata->estate;
	Relation	localrel = relinfo->ri_RelationDesc;
	LogicalRepRelation *remoterel = &edata->targetRel->remoterel;
	EPQState	epqstate;
	TupleTableSlot *localslot;
	bool		found;

	EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
	ExecOpenIndices(relinfo, false);

	found = FindReplTupleInLocalRel(estate, localrel, remoterel,
									remoteslot, &localslot);

	/* If found, delete it. */
	if (found)
	{
		EvalPlanQualSetSlot(&epqstate, localslot);

		/* Do the actual delete. */
		TargetPrivilegesCheck(relinfo->ri_RelationDesc, ACL_DELETE);
		ExecSimpleRelationDelete(relinfo, estate, &epqstate, localslot);
	}
	else
	{
		/*
		 * The tuple to be deleted could not be found.  Do nothing except
		 * for emitting a log message.
		 *
		 * XXX should this be promoted to ereport(LOG) perhaps?
		 */
		elog(DEBUG1,
			 "logical replication did not find row to be deleted "
			 "in replication target relation \"%s\"",
			 RelationGetRelationName(localrel));
	}

	/* Cleanup. */
	ExecCloseIndices(relinfo);
	EvalPlanQualEnd(&epqstate);
}

/*
 * Try to find a tuple received from the publication side (in 'remoteslot')
 * in the corresponding local relation using either the replica identity
 * index, the primary key, or, if needed, a sequential scan.
 *
 * The local tuple, if found, is returned in '*localslot'.
 */
static bool
FindReplTupleInLocalRel(EState *estate, Relation localrel,
                        LogicalRepRelation *remoterel,
                        TupleTableSlot *remoteslot,
                        TupleTableSlot **localslot)
{
    Oid         idxoid;
    bool        found;

    /*
     * Regardless of the top-level operation, we're performing a read here,
     * so check for SELECT privileges.
     */
    TargetPrivilegesCheck(localrel, ACL_SELECT);

    *localslot = table_slot_create(localrel, &estate->es_tupleTable);

    idxoid = GetRelationIdentityOrPK(localrel);
    Assert(OidIsValid(idxoid) ||
           (remoterel->replident == REPLICA_IDENTITY_FULL));

    if (OidIsValid(idxoid))
        found = RelationFindReplTupleByIndex(localrel, idxoid,
                                             LockTupleExclusive,
                                             remoteslot, *localslot);
    else
        found = RelationFindReplTupleSeq(localrel, LockTupleExclusive,
                                         remoteslot, *localslot);

    return found;
}
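
/*
 * A minimal sketch of the typical call pattern (the real callers are the
 * update/delete handlers; names here are illustrative only):
 *
 *      TupleTableSlot *localslot;
 *
 *      if (FindReplTupleInLocalRel(estate, localrel, &rel->remoterel,
 *                                  remoteslot, &localslot))
 *      {
 *          ... apply the UPDATE or DELETE to localslot ...
 *      }
 *      else
 *      {
 *          ... log at DEBUG1 and skip the change ...
 *      }
 *
 * Note that the tuple, when found, is locked with LockTupleExclusive, so
 * the caller can go on to modify it without refetching.
 */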

/*
 * This handles insert, update, delete on a partitioned table.
 */
static void
apply_handle_tuple_routing(ApplyExecutionData *edata,
                           TupleTableSlot *remoteslot,
                           LogicalRepTupleData *newtup,
                           CmdType operation)
{
    EState     *estate = edata->estate;
    LogicalRepRelMapEntry *relmapentry = edata->targetRel;
    ResultRelInfo *relinfo = edata->targetRelInfo;
    Relation    parentrel = relinfo->ri_RelationDesc;
    ModifyTableState *mtstate;
    PartitionTupleRouting *proute;
    ResultRelInfo *partrelinfo;
    Relation    partrel;
    TupleTableSlot *remoteslot_part;
    TupleConversionMap *map;
    MemoryContext oldctx;

    /* ModifyTableState is needed for ExecFindPartition(). */
    edata->mtstate = mtstate = makeNode(ModifyTableState);
    mtstate->ps.plan = NULL;
    mtstate->ps.state = estate;
    mtstate->operation = operation;
    mtstate->resultRelInfo = relinfo;

    /* ... as is PartitionTupleRouting. */
    edata->proute = proute = ExecSetupPartitionTupleRouting(estate, parentrel);

    /* Find the partition to which the "search tuple" belongs. */
    Assert(remoteslot != NULL);
    oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
    partrelinfo = ExecFindPartition(mtstate, relinfo, proute,
                                    remoteslot, estate);
    Assert(partrelinfo != NULL);
    partrel = partrelinfo->ri_RelationDesc;

    /*
     * To perform any of the operations below, the tuple must match the
     * partition's rowtype.  Convert if needed or just copy, using a
     * dedicated slot to store the tuple in any case.
     */
    remoteslot_part = partrelinfo->ri_PartitionTupleSlot;
    if (remoteslot_part == NULL)
        remoteslot_part = table_slot_create(partrel, &estate->es_tupleTable);
    map = partrelinfo->ri_RootToPartitionMap;
    if (map != NULL)
        remoteslot_part = execute_attr_map_slot(map->attrMap, remoteslot,
                                                remoteslot_part);
    else
    {
        remoteslot_part = ExecCopySlot(remoteslot_part, remoteslot);
        slot_getallattrs(remoteslot_part);
    }
    MemoryContextSwitchTo(oldctx);

    switch (operation)
    {
        case CMD_INSERT:
            apply_handle_insert_internal(edata, partrelinfo,
                                         remoteslot_part);
            break;

        case CMD_DELETE:
            apply_handle_delete_internal(edata, partrelinfo,
                                         remoteslot_part);
            break;

        case CMD_UPDATE:

            /*
             * For UPDATE, depending on whether or not the updated tuple
             * satisfies the partition's constraint, perform a simple UPDATE
             * of the partition or move the updated tuple into a different
             * suitable partition.
             */
            {
                AttrMap    *attrmap = map ? map->attrMap : NULL;
                LogicalRepRelMapEntry *part_entry;
                TupleTableSlot *localslot;
                ResultRelInfo *partrelinfo_new;
                bool        found;

                part_entry = logicalrep_partition_open(relmapentry, partrel,
                                                       attrmap);

                /* Get the matching local tuple from the partition. */
                found = FindReplTupleInLocalRel(estate, partrel,
                                                &part_entry->remoterel,
                                                remoteslot_part, &localslot);
                if (!found)
                {
                    /*
                     * The tuple to be updated could not be found.  Do
                     * nothing except for emitting a log message.
                     *
                     * XXX should this be promoted to ereport(LOG) perhaps?
                     */
                    elog(DEBUG1,
                         "logical replication did not find row to be updated "
                         "in replication target relation's partition \"%s\"",
                         RelationGetRelationName(partrel));
                    return;
                }

                /*
                 * Apply the update to the local tuple, putting the result in
                 * remoteslot_part.
                 */
                oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
                slot_modify_data(remoteslot_part, localslot, part_entry,
                                 newtup);
                MemoryContextSwitchTo(oldctx);

                /*
                 * Does the updated tuple still satisfy the current
                 * partition's constraint?
                 */
                if (!partrel->rd_rel->relispartition ||
                    ExecPartitionCheck(partrelinfo, remoteslot_part, estate,
                                       false))
                {
                    /*
                     * Yes, so simply UPDATE the partition.  We don't go
                     * through apply_handle_update_internal(), which would
                     * normally do the work below, because that would repeat
                     * the lookup of the local tuple already done above.
                     */
                    EPQState    epqstate;

                    EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
                    ExecOpenIndices(partrelinfo, false);

                    EvalPlanQualSetSlot(&epqstate, remoteslot_part);
                    TargetPrivilegesCheck(partrelinfo->ri_RelationDesc,
                                          ACL_UPDATE);
                    ExecSimpleRelationUpdate(partrelinfo, estate, &epqstate,
                                             localslot, remoteslot_part);
                    ExecCloseIndices(partrelinfo);
                    EvalPlanQualEnd(&epqstate);
                }
                else
                {
                    /* Move the tuple into the new partition. */

                    /*
                     * The new partition will be found using tuple routing,
                     * which can only occur via the parent table.  We might
                     * need to convert the tuple to the parent's rowtype.
                     * Note that this is the tuple found in the partition,
                     * not the original search tuple received by this
                     * function.
                     */
                    if (map)
                    {
                        TupleConversionMap *PartitionToRootMap =
                        convert_tuples_by_name(RelationGetDescr(partrel),
                                               RelationGetDescr(parentrel));

                        remoteslot =
                            execute_attr_map_slot(PartitionToRootMap->attrMap,
                                                  remoteslot_part, remoteslot);
                    }
                    else
                    {
                        remoteslot = ExecCopySlot(remoteslot, remoteslot_part);
                        slot_getallattrs(remoteslot);
                    }

                    /* Find the new partition. */
                    oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
                    partrelinfo_new = ExecFindPartition(mtstate, relinfo,
                                                        proute, remoteslot,
                                                        estate);
                    MemoryContextSwitchTo(oldctx);
                    Assert(partrelinfo_new != partrelinfo);

                    /* DELETE old tuple found in the old partition. */
                    apply_handle_delete_internal(edata, partrelinfo,
                                                 localslot);

                    /* INSERT new tuple into the new partition. */

                    /*
                     * Convert the replacement tuple to match the destination
                     * partition rowtype.
                     */
                    oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
                    partrel = partrelinfo_new->ri_RelationDesc;
                    remoteslot_part = partrelinfo_new->ri_PartitionTupleSlot;
                    if (remoteslot_part == NULL)
                        remoteslot_part = table_slot_create(partrel,
                                                            &estate->es_tupleTable);
                    map = partrelinfo_new->ri_RootToPartitionMap;
                    if (map != NULL)
                    {
                        remoteslot_part = execute_attr_map_slot(map->attrMap,
                                                                remoteslot,
                                                                remoteslot_part);
                    }
                    else
                    {
                        remoteslot_part = ExecCopySlot(remoteslot_part,
                                                       remoteslot);
                        slot_getallattrs(remoteslot);
                    }
                    MemoryContextSwitchTo(oldctx);
                    apply_handle_insert_internal(edata, partrelinfo_new,
                                                 remoteslot_part);
                }
            }
            break;

        default:
            elog(ERROR, "unrecognized CmdType: %d", (int) operation);
            break;
    }
}
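
/*
 * A worked example of the CMD_UPDATE row-movement case above, with
 * hypothetical names: given a table "tab" partitioned by RANGE (id) into
 * "p1" (id < 100) and "p2" (id >= 100), a replicated UPDATE changing id
 * from 10 to 200 no longer satisfies p1's partition constraint, so
 * ExecPartitionCheck() fails and the change is applied as a DELETE of the
 * old tuple from p1 followed by an INSERT of the new tuple into p2, routed
 * through the parent table.
 */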

/*
 * Handle TRUNCATE message.
 *
 * TODO: FDW support
 */
static void
apply_handle_truncate(StringInfo s)
{
    bool        cascade = false;
    bool        restart_seqs = false;
    List       *remote_relids = NIL;
    List       *remote_rels = NIL;
    List       *rels = NIL;
    List       *part_rels = NIL;
    List       *relids = NIL;
    List       *relids_logged = NIL;
    ListCell   *lc;
    LOCKMODE    lockmode = AccessExclusiveLock;

    if (handle_streamed_transaction(LOGICAL_REP_MSG_TRUNCATE, s))
        return;

    begin_replication_step();

    remote_relids = logicalrep_read_truncate(s, &cascade, &restart_seqs);

    foreach(lc, remote_relids)
    {
        LogicalRepRelId relid = lfirst_oid(lc);
        LogicalRepRelMapEntry *rel;

        rel = logicalrep_rel_open(relid, lockmode);
        if (!should_apply_changes_for_rel(rel))
        {
            /*
             * The relation can't become interesting in the middle of the
             * transaction so it's safe to unlock it.
             */
            logicalrep_rel_close(rel, lockmode);
            continue;
        }

        remote_rels = lappend(remote_rels, rel);
        TargetPrivilegesCheck(rel->localrel, ACL_TRUNCATE);
        rels = lappend(rels, rel->localrel);
        relids = lappend_oid(relids, rel->localreloid);
        if (RelationIsLogicallyLogged(rel->localrel))
            relids_logged = lappend_oid(relids_logged, rel->localreloid);

        /*
         * Truncate partitions if we got a message to truncate a partitioned
         * table.
         */
        if (rel->localrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
        {
            ListCell   *child;
            List       *children = find_all_inheritors(rel->localreloid,
                                                       lockmode,
                                                       NULL);

            foreach(child, children)
            {
                Oid         childrelid = lfirst_oid(child);
                Relation    childrel;

                if (list_member_oid(relids, childrelid))
                    continue;

                /* find_all_inheritors already got lock */
                childrel = table_open(childrelid, NoLock);

                /*
                 * Ignore temp tables of other backends.  See similar code in
                 * ExecuteTruncate().
                 */
                if (RELATION_IS_OTHER_TEMP(childrel))
                {
                    table_close(childrel, lockmode);
                    continue;
                }

                TargetPrivilegesCheck(childrel, ACL_TRUNCATE);
                rels = lappend(rels, childrel);
                part_rels = lappend(part_rels, childrel);
                relids = lappend_oid(relids, childrelid);
                /* Log this relation only if needed for logical decoding */
                if (RelationIsLogicallyLogged(childrel))
                    relids_logged = lappend_oid(relids_logged, childrelid);
            }
        }
    }

    /*
     * Even if we used CASCADE on the upstream primary, we explicitly default
     * to replaying changes without further cascading.  This might later be
     * made configurable with a user-specified option.
     */
    ExecuteTruncateGuts(rels,
                        relids,
                        relids_logged,
                        DROP_RESTRICT,
                        restart_seqs);
    foreach(lc, remote_rels)
    {
        LogicalRepRelMapEntry *rel = lfirst(lc);

        logicalrep_rel_close(rel, NoLock);
    }
    foreach(lc, part_rels)
    {
        Relation    rel = lfirst(lc);

        table_close(rel, NoLock);
    }

    end_replication_step();
}
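
/*
 * For example (hypothetical names): if "tab" on the subscriber is a
 * partitioned table with leaf partitions "p1" and "p2", a replicated
 * TRUNCATE of "tab" expands via find_all_inheritors() to truncate p1 and p2
 * as well.  Note also that DROP_RESTRICT is passed unconditionally above:
 * even a TRUNCATE ... CASCADE on the publisher is replayed here without
 * cascading to locally dependent tables.
 */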

/*
 * Logical replication protocol message dispatcher.
 */
static void
apply_dispatch(StringInfo s)
{
    LogicalRepMsgType action = pq_getmsgbyte(s);
    LogicalRepMsgType saved_command;

    /*
     * Set the current command being applied.  Since this function can be
     * called recursively when applying spooled changes, save the current
     * command.
     */
    saved_command = apply_error_callback_arg.command;
    apply_error_callback_arg.command = action;

    switch (action)
    {
        case LOGICAL_REP_MSG_BEGIN:
            apply_handle_begin(s);
            break;

        case LOGICAL_REP_MSG_COMMIT:
            apply_handle_commit(s);
            break;

        case LOGICAL_REP_MSG_INSERT:
            apply_handle_insert(s);
            break;

        case LOGICAL_REP_MSG_UPDATE:
            apply_handle_update(s);
            break;

        case LOGICAL_REP_MSG_DELETE:
            apply_handle_delete(s);
            break;

        case LOGICAL_REP_MSG_TRUNCATE:
            apply_handle_truncate(s);
            break;

        case LOGICAL_REP_MSG_RELATION:
            apply_handle_relation(s);
            break;

        case LOGICAL_REP_MSG_TYPE:
            apply_handle_type(s);
            break;

        case LOGICAL_REP_MSG_ORIGIN:
            apply_handle_origin(s);
            break;

        case LOGICAL_REP_MSG_MESSAGE:

            /*
             * Logical replication does not use generic logical messages yet.
             * They could, however, be used by other applications that use
             * this output plugin.
             */
            break;

        case LOGICAL_REP_MSG_STREAM_START:
            apply_handle_stream_start(s);
            break;

        case LOGICAL_REP_MSG_STREAM_STOP:
            apply_handle_stream_stop(s);
            break;

        case LOGICAL_REP_MSG_STREAM_ABORT:
            apply_handle_stream_abort(s);
            break;

        case LOGICAL_REP_MSG_STREAM_COMMIT:
            apply_handle_stream_commit(s);
            break;

        case LOGICAL_REP_MSG_BEGIN_PREPARE:
            apply_handle_begin_prepare(s);
            break;

        case LOGICAL_REP_MSG_PREPARE:
            apply_handle_prepare(s);
            break;

        case LOGICAL_REP_MSG_COMMIT_PREPARED:
            apply_handle_commit_prepared(s);
            break;

        case LOGICAL_REP_MSG_ROLLBACK_PREPARED:
            apply_handle_rollback_prepared(s);
            break;

        case LOGICAL_REP_MSG_STREAM_PREPARE:
            apply_handle_stream_prepare(s);
            break;

        default:
            ereport(ERROR,
                    (errcode(ERRCODE_PROTOCOL_VIOLATION),
                     errmsg("invalid logical replication message type \"%c\"", action)));
    }

    /* Reset the current command */
    apply_error_callback_arg.command = saved_command;
}
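
/*
 * A sketch of how a hypothetical new protocol message would be wired in:
 * define a LOGICAL_REP_MSG_* value for its one-byte type code, then add a
 * case to the switch above, e.g. (names purely illustrative):
 *
 *      case LOGICAL_REP_MSG_SOMETHING:
 *          apply_handle_something(s);
 *          break;
 *
 * Unknown type bytes deliberately fall through to the
 * ERRCODE_PROTOCOL_VIOLATION error rather than being silently ignored.
 */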

/*
 * Figure out which write/flush positions to report to the walsender process.
 *
 * We can't simply report back the last LSN the walsender sent us because the
 * local transaction might not yet be flushed to disk locally.  Instead we
 * build a list that associates local with remote LSNs for every commit.  When
 * reporting back the flush position to the sender we iterate that list and
 * check which entries on it are already locally flushed.  Those we can report
 * as having been flushed.
 *
 * *have_pending_txes is set to true if there are outstanding transactions
 * that still need to be flushed.
 */
static void
get_flush_position(XLogRecPtr *write, XLogRecPtr *flush,
                   bool *have_pending_txes)
{
    dlist_mutable_iter iter;
    XLogRecPtr  local_flush = GetFlushRecPtr(NULL);

    *write = InvalidXLogRecPtr;
    *flush = InvalidXLogRecPtr;

    dlist_foreach_modify(iter, &lsn_mapping)
    {
        FlushPosition *pos =
            dlist_container(FlushPosition, node, iter.cur);

        *write = pos->remote_end;

        if (pos->local_end <= local_flush)
        {
            *flush = pos->remote_end;
            dlist_delete(iter.cur);
            pfree(pos);
        }
        else
        {
            /*
             * We don't want to uselessly iterate over the rest of the list,
             * which could potentially be long.  Instead, get the last
             * element and grab the write position from there.
             */
            pos = dlist_tail_element(FlushPosition, node,
                                     &lsn_mapping);
            *write = pos->remote_end;
            *have_pending_txes = true;
            return;
        }
    }

    *have_pending_txes = !dlist_is_empty(&lsn_mapping);
}
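
/*
 * A worked example, assuming made-up LSNs: suppose lsn_mapping holds
 * (local_end 0/1000, remote_end 0/500) followed by (local_end 0/2000,
 * remote_end 0/600), and GetFlushRecPtr() reports 0/1500.  The first entry
 * is locally flushed, so it is removed and *flush becomes 0/500; the second
 * is not, so we stop there, take *write = 0/600 from the list tail, and set
 * *have_pending_txes = true.
 */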

/*
 * Store current remote/local lsn pair in the tracking list.
 */
static void
store_flush_position(XLogRecPtr remote_lsn)
{
    FlushPosition *flushpos;

    /* Need to do this in permanent context */
    MemoryContextSwitchTo(ApplyContext);

    /* Track commit lsn */
    flushpos = (FlushPosition *) palloc(sizeof(FlushPosition));
    flushpos->local_end = XactLastCommitEnd;
    flushpos->remote_end = remote_lsn;

    dlist_push_tail(&lsn_mapping, &flushpos->node);
    MemoryContextSwitchTo(ApplyMessageContext);
}
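
/*
 * A simplified sketch of the producer side: a commit handler calls this
 * with the remote commit-end LSN right after committing locally, roughly
 *
 *      CommitTransactionCommand();
 *      store_flush_position(commit_data.end_lsn);
 *
 * so that XactLastCommitEnd (the local commit's end LSN) is paired with the
 * remote LSN for later consumption by get_flush_position().
 */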

/* Update statistics of the worker. */
static void
UpdateWorkerStats(XLogRecPtr last_lsn, TimestampTz send_time, bool reply)
{
    MyLogicalRepWorker->last_lsn = last_lsn;
    MyLogicalRepWorker->last_send_time = send_time;
    MyLogicalRepWorker->last_recv_time = GetCurrentTimestamp();
    if (reply)
    {
        MyLogicalRepWorker->reply_lsn = last_lsn;
        MyLogicalRepWorker->reply_time = send_time;
    }
}
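
/*
 * These shared-memory fields are what monitoring reads back; in particular
 * they back the pg_stat_subscription view (received LSN, message
 * send/receipt times, and the latest reported end LSN/time).
 */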

/*
 * Apply main loop.
 */
static void
LogicalRepApplyLoop(XLogRecPtr last_received)
{
    TimestampTz last_recv_timestamp = GetCurrentTimestamp();
    bool        ping_sent = false;
    TimeLineID  tli;
    ErrorContextCallback errcallback;

    /*
     * Init the ApplyMessageContext, which we clean up after each replication
     * protocol message.
     */
    ApplyMessageContext = AllocSetContextCreate(ApplyContext,
                                                "ApplyMessageContext",
                                                ALLOCSET_DEFAULT_SIZES);

    /*
     * This memory context is used for per-stream data when streaming mode is
     * enabled.  This context is reset on each stream stop.
     */
    LogicalStreamingContext = AllocSetContextCreate(ApplyContext,
                                                    "LogicalStreamingContext",
                                                    ALLOCSET_DEFAULT_SIZES);

    /* mark as idle before starting to loop */
    pgstat_report_activity(STATE_IDLE, NULL);

    /*
     * Push apply error context callback.  Fields will be filled while
     * applying a change.
     */
    errcallback.callback = apply_error_callback;
    errcallback.previous = error_context_stack;
    error_context_stack = &errcallback;

    /* This outer loop iterates once per wait. */
    for (;;)
    {
        pgsocket    fd = PGINVALID_SOCKET;
        int         rc;
        int         len;
        char       *buf = NULL;
        bool        endofstream = false;
        long        wait_time;

        CHECK_FOR_INTERRUPTS();

        MemoryContextSwitchTo(ApplyMessageContext);

        len = walrcv_receive(LogRepWorkerWalRcvConn, &buf, &fd);

        if (len != 0)
        {
            /* Loop to process all available data (without blocking). */
            for (;;)
            {
                CHECK_FOR_INTERRUPTS();

                if (len == 0)
                {
                    break;
                }
                else if (len < 0)
                {
                    ereport(LOG,
                            (errmsg("data stream from publisher has ended")));
                    endofstream = true;
                    break;
                }
                else
                {
                    int         c;
                    StringInfoData s;

                    /* Reset timeout. */
                    last_recv_timestamp = GetCurrentTimestamp();
                    ping_sent = false;

                    /* Ensure we are reading the data into our memory context. */
                    MemoryContextSwitchTo(ApplyMessageContext);

                    s.data = buf;
                    s.len = len;
                    s.cursor = 0;
                    s.maxlen = -1;

                    c = pq_getmsgbyte(&s);

                    if (c == 'w')
                    {
                        XLogRecPtr  start_lsn;
                        XLogRecPtr  end_lsn;
                        TimestampTz send_time;

                        start_lsn = pq_getmsgint64(&s);
                        end_lsn = pq_getmsgint64(&s);
                        send_time = pq_getmsgint64(&s);

                        if (last_received < start_lsn)
                            last_received = start_lsn;

                        if (last_received < end_lsn)
                            last_received = end_lsn;

                        UpdateWorkerStats(last_received, send_time, false);

                        apply_dispatch(&s);
                    }
                    else if (c == 'k')
                    {
                        XLogRecPtr  end_lsn;
                        TimestampTz timestamp;
                        bool        reply_requested;

                        end_lsn = pq_getmsgint64(&s);
                        timestamp = pq_getmsgint64(&s);
                        reply_requested = pq_getmsgbyte(&s);

                        if (last_received < end_lsn)
                            last_received = end_lsn;

                        send_feedback(last_received, reply_requested, false);
                        UpdateWorkerStats(last_received, timestamp, true);
                    }
                    /* other message types are purposefully ignored */

                    MemoryContextReset(ApplyMessageContext);
                }

                len = walrcv_receive(LogRepWorkerWalRcvConn, &buf, &fd);
            }
        }
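
        /*
         * For reference, the two message shapes consumed above follow the
         * streaming replication protocol: 'w' (XLogData) carries start LSN,
         * end LSN, and send timestamp as three int64s followed by the
         * logical replication message payload handed to apply_dispatch();
         * 'k' (keepalive) carries the sender's end LSN, a timestamp, and a
         * one-byte reply-requested flag.
         */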
|
|
|
|
|
Reduce delay for last logicalrep feedback message when master goes idle.
The regression tests contain numerous cases where we do some activity on a
master server and then wait till the slave has ack'd flushing its copy of
that transaction. Because WAL flush on the slave is asynchronous to the
logicalrep worker process, the worker cannot send such a feedback message
during the LogicalRepApplyLoop iteration where it processes the last data
from the master. In the previous coding, the feedback message would come
out only when the loop's WaitLatchOrSocket call returned WL_TIMEOUT. That
requires one full second of delay (NAPTIME_PER_CYCLE); and to add insult
to injury, it could take more than that if the WaitLatchOrSocket was
interrupted a few times by latch-setting events.
In reality we can expect the slave's walwriter process to have flushed the
WAL data after, more or less, WalWriterDelay (typically 200ms). Hence,
if there are unacked transactions pending, make the wait delay only that
long rather than the full NAPTIME_PER_CYCLE. Also, move one of the
send_feedback() calls into the loop main line, so that we'll check for the
need to send feedback even if we were woken by a latch event and not either
socket data or timeout.
It's not clear how much this matters for production purposes, but
it's definitely helpful for testing.
Discussion: https://postgr.es/m/30864.1498861103@sss.pgh.pa.us
2017-07-01 18:15:51 +02:00
|
|
|
/* confirm all writes so far */
|
|
|
|
send_feedback(last_received, false, false);
|
|
|
|
|
Add support for streaming to built-in logical replication.
To add support for streaming of in-progress transactions into the
built-in logical replication, we need to do three things:
* Extend the logical replication protocol, so identify in-progress
transactions, and allow adding additional bits of information (e.g.
XID of subtransactions).
* Modify the output plugin (pgoutput) to implement the new stream
API callbacks, by leveraging the extended replication protocol.
* Modify the replication apply worker, to properly handle streamed
in-progress transaction by spilling the data to disk and then
replaying them on commit.
We however must explicitly disable streaming replication during
replication slot creation, even if the plugin supports it. We
don't need to replicate the changes accumulated during this phase,
and moreover we don't have a replication connection open so we
don't have where to send the data anyway.
Author: Tomas Vondra, Dilip Kumar and Amit Kapila
Reviewed-by: Amit Kapila, Kuntal Ghosh and Ajin Cherian
Tested-by: Neha Sharma, Mahendra Singh Thalor and Ajin Cherian
Discussion: https://postgr.es/m/688b0b7f-2f6c-d827-c27b-216a8e3ea700@2ndquadrant.com
2020-09-03 04:24:07 +02:00
|
|
|
        if (!in_remote_transaction && !in_streamed_transaction)
        {
            /*
             * If we didn't get any transactions for a while there might be
             * unconsumed invalidation messages in the queue, consume them
             * now.
             */
            AcceptInvalidationMessages();
            maybe_reread_subscription();

            /* Process any table synchronization changes. */
            process_syncing_tables(last_received);
        }

        /* Clean up the memory. */
        MemoryContextResetAndDeleteChildren(ApplyMessageContext);
        MemoryContextSwitchTo(TopMemoryContext);

        /* Check if we need to exit the streaming loop. */
        if (endofstream)
            break;

        /*
         * Wait for more data or latch.  If we have unflushed transactions,
         * wake up after WalWriterDelay to see if they've been flushed yet (in
         * which case we should send a feedback message).  Otherwise, there's
         * no particular urgency about waking up unless we get data or a
         * signal.
         */
        if (!dlist_is_empty(&lsn_mapping))
            wait_time = WalWriterDelay;
        else
            wait_time = NAPTIME_PER_CYCLE;

        rc = WaitLatchOrSocket(MyLatch,
                               WL_SOCKET_READABLE | WL_LATCH_SET |
                               WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                               fd, wait_time,
                               WAIT_EVENT_LOGICAL_APPLY_MAIN);

        if (rc & WL_LATCH_SET)
        {
            ResetLatch(MyLatch);
            CHECK_FOR_INTERRUPTS();
        }

        if (ConfigReloadPending)
        {
            ConfigReloadPending = false;
            ProcessConfigFile(PGC_SIGHUP);
        }

        if (rc & WL_TIMEOUT)
        {
            /*
             * We didn't receive anything new.  If we haven't heard anything
             * from the server for more than wal_receiver_timeout / 2, ping
             * the server.  Also, if it's been longer than
             * wal_receiver_status_interval since the last update we sent,
             * send a status update to the primary anyway, to report any
             * progress in applying WAL.
             */
            bool        requestReply = false;

            /*
             * Check if time since last receive from primary has reached the
             * configured limit.
             */
            if (wal_receiver_timeout > 0)
            {
                TimestampTz now = GetCurrentTimestamp();
                TimestampTz timeout;

                timeout =
                    TimestampTzPlusMilliseconds(last_recv_timestamp,
                                                wal_receiver_timeout);

                if (now >= timeout)
                    ereport(ERROR,
                            (errcode(ERRCODE_CONNECTION_FAILURE),
                             errmsg("terminating logical replication worker due to timeout")));

                /* Check to see if it's time for a ping. */
                if (!ping_sent)
                {
                    timeout = TimestampTzPlusMilliseconds(last_recv_timestamp,
                                                          (wal_receiver_timeout / 2));
                    if (now >= timeout)
                    {
                        requestReply = true;
                        ping_sent = true;
                    }
                }
            }

            send_feedback(last_received, requestReply, requestReply);
        }
    }
|
2020-10-15 16:33:48 +02:00
|
|
|
|
2021-08-27 05:00:23 +02:00
|
|
|
/* Pop the error context stack */
|
|
|
|
error_context_stack = errcallback.previous;
|
|
|
|
|
2020-10-15 16:33:48 +02:00
|
|
|
/* All done */
|
2021-05-13 01:13:54 +02:00
|
|
|
walrcv_endstreaming(LogRepWorkerWalRcvConn, &tli);
|
2017-01-19 18:00:00 +01:00
|
|
|
}
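
/*
 * For illustration (default settings assumed): the loop above wakes at
 * least every NAPTIME_PER_CYCLE (one second) when idle, but only every
 * WalWriterDelay (200ms by default) while lsn_mapping still holds
 * transactions awaiting flush confirmation, so the final feedback message
 * after a burst of activity is sent promptly rather than after a full
 * naptime.
 */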

/*
 * Send a Standby Status Update message to the server.
 *
 * 'recvpos' is the latest LSN we've received data to; 'force' is set if we
 * need to send a response to avoid timeouts.
 */
static void
send_feedback(XLogRecPtr recvpos, bool force, bool requestReply)
{
    static StringInfo reply_message = NULL;
    static TimestampTz send_time = 0;

    static XLogRecPtr last_recvpos = InvalidXLogRecPtr;
    static XLogRecPtr last_writepos = InvalidXLogRecPtr;
    static XLogRecPtr last_flushpos = InvalidXLogRecPtr;

    XLogRecPtr  writepos;
    XLogRecPtr  flushpos;
    TimestampTz now;
    bool        have_pending_txes;

    /*
     * If the user doesn't want status to be reported to the publisher, be
     * sure to exit before doing anything at all.
     */
    if (!force && wal_receiver_status_interval <= 0)
        return;

    /* It's legal to not pass a recvpos */
    if (recvpos < last_recvpos)
        recvpos = last_recvpos;

    get_flush_position(&writepos, &flushpos, &have_pending_txes);

    /*
     * No outstanding transactions to flush, we can report the latest
     * received position.  This is important for synchronous replication.
     */
    if (!have_pending_txes)
        flushpos = writepos = recvpos;

    if (writepos < last_writepos)
        writepos = last_writepos;

    if (flushpos < last_flushpos)
        flushpos = last_flushpos;

    now = GetCurrentTimestamp();

    /* If we've already reported everything, we're good. */
    if (!force &&
        writepos == last_writepos &&
        flushpos == last_flushpos &&
        !TimestampDifferenceExceeds(send_time, now,
                                    wal_receiver_status_interval * 1000))
        return;
    send_time = now;

    if (!reply_message)
    {
        MemoryContext oldctx = MemoryContextSwitchTo(ApplyContext);

        reply_message = makeStringInfo();
        MemoryContextSwitchTo(oldctx);
    }
    else
        resetStringInfo(reply_message);

    pq_sendbyte(reply_message, 'r');
    pq_sendint64(reply_message, recvpos);       /* write */
    pq_sendint64(reply_message, flushpos);      /* flush */
    pq_sendint64(reply_message, writepos);      /* apply */
    pq_sendint64(reply_message, now);           /* sendTime */
    pq_sendbyte(reply_message, requestReply);   /* replyRequested */

    elog(DEBUG2, "sending feedback (force %d) to recv %X/%X, write %X/%X, flush %X/%X",
         force,
         LSN_FORMAT_ARGS(recvpos),
         LSN_FORMAT_ARGS(writepos),
         LSN_FORMAT_ARGS(flushpos));

    walrcv_send(LogRepWorkerWalRcvConn,
                reply_message->data, reply_message->len);

    if (recvpos > last_recvpos)
        last_recvpos = recvpos;
    if (writepos > last_writepos)
        last_writepos = writepos;
    if (flushpos > last_flushpos)
        last_flushpos = flushpos;
}
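
/*
 * For illustration (hypothetical values): the reply built above is the
 * standard Standby Status Update message of the streaming replication
 * protocol, laid out as:
 *
 *     byte    'r'                        -- message type
 *     int64   recvpos   e.g. 0/3000148   -- last WAL received ("write")
 *     int64   flushpos  e.g. 0/3000148   -- last WAL flushed ("flush")
 *     int64   writepos  e.g. 0/3000148   -- last WAL applied ("apply")
 *     int64   now                        -- send timestamp
 *     byte    replyRequested  0 or 1     -- ask the publisher to reply
 */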

/*
 * Reread subscription info if needed.  Most changes will result in the
 * worker exiting (to be restarted by the launcher).
 */
static void
maybe_reread_subscription(void)
{
    MemoryContext oldctx;
    Subscription *newsub;
    bool        started_tx = false;

    /* When cache state is valid there is nothing to do here. */
    if (MySubscriptionValid)
        return;

    /* This function might be called inside or outside of a transaction. */
    if (!IsTransactionState())
    {
        StartTransactionCommand();
        started_tx = true;
    }

    /* Ensure allocations in permanent context. */
    oldctx = MemoryContextSwitchTo(ApplyContext);

    newsub = GetSubscription(MyLogicalRepWorker->subid, true);

    /*
     * Exit if the subscription was removed.  This normally should not happen
     * as the worker gets killed during DROP SUBSCRIPTION.
     */
    if (!newsub)
    {
        ereport(LOG,
                (errmsg("logical replication apply worker for subscription \"%s\" will "
                        "stop because the subscription was removed",
                        MySubscription->name)));

        proc_exit(0);
    }

    /*
     * Exit if the subscription was disabled.  This normally should not
     * happen as the worker gets killed during ALTER SUBSCRIPTION ... DISABLE.
     */
    if (!newsub->enabled)
    {
        ereport(LOG,
                (errmsg("logical replication apply worker for subscription \"%s\" will "
                        "stop because the subscription was disabled",
                        MySubscription->name)));

        proc_exit(0);
    }

    /* !slotname should never happen when enabled is true. */
    Assert(newsub->slotname);

    /* two-phase should not be altered */
    Assert(newsub->twophasestate == MySubscription->twophasestate);

    /*
     * Exit if any parameter that affects the remote connection was changed.
     * The launcher will start a new worker.
     */
    if (strcmp(newsub->conninfo, MySubscription->conninfo) != 0 ||
        strcmp(newsub->name, MySubscription->name) != 0 ||
        strcmp(newsub->slotname, MySubscription->slotname) != 0 ||
        newsub->binary != MySubscription->binary ||
        newsub->stream != MySubscription->stream ||
        newsub->owner != MySubscription->owner ||
        !equal(newsub->publications, MySubscription->publications))
    {
        ereport(LOG,
                (errmsg("logical replication apply worker for subscription \"%s\" will restart because of a parameter change",
                        MySubscription->name)));

        proc_exit(0);
    }

    /* Check for other changes that should never happen too. */
    if (newsub->dbid != MySubscription->dbid)
    {
        elog(ERROR, "subscription %u changed unexpectedly",
             MyLogicalRepWorker->subid);
    }

    /* Clean old subscription info and switch to new one. */
    FreeSubscription(MySubscription);
    MySubscription = newsub;

    MemoryContextSwitchTo(oldctx);

    /* Change synchronous commit according to the user's wishes */
    SetConfigOption("synchronous_commit", MySubscription->synccommit,
                    PGC_BACKEND, PGC_S_OVERRIDE);

    if (started_tx)
        CommitTransactionCommand();

    MySubscriptionValid = true;
}

/*
 * Callback from subscription syscache invalidation.
 */
static void
subscription_change_cb(Datum arg, int cacheid, uint32 hashvalue)
{
    MySubscriptionValid = false;
}

/*
 * subxact_info_write
 *    Store information about subxacts for a toplevel transaction.
 *
 * For each subxact we store the offset of its first change in the main file.
 * The file is always over-written as a whole.
 *
 * XXX We should only store subxacts that were not aborted yet.
 */
static void
subxact_info_write(Oid subid, TransactionId xid)
{
    char        path[MAXPGPATH];
    Size        len;
    BufFile    *fd;

    Assert(TransactionIdIsValid(xid));

    /* construct the subxact filename */
    subxact_filename(path, subid, xid);

    /* Delete the subxacts file, if it exists. */
    if (subxact_data.nsubxacts == 0)
    {
        cleanup_subxact_info();
        BufFileDeleteFileSet(MyLogicalRepWorker->stream_fileset, path, true);

        return;
    }

    /*
     * Create the subxact file if it is not already created, otherwise open
     * the existing file.
     */
    fd = BufFileOpenFileSet(MyLogicalRepWorker->stream_fileset, path, O_RDWR,
                            true);
    if (fd == NULL)
        fd = BufFileCreateFileSet(MyLogicalRepWorker->stream_fileset, path);

    len = sizeof(SubXactInfo) * subxact_data.nsubxacts;

    /* Write the subxact count and subxact info */
    BufFileWrite(fd, &subxact_data.nsubxacts, sizeof(subxact_data.nsubxacts));
    BufFileWrite(fd, subxact_data.subxacts, len);

    BufFileClose(fd);

    /* free the memory allocated for subxact info */
    cleanup_subxact_info();
}
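
/*
 * For illustration (hypothetical values): with two subtransactions the
 * subxact file written above contains, in order,
 *
 *     nsubxacts = 2
 *     SubXactInfo { xid = 740, fileno = 0, offset = 128 }
 *     SubXactInfo { xid = 741, fileno = 0, offset = 512 }
 *
 * i.e. the count followed by one fixed-size record per subxact giving the
 * position of its first change in the changes file (see subxact_info_add).
 */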

/*
 * subxact_info_read
 *    Restore information about subxacts of a streamed transaction.
 *
 * Read information about subxacts into the structure subxact_data that can
 * be used later.
 */
static void
subxact_info_read(Oid subid, TransactionId xid)
{
    char        path[MAXPGPATH];
    Size        len;
    BufFile    *fd;
    MemoryContext oldctx;

    Assert(!subxact_data.subxacts);
    Assert(subxact_data.nsubxacts == 0);
    Assert(subxact_data.nsubxacts_max == 0);

    /*
     * If the subxact file doesn't exist that means we don't have any subxact
     * info.
     */
    subxact_filename(path, subid, xid);
    fd = BufFileOpenFileSet(MyLogicalRepWorker->stream_fileset, path, O_RDONLY,
                            true);
    if (fd == NULL)
        return;

    /* read number of subxact items */
    if (BufFileRead(fd, &subxact_data.nsubxacts,
                    sizeof(subxact_data.nsubxacts)) !=
        sizeof(subxact_data.nsubxacts))
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not read from streaming transaction's subxact file \"%s\": %m",
                        path)));

    len = sizeof(SubXactInfo) * subxact_data.nsubxacts;

    /* we keep the maximum as a power of 2 */
    subxact_data.nsubxacts_max = 1 << my_log2(subxact_data.nsubxacts);

    /*
     * Allocate subxact information in the logical streaming context.  We
     * need this information during the complete stream so that we can add
     * the subtransaction info to it.  On stream stop we will flush this
     * information to the subxact file and reset the logical streaming
     * context.
     */
    oldctx = MemoryContextSwitchTo(LogicalStreamingContext);
    subxact_data.subxacts = palloc(subxact_data.nsubxacts_max *
                                   sizeof(SubXactInfo));
    MemoryContextSwitchTo(oldctx);

    if ((len > 0) && ((BufFileRead(fd, subxact_data.subxacts, len)) != len))
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not read from streaming transaction's subxact file \"%s\": %m",
                        path)));

    BufFileClose(fd);
}
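
/*
 * For illustration: my_log2() returns ceil(log2(n)), so the allocation
 * above rounds the array size up to a power of two.  E.g. for
 * nsubxacts = 3 we get nsubxacts_max = 1 << my_log2(3) = 4, leaving room
 * for one more subxact before subxact_info_add must double the array.
 */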

/*
 * subxact_info_add
 *    Add information about a subxact (offset in the main file).
 */
static void
subxact_info_add(TransactionId xid)
{
    SubXactInfo *subxacts = subxact_data.subxacts;
    int64       i;

    /* We must have a valid top level stream xid and a stream fd. */
    Assert(TransactionIdIsValid(stream_xid));
    Assert(stream_fd != NULL);

    /*
     * If the XID matches the toplevel transaction, we don't want to add it.
     */
    if (stream_xid == xid)
        return;

    /*
     * In most cases we're checking the same subxact as we've already seen in
     * the last call, so make sure to ignore it (this change comes later).
     */
    if (subxact_data.subxact_last == xid)
        return;

    /* OK, remember we're processing this XID. */
    subxact_data.subxact_last = xid;

    /*
     * Check if the transaction is already present in the array of subxacts.
     * We intentionally scan the array from the tail, because we're likely
     * adding a change for the most recent subtransactions.
     *
     * XXX Can we rely on the subxact XIDs arriving in sorted order?  That
     * would allow us to use binary search here.
     */
    for (i = subxact_data.nsubxacts; i > 0; i--)
    {
        /* found, so we're done */
        if (subxacts[i - 1].xid == xid)
            return;
    }

    /* This is a new subxact, so we need to add it to the array. */
    if (subxact_data.nsubxacts == 0)
    {
        MemoryContext oldctx;

        subxact_data.nsubxacts_max = 128;

        /*
         * Allocate this memory for subxacts in per-stream context, see
         * subxact_info_read.
         */
        oldctx = MemoryContextSwitchTo(LogicalStreamingContext);
        subxacts = palloc(subxact_data.nsubxacts_max * sizeof(SubXactInfo));
        MemoryContextSwitchTo(oldctx);
    }
    else if (subxact_data.nsubxacts == subxact_data.nsubxacts_max)
    {
        subxact_data.nsubxacts_max *= 2;
        subxacts = repalloc(subxacts,
                            subxact_data.nsubxacts_max * sizeof(SubXactInfo));
    }

    subxacts[subxact_data.nsubxacts].xid = xid;

    /*
     * Get the current offset of the stream file and store it as offset of
     * this subxact.
     */
    BufFileTell(stream_fd,
                &subxacts[subxact_data.nsubxacts].fileno,
                &subxacts[subxact_data.nsubxacts].offset);

    subxact_data.nsubxacts++;
    subxact_data.subxacts = subxacts;
}

/* format filename for file containing the info about subxacts */
static inline void
subxact_filename(char *path, Oid subid, TransactionId xid)
{
    snprintf(path, MAXPGPATH, "%u-%u.subxacts", subid, xid);
}

/* format filename for file containing serialized changes */
static inline void
changes_filename(char *path, Oid subid, TransactionId xid)
{
    snprintf(path, MAXPGPATH, "%u-%u.changes", subid, xid);
}
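
/*
 * For illustration (hypothetical OID and XID): a subscription with OID
 * 16394 streaming a remote toplevel transaction with XID 739 uses the
 * files "16394-739.subxacts" and "16394-739.changes" within the worker's
 * stream_fileset.
 */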

/*
 * stream_cleanup_files
 *    Cleanup files for a subscription / toplevel transaction.
 *
 * Remove files with serialized changes and subxact info for a particular
 * toplevel transaction.  Each subscription has a separate set of files
 * for any toplevel transaction.
 */
static void
stream_cleanup_files(Oid subid, TransactionId xid)
{
    char        path[MAXPGPATH];

    /* Delete the changes file. */
    changes_filename(path, subid, xid);
    BufFileDeleteFileSet(MyLogicalRepWorker->stream_fileset, path, false);

    /* Delete the subxact file, if it exists. */
    subxact_filename(path, subid, xid);
    BufFileDeleteFileSet(MyLogicalRepWorker->stream_fileset, path, true);
}

/*
 * stream_open_file
 *    Open a file that we'll use to serialize changes for a toplevel
 *    transaction.
 *
 * Open a file for streamed changes from a toplevel transaction identified
 * by stream_xid (global variable).  If it's the first chunk of streamed
 * changes for this transaction, create the buffile, otherwise open the
 * previously created file.
 *
 * This can only be called at the beginning of a "streaming" block, i.e.
 * between stream_start/stream_stop messages from the upstream.
 */
static void
stream_open_file(Oid subid, TransactionId xid, bool first_segment)
{
    char        path[MAXPGPATH];
    MemoryContext oldcxt;

    Assert(in_streamed_transaction);
    Assert(OidIsValid(subid));
    Assert(TransactionIdIsValid(xid));
    Assert(stream_fd == NULL);

    changes_filename(path, subid, xid);
    elog(DEBUG1, "opening file \"%s\" for streamed changes", path);

    /*
     * Create/open the buffiles under the logical streaming context so that
     * we have those files until stream stop.
     */
    oldcxt = MemoryContextSwitchTo(LogicalStreamingContext);

    /*
     * If this is the first streamed segment, create the changes file.
     * Otherwise, just open the file for writing, in append mode.
     */
    if (first_segment)
        stream_fd = BufFileCreateFileSet(MyLogicalRepWorker->stream_fileset,
                                         path);
    else
    {
        /*
         * Open the file and seek to the end of the file because we always
         * append to the changes file.
         */
        stream_fd = BufFileOpenFileSet(MyLogicalRepWorker->stream_fileset,
                                       path, O_RDWR, false);
        BufFileSeek(stream_fd, 0, 0, SEEK_END);
    }

    MemoryContextSwitchTo(oldcxt);
}
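
/*
 * For illustration: per streamed block the worker's expected call sequence
 * is
 *
 *     stream_open_file(subid, xid, first_segment);   -- at stream_start
 *     stream_write_change(action, s);                -- per change
 *     stream_close_file();                           -- at stream_stop
 *
 * with first_segment true only for the first block, so later blocks append
 * to the same changes file.
 */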

/*
 * stream_close_file
 *    Close the currently open file with streamed changes.
 *
 * This can only be called at the end of a streaming block, i.e. at the
 * stream_stop message from the upstream.
 */
static void
stream_close_file(void)
{
    Assert(in_streamed_transaction);
    Assert(TransactionIdIsValid(stream_xid));
    Assert(stream_fd != NULL);

    BufFileClose(stream_fd);

    stream_xid = InvalidTransactionId;
    stream_fd = NULL;
}

/*
 * stream_write_change
 *    Serialize a change to a file for the current toplevel transaction.
 *
 * The change is serialized in a simple format, with length (not including
 * the length itself), action code (identifying the message type) and
 * message contents (without the subxact TransactionId value).
 */
static void
stream_write_change(char action, StringInfo s)
{
    int         len;

    Assert(in_streamed_transaction);
    Assert(TransactionIdIsValid(stream_xid));
    Assert(stream_fd != NULL);

    /* total on-disk size, including the action type character */
    len = (s->len - s->cursor) + sizeof(char);

    /* first write the size */
    BufFileWrite(stream_fd, &len, sizeof(len));

    /* then the action */
    BufFileWrite(stream_fd, &action, sizeof(action));

    /* and finally the remaining part of the buffer (after the XID) */
    len = (s->len - s->cursor);

    BufFileWrite(stream_fd, &s->data[s->cursor], len);
}
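
/*
 * A minimal sketch (illustration only; 'fd' and 'buf' are hypothetical) of
 * reading one record back in the format written above, eliding error
 * handling:
 *
 *     int   len;
 *     char  action;
 *
 *     BufFileRead(fd, &len, sizeof(len));            -- size, excl. itself
 *     BufFileRead(fd, &action, sizeof(action));      -- action code
 *     BufFileRead(fd, buf, len - sizeof(char));      -- message contents
 *
 * This is how the worker's replay path consumes the spooled changes when
 * the transaction finally commits.
 */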

/*
 * Clean up the memory for subxacts and reset the related variables.
 */
static inline void
cleanup_subxact_info(void)
{
    if (subxact_data.subxacts)
        pfree(subxact_data.subxacts);

    subxact_data.subxacts = NULL;
    subxact_data.subxact_last = InvalidTransactionId;
    subxact_data.nsubxacts = 0;
    subxact_data.nsubxacts_max = 0;
}

/*
 * Form the prepared transaction GID for two_phase transactions.
 *
 * Return the GID in the supplied buffer.
 */
static void
TwoPhaseTransactionGid(Oid subid, TransactionId xid, char *gid, int szgid)
{
    Assert(subid != InvalidRepOriginId);

    if (!TransactionIdIsValid(xid))
        ereport(ERROR,
                (errcode(ERRCODE_PROTOCOL_VIOLATION),
                 errmsg_internal("invalid two-phase transaction ID")));

    snprintf(gid, szgid, "pg_gid_%u_%u", subid, xid);
}
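
/*
 * For illustration (hypothetical values): subscription OID 16394 preparing
 * remote transaction XID 739 produces the GID "pg_gid_16394_739".  Because
 * the GID is derived deterministically from (subid, xid), the worker can
 * recompute it later to find the prepared transaction when the
 * corresponding commit or rollback prepared arrives.
 */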

/* Logical Replication Apply worker entry point */
void
ApplyWorkerMain(Datum main_arg)
{
	int			worker_slot = DatumGetInt32(main_arg);
	MemoryContext cctx = CurrentMemoryContext;
	MemoryContext oldctx;
	char		originname[NAMEDATALEN];
	XLogRecPtr	origin_startpos;
	char	   *myslotname;
	WalRcvStreamOptions options;
	int			server_version;

	/* Attach to slot */
	logicalrep_worker_attach(worker_slot);

	/* Setup signal handling */
	pqsignal(SIGHUP, SignalHandlerForConfigReload);
	pqsignal(SIGTERM, die);
	BackgroundWorkerUnblockSignals();

	/*
	 * We don't currently need any ResourceOwner in a walreceiver process, but
	 * if we did, we could call CreateAuxProcessResourceOwner here.
	 */

	/* Initialise stats to sane values */
	MyLogicalRepWorker->last_send_time = MyLogicalRepWorker->last_recv_time =
		MyLogicalRepWorker->reply_time = GetCurrentTimestamp();

	/* Load the libpq-specific functions */
	load_file("libpqwalreceiver", false);

	/* Run as replica session replication role. */
	SetConfigOption("session_replication_role", "replica",
					PGC_SUSET, PGC_S_OVERRIDE);
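
	/*
	 * With session_replication_role set to "replica", ordinary triggers and
	 * rules on the target tables do not fire while changes are applied; only
	 * triggers marked ENABLE REPLICA or ENABLE ALWAYS do. This keeps
	 * replicated changes from being re-processed as if they were local
	 * writes.
	 */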

	/* Connect to our database. */
	BackgroundWorkerInitializeConnectionByOid(MyLogicalRepWorker->dbid,
											  MyLogicalRepWorker->userid,
											  0);

	/*
	 * Set always-secure search path, so malicious users can't redirect user
	 * code (e.g. pg_index.indexprs).
	 */
	SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);

	/* Load the subscription into persistent memory context. */
	ApplyContext = AllocSetContextCreate(TopMemoryContext,
										 "ApplyContext",
										 ALLOCSET_DEFAULT_SIZES);
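
	/*
	 * ApplyContext is a child of TopMemoryContext, so data kept in it (such
	 * as the subscription descriptor loaded below) survives the transaction
	 * boundaries this worker repeatedly crosses.
	 */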

	StartTransactionCommand();
	oldctx = MemoryContextSwitchTo(ApplyContext);

	MySubscription = GetSubscription(MyLogicalRepWorker->subid, true);
	if (!MySubscription)
	{
		ereport(LOG,
				(errmsg("logical replication apply worker for subscription %u will not "
						"start because the subscription was removed during startup",
						MyLogicalRepWorker->subid)));
		proc_exit(0);
	}

	MySubscriptionValid = true;
	MemoryContextSwitchTo(oldctx);

	if (!MySubscription->enabled)
	{
		ereport(LOG,
				(errmsg("logical replication apply worker for subscription \"%s\" will not "
						"start because the subscription was disabled during startup",
						MySubscription->name)));

		proc_exit(0);
	}

	/* Setup synchronous commit according to the user's wishes */
	SetConfigOption("synchronous_commit", MySubscription->synccommit,
					PGC_BACKEND, PGC_S_OVERRIDE);

	/* Keep us informed about subscription changes. */
	CacheRegisterSyscacheCallback(SUBSCRIPTIONOID,
								  subscription_change_cb,
								  (Datum) 0);

	if (am_tablesync_worker())
		ereport(LOG,
				(errmsg("logical replication table synchronization worker for subscription \"%s\", table \"%s\" has started",
						MySubscription->name, get_rel_name(MyLogicalRepWorker->relid))));
	else
		ereport(LOG,
				(errmsg("logical replication apply worker for subscription \"%s\" has started",
						MySubscription->name)));

	CommitTransactionCommand();

	/* Connect to the origin and start the replication. */
	elog(DEBUG1, "connecting to publisher using connection string \"%s\"",
		 MySubscription->conninfo);

	if (am_tablesync_worker())
	{
		char	   *syncslotname;

		PG_TRY();
		{
			/* This is a table synchronization worker; run the initial sync. */
			syncslotname = LogicalRepSyncTableStart(&origin_startpos);
		}
		PG_CATCH();
		{
			MemoryContext ecxt = MemoryContextSwitchTo(cctx);
			ErrorData  *errdata = CopyErrorData();

			/*
			 * Report the table sync error. There is no corresponding message
			 * type for table synchronization.
			 */
			pgstat_report_subworker_error(MyLogicalRepWorker->subid,
										  MyLogicalRepWorker->relid,
										  MyLogicalRepWorker->relid,
										  0,	/* message type */
										  InvalidTransactionId,
										  errdata->message);
			MemoryContextSwitchTo(ecxt);
			PG_RE_THROW();
		}
		PG_END_TRY();
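
		/*
		 * Note that the catch block above switches to the caller's memory
		 * context (cctx) before calling CopyErrorData(), since that function
		 * may not be called while ErrorContext is the current memory
		 * context.
		 */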

		/* allocate slot name in long-lived context */
		myslotname = MemoryContextStrdup(ApplyContext, syncslotname);

		pfree(syncslotname);
	}
	else
	{
		/* This is the main apply worker */
		RepOriginId originid;
		TimeLineID	startpointTLI;
		char	   *err;

		myslotname = MySubscription->slotname;

		/*
		 * This shouldn't happen if the subscription is enabled, but guard
		 * against DDL bugs or manual catalog changes.  (libpqwalreceiver will
		 * crash if slot is NULL.)
		 */
		if (!myslotname)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("subscription has no replication slot set")));

		/* Setup replication origin tracking. */
		StartTransactionCommand();
		snprintf(originname, sizeof(originname), "pg_%u", MySubscription->oid);
		originid = replorigin_by_name(originname, true);
		if (!OidIsValid(originid))
			originid = replorigin_create(originname);
		replorigin_session_setup(originid);
		replorigin_session_origin = originid;
		origin_startpos = replorigin_session_get_progress(false);
		CommitTransactionCommand();
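
		/*
		 * The origin name is derived from the subscription OID (e.g. a
		 * hypothetical subscription with OID 16394 tracks its progress under
		 * the origin "pg_16394"), and the recorded origin progress yields
		 * the LSN from which streaming should resume.
		 */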

		LogRepWorkerWalRcvConn = walrcv_connect(MySubscription->conninfo, true,
												MySubscription->name, &err);
		if (LogRepWorkerWalRcvConn == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_CONNECTION_FAILURE),
					 errmsg("could not connect to the publisher: %s", err)));

		/*
		 * We don't really use the output of IDENTIFY_SYSTEM for anything,
		 * but it performs some initialization on the upstream, so we still
		 * call it.
		 */
		(void) walrcv_identify_system(LogRepWorkerWalRcvConn, &startpointTLI);
	}

	/*
	 * Set up a syscache callback so that we know when something changes in
	 * the subscription relation state.
	 */
	CacheRegisterSyscacheCallback(SUBSCRIPTIONRELMAP,
								  invalidate_syncing_table_states,
								  (Datum) 0);

	/* Build logical replication streaming options. */
	options.logical = true;
	options.startpoint = origin_startpos;
	options.slotname = myslotname;

	server_version = walrcv_server_version(LogRepWorkerWalRcvConn);
	options.proto.logical.proto_version =
		server_version >= 150000 ? LOGICALREP_PROTO_TWOPHASE_VERSION_NUM :
		server_version >= 140000 ? LOGICALREP_PROTO_STREAM_VERSION_NUM :
		LOGICALREP_PROTO_VERSION_NUM;
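
	/*
	 * In other words, the requested protocol version is matched to the
	 * publisher's capabilities: publishers on 15 or later get the
	 * two-phase-capable protocol, version 14 gets the streaming-capable
	 * protocol, and older releases fall back to the base protocol version.
	 */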

	options.proto.logical.publication_names = MySubscription->publications;
	options.proto.logical.binary = MySubscription->binary;
	options.proto.logical.streaming = MySubscription->stream;
	options.proto.logical.twophase = false;

	if (!am_tablesync_worker())
	{
		/*
		 * Even when the two_phase mode is requested by the user, it remains
		 * in the tri-state PENDING until all tablesyncs have reached READY
		 * state. Only then can it become ENABLED.
		 *
		 * Note: If the subscription has no tables then leave the state as
		 * PENDING, which allows ALTER SUBSCRIPTION ... REFRESH PUBLICATION to
		 * work.
		 */
		if (MySubscription->twophasestate == LOGICALREP_TWOPHASE_STATE_PENDING &&
			AllTablesyncsReady())
		{
			/* Start streaming with two_phase enabled */
			options.proto.logical.twophase = true;
			walrcv_startstreaming(LogRepWorkerWalRcvConn, &options);

			StartTransactionCommand();
			UpdateTwoPhaseState(MySubscription->oid, LOGICALREP_TWOPHASE_STATE_ENABLED);
			MySubscription->twophasestate = LOGICALREP_TWOPHASE_STATE_ENABLED;
			CommitTransactionCommand();
		}
		else
		{
			walrcv_startstreaming(LogRepWorkerWalRcvConn, &options);
		}

		ereport(DEBUG1,
				(errmsg("logical replication apply worker for subscription \"%s\" two_phase is %s",
						MySubscription->name,
						MySubscription->twophasestate == LOGICALREP_TWOPHASE_STATE_DISABLED ? "DISABLED" :
						MySubscription->twophasestate == LOGICALREP_TWOPHASE_STATE_PENDING ? "PENDING" :
						MySubscription->twophasestate == LOGICALREP_TWOPHASE_STATE_ENABLED ? "ENABLED" :
						"?")));
	}
	else
	{
		/* Start normal logical streaming replication. */
		walrcv_startstreaming(LogRepWorkerWalRcvConn, &options);
	}
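
	/*
	 * A rough sketch of the state transitions above, from the user's side
	 * (names hypothetical, assuming the pg_subscription.subtwophasestate
	 * catalog column of this release):
	 *
	 *   CREATE SUBSCRIPTION mysub CONNECTION '...' PUBLICATION mypub
	 *       WITH (two_phase = on);
	 *
	 * subtwophasestate then starts out as 'p' (PENDING); once all tablesyncs
	 * are READY, this worker flips it to 'e' (ENABLED) as done above.
	 */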

	/* Run the main loop. */
	PG_TRY();
	{
		LogicalRepApplyLoop(origin_startpos);
	}
	PG_CATCH();
	{
		/* report the apply error */
		if (apply_error_callback_arg.command != 0)
		{
			MemoryContext ecxt = MemoryContextSwitchTo(cctx);
			ErrorData  *errdata = CopyErrorData();

			pgstat_report_subworker_error(MyLogicalRepWorker->subid,
										  MyLogicalRepWorker->relid,
										  apply_error_callback_arg.rel != NULL
										  ? apply_error_callback_arg.rel->localreloid
										  : InvalidOid,
										  apply_error_callback_arg.command,
										  apply_error_callback_arg.remote_xid,
										  errdata->message);
			MemoryContextSwitchTo(ecxt);
		}

		PG_RE_THROW();
	}
	PG_END_TRY();

	proc_exit(0);
}

/*
 * Is current process a logical replication worker?
 */
bool
IsLogicalWorker(void)
{
	return MyLogicalRepWorker != NULL;
}

/* Error callback to give more context info about the change being applied */
static void
apply_error_callback(void *arg)
{
	StringInfoData buf;
	ApplyErrorCallbackArg *errarg = &apply_error_callback_arg;

	if (apply_error_callback_arg.command == 0)
		return;

	initStringInfo(&buf);
	appendStringInfo(&buf, _("processing remote data during \"%s\""),
					 logicalrep_message_type(errarg->command));

	/* append relation information */
	if (errarg->rel)
	{
		appendStringInfo(&buf, _(" for replication target relation \"%s.%s\""),
						 errarg->rel->remoterel.nspname,
						 errarg->rel->remoterel.relname);
		if (errarg->remote_attnum >= 0)
			appendStringInfo(&buf, _(" column \"%s\""),
							 errarg->rel->remoterel.attnames[errarg->remote_attnum]);
	}

	/* append transaction information */
	if (TransactionIdIsNormal(errarg->remote_xid))
	{
		appendStringInfo(&buf, _(" in transaction %u"), errarg->remote_xid);
		if (errarg->ts != 0)
			appendStringInfo(&buf, _(" at %s"),
							 timestamptz_to_str(errarg->ts));
	}

	errcontext("%s", buf.data);
	pfree(buf.data);
}
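
/*
 * For a hypothetical failing INSERT on "public.tab", the callback above
 * would produce an error CONTEXT line along the lines of:
 *
 *   CONTEXT:  processing remote data during "INSERT" for replication target
 *   relation "public.tab" column "id" in transaction 742
 *
 * (values illustrative only, assembled from the format strings above).
 */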

/* Set transaction information of apply error callback */
static inline void
set_apply_error_context_xact(TransactionId xid, TimestampTz ts)
{
	apply_error_callback_arg.remote_xid = xid;
	apply_error_callback_arg.ts = ts;
}

/* Reset all information of apply error callback */
static inline void
reset_apply_error_context_info(void)
{
	apply_error_callback_arg.command = 0;
	apply_error_callback_arg.rel = NULL;
	apply_error_callback_arg.remote_attnum = -1;
	set_apply_error_context_xact(InvalidTransactionId, 0);
}