/*-------------------------------------------------------------------------
 *
 * walreceiver.h
 *	  Exports from replication/walreceiverfuncs.c.
 *
 * Portions Copyright (c) 2010-2024, PostgreSQL Global Development Group
 *
 * src/include/replication/walreceiver.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef _WALRECEIVER_H
#define _WALRECEIVER_H

#include <netdb.h>

#include "access/xlog.h"
#include "access/xlogdefs.h"
#include "pgtime.h"
#include "port/atomics.h"
#include "replication/logicalproto.h"
#include "replication/walsender.h"
#include "storage/condition_variable.h"
#include "storage/latch.h"
#include "storage/spin.h"
#include "utils/tuplestore.h"
2012-10-11 16:39:52 +02:00
|
|
|
/* user-settable parameters */
|
2022-04-08 14:16:38 +02:00
|
|
|
extern PGDLLIMPORT int wal_receiver_status_interval;
|
|
|
|
extern PGDLLIMPORT int wal_receiver_timeout;
|
|
|
|
extern PGDLLIMPORT bool hot_standby_feedback;
|
2010-02-19 11:51:04 +01:00
|
|
|
|
2010-01-15 10:19:10 +01:00
|
|
|
/*
|
|
|
|
* MAXCONNINFO: maximum size of a connection string.
|
|
|
|
*
|
|
|
|
* XXX: Should this move to pg_config_manual.h?
|
|
|
|
*/
|
|
|
|
#define MAXCONNINFO 1024
|
|
|
|
|
2011-09-04 02:46:19 +02:00
|
|
|
/* Can we allow the standby to accept replication connection from another standby? */
|
|
|
|
#define AllowCascadeReplication() (EnableHotStandby && max_wal_senders > 0)
|
|
|
|
|
/*
 * Values for WalRcv->walRcvState.
 *
 * Transitions are driven by the startup process (which launches and stops
 * the walreceiver) and by the walreceiver itself.
 */
typedef enum
{
	WALRCV_STOPPED,				/* stopped and mustn't start up again */
	WALRCV_STARTING,			/* launched, but the process hasn't
								 * initialized yet */
	WALRCV_STREAMING,			/* walreceiver is streaming */
	WALRCV_WAITING,				/* stopped streaming, waiting for orders */
	WALRCV_RESTARTING,			/* asked to restart streaming */
	WALRCV_STOPPING,			/* requested to stop, but still running */
} WalRcvState;
|
|
|
|
/* Shared memory area for management of walreceiver process */
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
/*
|
2010-07-03 22:43:58 +02:00
|
|
|
* PID of currently active walreceiver process, its current state and
|
|
|
|
* start time (actually, the time at which it was requested to be
|
|
|
|
* started).
|
2010-01-15 10:19:10 +01:00
|
|
|
*/
|
|
|
|
pid_t pid;
|
|
|
|
WalRcvState walRcvState;
|
2021-03-12 07:07:27 +01:00
|
|
|
ConditionVariable walRcvStoppedCV;
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
pg_time_t startTime;
|
2010-01-15 10:19:10 +01:00
|
|
|
|
2011-03-01 19:46:57 +01:00
|
|
|
/*
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
* receiveStart and receiveStartTLI indicate the first byte position and
|
|
|
|
* timeline that will be received. When startup process starts the
|
|
|
|
* walreceiver, it sets these to the point where it wants the streaming to
|
|
|
|
* begin.
|
2011-03-01 19:46:57 +01:00
|
|
|
*/
|
|
|
|
XLogRecPtr receiveStart;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
TimeLineID receiveStartTLI;
|
2011-03-01 19:46:57 +01:00
|
|
|
|
2010-01-15 10:19:10 +01:00
|
|
|
/*
|
2020-04-08 13:45:09 +02:00
|
|
|
* flushedUpto-1 is the last byte position that has already been received,
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
* and receivedTLI is the timeline it came from. At the first startup of
|
|
|
|
* walreceiver, these are set to receiveStart and receiveStartTLI. After
|
|
|
|
* that, walreceiver updates these whenever it flushes the received WAL to
|
|
|
|
* disk.
|
2010-01-15 10:19:10 +01:00
|
|
|
*/
|
2020-04-08 13:45:09 +02:00
|
|
|
XLogRecPtr flushedUpto;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
TimeLineID receivedTLI;
|
2010-01-15 10:19:10 +01:00
|
|
|
|
2010-07-03 22:43:58 +02:00
|
|
|
/*
|
|
|
|
* latestChunkStart is the starting byte position of the current "batch"
|
|
|
|
* of received WAL. It's actually the same as the previous value of
|
2020-04-08 13:45:09 +02:00
|
|
|
* flushedUpto before the last flush to disk. Startup process can use
|
2010-07-03 22:43:58 +02:00
|
|
|
* this to detect whether it's keeping up or not.
|
|
|
|
*/
|
|
|
|
XLogRecPtr latestChunkStart;
|
|
|
|
|
2011-12-31 14:30:26 +01:00
|
|
|
/*
|
|
|
|
* Time of send and receive of any message received.
|
|
|
|
*/
|
|
|
|
TimestampTz lastMsgSendTime;
|
|
|
|
TimestampTz lastMsgReceiptTime;
|
|
|
|
|
2012-08-09 18:03:59 +02:00
|
|
|
/*
|
|
|
|
* Latest reported end of WAL on the sender
|
|
|
|
*/
|
|
|
|
XLogRecPtr latestWalEnd;
|
|
|
|
TimestampTz latestWalEndTime;
|
|
|
|
|
2010-07-03 22:43:58 +02:00
|
|
|
/*
|
2016-06-29 22:57:17 +02:00
|
|
|
* connection string; initially set to connect to the primary, and later
|
|
|
|
* clobbered to hide security-sensitive fields.
|
2010-07-03 22:43:58 +02:00
|
|
|
*/
|
|
|
|
char conninfo[MAXCONNINFO];
|
|
|
|
|
2018-03-31 00:51:22 +02:00
|
|
|
/*
|
|
|
|
* Host name (this can be a host name, an IP address, or a directory path)
|
|
|
|
* and port number of the active replication connection.
|
|
|
|
*/
|
|
|
|
char sender_host[NI_MAXHOST];
|
|
|
|
int sender_port;
|
|
|
|
|
2014-02-01 04:45:17 +01:00
|
|
|
/*
|
|
|
|
* replication slot name; is also used for walreceiver to connect with the
|
|
|
|
* primary
|
|
|
|
*/
|
|
|
|
char slotname[NAMEDATALEN];
|
|
|
|
|
2020-01-14 14:07:11 +01:00
|
|
|
/*
|
|
|
|
* If it's a temporary replication slot, it needs to be recreated when
|
|
|
|
* connecting.
|
|
|
|
*/
|
|
|
|
bool is_temp_slot;
|
|
|
|
|
Fix locking in WAL receiver/sender shmem state structs
In WAL receiver and WAL server, some accesses to their corresponding
shared memory control structs were done without holding any kind of
lock, which could lead to inconsistent and possibly insecure results.
In walsender, fix by clarifying the locking rules and following them
correctly, as documented in the new comment in walsender_private.h;
namely that some members can be read in walsender itself without a lock,
because the only writes occur in the same process. The rest of the
struct requires spinlock for accesses, as usual.
In walreceiver, fix by always holding spinlock while accessing the
struct.
While there is potentially a problem in all branches, it is minor in
stable ones. This only became a real problem in pg10 because of quorum
commit in synchronous replication (commit 3901fd70cc7c), and a potential
security problem in walreceiver because a superuser() check was removed
by default monitoring roles (commit 25fff40798fc). Thus, no backpatch.
In passing, clean up some leftover braces which were used to create
unconditional blocks. Once upon a time these were used for
volatile-izing accesses to those shmem structs, which is no longer
required. Many other occurrences of this pattern remain.
Author: Michaël Paquier
Reported-by: Michaël Paquier
Reviewed-by: Masahiko Sawada, Kyotaro Horiguchi, Thomas Munro,
Robert Haas
Discussion: https://postgr.es/m/CAB7nPqTWYqtzD=LN_oDaf9r-hAjUEPAy0B9yRkhcsLdRN8fzrw@mail.gmail.com
2017-07-01 00:06:33 +02:00
|
|
|
/* set true once conninfo is ready to display (obfuscated pwds etc) */
|
|
|
|
bool ready_to_display;
|
|
|
|
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
/*
|
|
|
|
* Latch used by startup process to wake up walreceiver after telling it
|
|
|
|
* where to start streaming (after setting receiveStart and
|
2016-03-30 03:16:12 +02:00
|
|
|
* receiveStartTLI), and also to tell it to send apply feedback to the
|
|
|
|
* primary whenever specially marked commit records are applied. This is
|
2016-11-30 18:00:00 +01:00
|
|
|
* normally mapped to procLatch when walreceiver is running.
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
Latch *latch;
|
2017-10-03 20:00:56 +02:00
|
|
|
|
|
|
|
slock_t mutex; /* locks shared variables shown above */
|
|
|
|
|
2020-04-08 13:45:09 +02:00
|
|
|
/*
|
|
|
|
* Like flushedUpto, but advanced after writing and before flushing,
|
|
|
|
* without the need to acquire the spin lock. Data can be read by another
|
|
|
|
* process up to this point, but shouldn't be used for data integrity
|
|
|
|
* purposes.
|
|
|
|
*/
|
|
|
|
pg_atomic_uint64 writtenUpto;
|
|
|
|
|
2017-10-03 20:00:56 +02:00
|
|
|
/*
|
|
|
|
* force walreceiver reply? This doesn't need to be locked; memory
|
|
|
|
* barriers for ordering are sufficient. But we do need atomic fetch and
|
|
|
|
* store semantics, so use sig_atomic_t.
|
|
|
|
*/
|
|
|
|
sig_atomic_t force_reply; /* used as a bool */
|
2010-01-15 10:19:10 +01:00
|
|
|
} WalRcvData;
|
|
|
|
|
2022-04-08 14:16:38 +02:00
|
|
|
extern PGDLLIMPORT WalRcvData *WalRcv;
|
2010-01-15 10:19:10 +01:00
|
|
|
|
2017-01-19 18:00:00 +01:00
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
bool logical; /* True if this is logical replication stream,
|
|
|
|
* false if physical stream. */
|
|
|
|
char *slotname; /* Name of the replication slot or NULL. */
|
|
|
|
XLogRecPtr startpoint; /* LSN of starting point. */
|
|
|
|
|
|
|
|
union
|
|
|
|
{
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
TimeLineID startpointTLI; /* Starting timeline */
|
|
|
|
} physical;
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
uint32 proto_version; /* Logical protocol version */
|
|
|
|
List *publication_names; /* String list of publications */
|
2020-07-18 18:44:51 +02:00
|
|
|
bool binary; /* Ask publisher to use binary */
|
Perform apply of large transactions by parallel workers.
Currently, for large transactions, the publisher sends the data in
multiple streams (changes divided into chunks depending upon
logical_decoding_work_mem), and then on the subscriber-side, the apply
worker writes the changes into temporary files and once it receives the
commit, it reads from those files and applies the entire transaction. To
improve the performance of such transactions, we can instead allow them to
be applied via parallel workers.
In this approach, we assign a new parallel apply worker (if available) as
soon as the xact's first stream is received and the leader apply worker
will send changes to this new worker via shared memory. The parallel apply
worker will directly apply the change instead of writing it to temporary
files. However, if the leader apply worker times out while attempting to
send a message to the parallel apply worker, it will switch to
"partial serialize" mode - in this mode, the leader serializes all
remaining changes to a file and notifies the parallel apply workers to
read and apply them at the end of the transaction. We use a non-blocking
way to send the messages from the leader apply worker to the parallel
apply to avoid deadlocks. We keep this parallel apply assigned till the
transaction commit is received and also wait for the worker to finish at
commit. This preserves commit ordering and avoid writing to and reading
from files in most cases. We still need to spill if there is no worker
available.
This patch also extends the SUBSCRIPTION 'streaming' parameter so that the
user can control whether to apply the streaming transaction in a parallel
apply worker or spill the change to disk. The user can set the streaming
parameter to 'on/off', or 'parallel'. The parameter value 'parallel' means
the streaming will be applied via a parallel apply worker, if available.
The parameter value 'on' means the streaming transaction will be spilled
to disk. The default value is 'off' (same as current behaviour).
In addition, the patch extends the logical replication STREAM_ABORT
message so that abort_lsn and abort_time can also be sent which can be
used to update the replication origin in parallel apply worker when the
streaming transaction is aborted. Because this message extension is needed
to support parallel streaming, parallel streaming is not supported for
publications on servers < PG16.
Author: Hou Zhijie, Wang wei, Amit Kapila with design inputs from Sawada Masahiko
Reviewed-by: Sawada Masahiko, Peter Smith, Dilip Kumar, Shi yu, Kuroda Hayato, Shveta Mallik
Discussion: https://postgr.es/m/CAA4eK1+wyN6zpaHUkCLorEWNx75MG0xhMwcFhvjqm2KURZEAGw@mail.gmail.com
2023-01-09 02:30:39 +01:00
|
|
|
char *streaming_str; /* Streaming of large transactions */
|
Add support for prepared transactions to built-in logical replication.
To add support for streaming transactions at prepare time into the
built-in logical replication, we need to do the following things:
* Modify the output plugin (pgoutput) to implement the new two-phase API
callbacks, by leveraging the extended replication protocol.
* Modify the replication apply worker, to properly handle two-phase
transactions by replaying them on prepare.
* Add a new SUBSCRIPTION option "two_phase" to allow users to enable
two-phase transactions. We enable the two_phase once the initial data sync
is over.
We however must explicitly disable replication of two-phase transactions
during replication slot creation, even if the plugin supports it. We
don't need to replicate the changes accumulated during this phase,
and moreover, we don't have a replication connection open so we don't know
where to send the data anyway.
The streaming option is not allowed with this new two_phase option. This
can be done as a separate patch.
We don't allow to toggle two_phase option of a subscription because it can
lead to an inconsistent replica. For the same reason, we don't allow to
refresh the publication once the two_phase is enabled for a subscription
unless copy_data option is false.
Author: Peter Smith, Ajin Cherian and Amit Kapila based on previous work by Nikhil Sontakke and Stas Kelvich
Reviewed-by: Amit Kapila, Sawada Masahiko, Vignesh C, Dilip Kumar, Takamichi Osumi, Greg Nancarrow
Tested-By: Haiying Tang
Discussion: https://postgr.es/m/02DA5F5E-CECE-4D9C-8B4B-418077E2C010@postgrespro.ru
Discussion: https://postgr.es/m/CAA4eK1+opiV4aFTmWWUF9h_32=HfPOW9vZASHarT0UA5oBrtGw@mail.gmail.com
2021-07-14 04:03:50 +02:00
|
|
|
bool twophase; /* Streaming of two-phase transactions at
|
|
|
|
* prepare time */
|
2022-07-21 05:17:38 +02:00
|
|
|
char *origin; /* Only publish data originating from the
|
|
|
|
* specified origin */
|
2017-01-19 18:00:00 +01:00
|
|
|
} logical;
|
|
|
|
} proto;
|
|
|
|
} WalRcvStreamOptions;
|
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
/*
 * Opaque per-connection handle; its layout is defined privately by each
 * WAL receiver module (e.g. libpqwalreceiver).
 */
struct WalReceiverConn;
typedef struct WalReceiverConn WalReceiverConn;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
|
2017-03-23 13:36:36 +01:00
|
|
|
/*
 * Result status of a walreceiver query execution.
 *
 * Only the outcomes that the code currently inspects are enumerated.
 */
typedef enum
{
	WALRCV_ERROR,				/* query execution failed */
	WALRCV_OK_COMMAND,			/* utility or replication command succeeded */
	WALRCV_OK_TUPLES,			/* query produced a result set */
	WALRCV_OK_COPY_IN,			/* COPY FROM has been initiated */
	WALRCV_OK_COPY_OUT,			/* COPY TO has been initiated */
	WALRCV_OK_COPY_BOTH,		/* COPY BOTH (replication protocol) has been
								 * initiated */
} WalRcvExecStatus;
|
|
|
|
|
|
|
|
/*
|
2019-08-19 09:21:39 +02:00
|
|
|
* Return value for walrcv_exec, returns the status of the execution and
|
2017-03-23 13:36:36 +01:00
|
|
|
* tuples if any.
|
|
|
|
*/
|
|
|
|
typedef struct WalRcvExecResult
|
|
|
|
{
|
|
|
|
WalRcvExecStatus status;
|
Allow multiple xacts during table sync in logical replication.
For the initial table data synchronization in logical replication, we use
a single transaction to copy the entire table and then synchronize the
position in the stream with the main apply worker.
There are multiple downsides of this approach: (a) We have to perform the
entire copy operation again if there is any error (network breakdown,
error in the database operation, etc.) while we synchronize the WAL
position between tablesync worker and apply worker; this will be onerous
especially for large copies, (b) Using a single transaction in the
synchronization-phase (where we can receive WAL from multiple
transactions) will have the risk of exceeding the CID limit, (c) The slot
will hold the WAL till the entire sync is complete because we never commit
till the end.
This patch solves all the above downsides by allowing multiple
transactions during the tablesync phase. The initial copy is done in a
single transaction and after that, we commit each transaction as we
receive. To allow recovery after any error or crash, we use a permanent
slot and origin to track the progress. The slot and origin will be removed
once we finish the synchronization of the table. We also remove slot and
origin of tablesync workers if the user performs DROP SUBSCRIPTION .. or
ALTER SUBSCRIPTION .. REFERESH and some of the table syncs are still not
finished.
The commands ALTER SUBSCRIPTION ... REFRESH PUBLICATION and
ALTER SUBSCRIPTION ... SET PUBLICATION ... with refresh option as true
cannot be executed inside a transaction block because they can now drop
the slots for which we have no provision to rollback.
This will also open up the path for logical replication of 2PC
transactions on the subscriber side. Previously, we can't do that because
of the requirement of maintaining a single transaction in tablesync
workers.
Bump catalog version due to change of state in the catalog
(pg_subscription_rel).
Author: Peter Smith, Amit Kapila, and Takamichi Osumi
Reviewed-by: Ajin Cherian, Petr Jelinek, Hou Zhijie and Amit Kapila
Discussion: https://postgr.es/m/CAA4eK1KHJxaZS-fod-0fey=0tq3=Gkn4ho=8N4-5HWiCfu0H1A@mail.gmail.com
2021-02-12 03:11:51 +01:00
|
|
|
int sqlstate;
|
2017-03-23 13:36:36 +01:00
|
|
|
char *err;
|
|
|
|
Tuplestorestate *tuplestore;
|
|
|
|
TupleDesc tupledesc;
|
|
|
|
} WalRcvExecResult;
|
|
|
|
|
2020-07-02 06:57:03 +02:00
|
|
|
/* WAL receiver - libpqwalreceiver hooks */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_connect_fn
|
|
|
|
*
|
2024-02-05 06:15:34 +01:00
|
|
|
* Establish connection to a cluster. 'replication' is true if the
|
|
|
|
* connection is a replication connection, and false if it is a
|
|
|
|
* regular connection. If it is a replication connection, it could
|
|
|
|
* be either logical or physical based on input argument 'logical'.
|
2020-07-02 06:57:03 +02:00
|
|
|
* 'appname' is a name associated to the connection, to use for example
|
|
|
|
* with fallback_application_name or application_name. Returns the
|
|
|
|
* details about the connection established, as defined by
|
|
|
|
* WalReceiverConn for each WAL receiver module. On error, NULL is
|
|
|
|
* returned with 'err' including the error generated.
|
|
|
|
*/
|
|
|
|
typedef WalReceiverConn *(*walrcv_connect_fn) (const char *conninfo,
|
2024-02-05 06:15:34 +01:00
|
|
|
bool replication,
|
2020-07-02 06:57:03 +02:00
|
|
|
bool logical,
|
Add new predefined role pg_create_subscription.
This role can be granted to non-superusers to allow them to issue
CREATE SUBSCRIPTION. The non-superuser must additionally have CREATE
permissions on the database in which the subscription is to be
created.
Most forms of ALTER SUBSCRIPTION, including ALTER SUBSCRIPTION .. SKIP,
now require only that the role performing the operation own the
subscription, or inherit the privileges of the owner. However, to
use ALTER SUBSCRIPTION ... RENAME or ALTER SUBSCRIPTION ... OWNER TO,
you also need CREATE permission on the database. This is similar to
what we do for schemas. To change the owner of a schema, you must also
have permission to SET ROLE to the new owner, similar to what we do
for other object types.
Non-superusers are required to specify a password for authentication
and the remote side must use the password, similar to what is required
for postgres_fdw and dblink. A superuser who wants a non-superuser to
own a subscription that does not rely on password authentication may
set the new password_required=false property on that subscription. A
non-superuser may not set password_required=false and may not modify a
subscription that already has password_required=false.
This new password_required subscription property works much like the
eponymous postgres_fdw property. In both cases, the actual semantics
are that a password is not required if either (1) the property is set
to false or (2) the relevant user is the superuser.
Patch by me, reviewed by Andres Freund, Jeff Davis, Mark Dilger,
and Stephen Frost (but some of those people did not fully endorse
all of the decisions that the patch makes).
Discussion: http://postgr.es/m/CA+TgmoaDH=0Xj7OBiQnsHTKcF2c4L+=gzPBUKSJLh8zed2_+Dg@mail.gmail.com
2023-03-30 17:37:19 +02:00
|
|
|
bool must_use_password,
|
2017-01-19 18:00:00 +01:00
|
|
|
const char *appname,
|
|
|
|
char **err);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_check_conninfo_fn
|
|
|
|
*
|
|
|
|
* Parse and validate the connection string given as of 'conninfo'.
|
|
|
|
*/
|
Add new predefined role pg_create_subscription.
This role can be granted to non-superusers to allow them to issue
CREATE SUBSCRIPTION. The non-superuser must additionally have CREATE
permissions on the database in which the subscription is to be
created.
Most forms of ALTER SUBSCRIPTION, including ALTER SUBSCRIPTION .. SKIP,
now require only that the role performing the operation own the
subscription, or inherit the privileges of the owner. However, to
use ALTER SUBSCRIPTION ... RENAME or ALTER SUBSCRIPTION ... OWNER TO,
you also need CREATE permission on the database. This is similar to
what we do for schemas. To change the owner of a schema, you must also
have permission to SET ROLE to the new owner, similar to what we do
for other object types.
Non-superusers are required to specify a password for authentication
and the remote side must use the password, similar to what is required
for postgres_fdw and dblink. A superuser who wants a non-superuser to
own a subscription that does not rely on password authentication may
set the new password_required=false property on that subscription. A
non-superuser may not set password_required=false and may not modify a
subscription that already has password_required=false.
This new password_required subscription property works much like the
eponymous postgres_fdw property. In both cases, the actual semantics
are that a password is not required if either (1) the property is set
to false or (2) the relevant user is the superuser.
Patch by me, reviewed by Andres Freund, Jeff Davis, Mark Dilger,
and Stephen Frost (but some of those people did not fully endorse
all of the decisions that the patch makes).
Discussion: http://postgr.es/m/CA+TgmoaDH=0Xj7OBiQnsHTKcF2c4L+=gzPBUKSJLh8zed2_+Dg@mail.gmail.com
2023-03-30 17:37:19 +02:00
|
|
|
typedef void (*walrcv_check_conninfo_fn) (const char *conninfo,
|
|
|
|
bool must_use_password);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_get_conninfo_fn
|
|
|
|
*
|
|
|
|
* Returns a user-displayable conninfo string. Note that any
|
|
|
|
* security-sensitive fields should be obfuscated.
|
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
typedef char *(*walrcv_get_conninfo_fn) (WalReceiverConn *conn);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_get_senderinfo_fn
|
|
|
|
*
|
|
|
|
* Provide information of the WAL sender this WAL receiver is connected
|
|
|
|
* to, as of 'sender_host' for the host of the sender and 'sender_port'
|
|
|
|
* for its port.
|
|
|
|
*/
|
2018-03-31 00:51:22 +02:00
|
|
|
typedef void (*walrcv_get_senderinfo_fn) (WalReceiverConn *conn,
|
|
|
|
char **sender_host,
|
|
|
|
int *sender_port);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_identify_system_fn
|
|
|
|
*
|
|
|
|
* Run IDENTIFY_SYSTEM on the cluster connected to and validate the
|
|
|
|
* identity of the cluster. Returns the system ID of the cluster
|
|
|
|
* connected to. 'primary_tli' is the timeline ID of the sender.
|
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
typedef char *(*walrcv_identify_system_fn) (WalReceiverConn *conn,
|
2019-03-15 10:16:26 +01:00
|
|
|
TimeLineID *primary_tli);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
2024-02-05 06:15:34 +01:00
|
|
|
/*
|
|
|
|
* walrcv_get_dbname_from_conninfo_fn
|
|
|
|
*
|
|
|
|
* Returns the database name from the primary_conninfo
|
|
|
|
*/
|
|
|
|
typedef char *(*walrcv_get_dbname_from_conninfo_fn) (const char *conninfo);
|
|
|
|
|
2020-07-02 06:57:03 +02:00
|
|
|
/*
|
|
|
|
* walrcv_server_version_fn
|
|
|
|
*
|
|
|
|
* Returns the version number of the cluster connected to.
|
|
|
|
*/
|
2019-03-15 10:16:26 +01:00
|
|
|
typedef int (*walrcv_server_version_fn) (WalReceiverConn *conn);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_readtimelinehistoryfile_fn
|
|
|
|
*
|
|
|
|
* Fetch from cluster the timeline history file for timeline 'tli'.
|
|
|
|
* Returns the name of the timeline history file as of 'filename', its
|
|
|
|
* contents as of 'content' and its 'size'.
|
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
typedef void (*walrcv_readtimelinehistoryfile_fn) (WalReceiverConn *conn,
|
|
|
|
TimeLineID tli,
|
|
|
|
char **filename,
|
2020-07-02 06:57:03 +02:00
|
|
|
char **content,
|
|
|
|
int *size);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_startstreaming_fn
|
|
|
|
*
|
|
|
|
* Start streaming WAL data from given streaming options. Returns true
|
|
|
|
* if the connection has switched successfully to copy-both mode and false
|
|
|
|
* if the server received the command and executed it successfully, but
|
|
|
|
* didn't switch to copy-mode.
|
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
typedef bool (*walrcv_startstreaming_fn) (WalReceiverConn *conn,
|
2017-01-19 18:00:00 +01:00
|
|
|
const WalRcvStreamOptions *options);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_endstreaming_fn
|
|
|
|
*
|
|
|
|
* Stop streaming of WAL data. Returns the next timeline ID of the cluster
|
|
|
|
* connected to in 'next_tli', or 0 if there was no report.
|
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
typedef void (*walrcv_endstreaming_fn) (WalReceiverConn *conn,
|
|
|
|
TimeLineID *next_tli);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_receive_fn
|
|
|
|
*
|
|
|
|
* Receive a message available from the WAL stream. 'buffer' is a pointer
|
|
|
|
* to a buffer holding the message received. Returns the length of the data,
|
|
|
|
* 0 if no data is available yet ('wait_fd' is a socket descriptor which can
|
|
|
|
* be waited on before a retry), and -1 if the cluster ended the COPY.
|
|
|
|
*/
|
|
|
|
typedef int (*walrcv_receive_fn) (WalReceiverConn *conn,
|
|
|
|
char **buffer,
|
2016-11-30 18:00:00 +01:00
|
|
|
pgsocket *wait_fd);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_send_fn
|
|
|
|
*
|
|
|
|
* Send a message of size 'nbytes' to the WAL stream with 'buffer' as
|
|
|
|
* contents.
|
|
|
|
*/
|
|
|
|
typedef void (*walrcv_send_fn) (WalReceiverConn *conn,
|
|
|
|
const char *buffer,
|
2016-11-30 18:00:00 +01:00
|
|
|
int nbytes);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_create_slot_fn
|
|
|
|
*
|
|
|
|
* Create a new replication slot named 'slotname'. 'temporary' defines
|
|
|
|
* if the slot is temporary. 'snapshot_action' defines the behavior wanted
|
|
|
|
* for an exported snapshot (see replication protocol for more details).
|
|
|
|
* 'lsn' includes the LSN position at which the created slot became
|
|
|
|
* consistent. Returns the name of the exported snapshot for a logical
|
|
|
|
* slot, or NULL for a physical slot.
|
|
|
|
*/
|
2017-01-19 18:00:00 +01:00
|
|
|
typedef char *(*walrcv_create_slot_fn) (WalReceiverConn *conn,
|
2020-07-02 06:57:03 +02:00
|
|
|
const char *slotname,
|
|
|
|
bool temporary,
|
Add support for prepared transactions to built-in logical replication.
To add support for streaming transactions at prepare time into the
built-in logical replication, we need to do the following things:
* Modify the output plugin (pgoutput) to implement the new two-phase API
callbacks, by leveraging the extended replication protocol.
* Modify the replication apply worker, to properly handle two-phase
transactions by replaying them on prepare.
* Add a new SUBSCRIPTION option "two_phase" to allow users to enable
two-phase transactions. We enable the two_phase once the initial data sync
is over.
We however must explicitly disable replication of two-phase transactions
during replication slot creation, even if the plugin supports it. We
don't need to replicate the changes accumulated during this phase,
and moreover, we don't have a replication connection open so we don't know
where to send the data anyway.
The streaming option is not allowed with this new two_phase option. This
can be done as a separate patch.
We don't allow to toggle two_phase option of a subscription because it can
lead to an inconsistent replica. For the same reason, we don't allow to
refresh the publication once the two_phase is enabled for a subscription
unless copy_data option is false.
Author: Peter Smith, Ajin Cherian and Amit Kapila based on previous work by Nikhil Sontakke and Stas Kelvich
Reviewed-by: Amit Kapila, Sawada Masahiko, Vignesh C, Dilip Kumar, Takamichi Osumi, Greg Nancarrow
Tested-By: Haiying Tang
Discussion: https://postgr.es/m/02DA5F5E-CECE-4D9C-8B4B-418077E2C010@postgrespro.ru
Discussion: https://postgr.es/m/CAA4eK1+opiV4aFTmWWUF9h_32=HfPOW9vZASHarT0UA5oBrtGw@mail.gmail.com
2021-07-14 04:03:50 +02:00
|
|
|
bool two_phase,
|
2024-01-29 04:40:00 +01:00
|
|
|
bool failover,
|
2017-03-23 13:36:36 +01:00
|
|
|
CRSSnapshotAction snapshot_action,
|
|
|
|
XLogRecPtr *lsn);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
2024-01-29 04:40:00 +01:00
|
|
|
/*
|
|
|
|
* walrcv_alter_slot_fn
|
|
|
|
*
|
|
|
|
* Change the definition of a replication slot. Currently, it only supports
|
|
|
|
* changing the failover property of the slot.
|
|
|
|
*/
|
|
|
|
typedef void (*walrcv_alter_slot_fn) (WalReceiverConn *conn,
|
|
|
|
const char *slotname,
|
|
|
|
bool failover);
|
|
|
|
|
2020-07-02 06:57:03 +02:00
|
|
|
/*
|
|
|
|
* walrcv_get_backend_pid_fn
|
|
|
|
*
|
|
|
|
* Returns the PID of the remote backend process.
|
|
|
|
*/
|
2020-01-14 14:05:25 +01:00
|
|
|
typedef pid_t (*walrcv_get_backend_pid_fn) (WalReceiverConn *conn);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_exec_fn
|
|
|
|
*
|
|
|
|
* Send generic queries (and commands) to the remote cluster. 'nRetTypes'
|
|
|
|
* is the expected number of returned attributes, and 'retTypes' an array
|
|
|
|
* including their type OIDs. Returns the status of the execution and
|
|
|
|
* tuples if any.
|
|
|
|
*/
|
2017-03-23 13:36:36 +01:00
|
|
|
typedef WalRcvExecResult *(*walrcv_exec_fn) (WalReceiverConn *conn,
|
|
|
|
const char *query,
|
|
|
|
const int nRetTypes,
|
|
|
|
const Oid *retTypes);
|
2020-07-02 06:57:03 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* walrcv_disconnect_fn
|
|
|
|
*
|
|
|
|
* Disconnect with the cluster.
|
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
typedef void (*walrcv_disconnect_fn) (WalReceiverConn *conn);
|
|
|
|
|
|
|
|
typedef struct WalReceiverFunctionsType
|
|
|
|
{
|
2016-12-02 13:40:36 +01:00
|
|
|
walrcv_connect_fn walrcv_connect;
|
2017-01-19 18:00:00 +01:00
|
|
|
walrcv_check_conninfo_fn walrcv_check_conninfo;
|
2016-12-02 13:40:36 +01:00
|
|
|
walrcv_get_conninfo_fn walrcv_get_conninfo;
|
2018-03-31 00:51:22 +02:00
|
|
|
walrcv_get_senderinfo_fn walrcv_get_senderinfo;
|
2016-12-02 13:40:36 +01:00
|
|
|
walrcv_identify_system_fn walrcv_identify_system;
|
2024-02-05 06:15:34 +01:00
|
|
|
walrcv_get_dbname_from_conninfo_fn walrcv_get_dbname_from_conninfo;
|
2019-03-15 10:16:26 +01:00
|
|
|
walrcv_server_version_fn walrcv_server_version;
|
2016-12-02 13:40:36 +01:00
|
|
|
walrcv_readtimelinehistoryfile_fn walrcv_readtimelinehistoryfile;
|
|
|
|
walrcv_startstreaming_fn walrcv_startstreaming;
|
|
|
|
walrcv_endstreaming_fn walrcv_endstreaming;
|
|
|
|
walrcv_receive_fn walrcv_receive;
|
|
|
|
walrcv_send_fn walrcv_send;
|
2017-01-19 18:00:00 +01:00
|
|
|
walrcv_create_slot_fn walrcv_create_slot;
|
2024-01-29 04:40:00 +01:00
|
|
|
walrcv_alter_slot_fn walrcv_alter_slot;
|
2020-01-14 14:05:25 +01:00
|
|
|
walrcv_get_backend_pid_fn walrcv_get_backend_pid;
|
2017-03-23 13:36:36 +01:00
|
|
|
walrcv_exec_fn walrcv_exec;
|
2016-12-02 13:40:36 +01:00
|
|
|
walrcv_disconnect_fn walrcv_disconnect;
|
2016-11-30 18:00:00 +01:00
|
|
|
} WalReceiverFunctionsType;
|
|
|
|
|
|
|
|
extern PGDLLIMPORT WalReceiverFunctionsType *WalReceiverFunctions;
|
|
|
|
|
2024-02-05 06:15:34 +01:00
|
|
|
/* Convenience macros dispatching through the loaded function table. */
#define walrcv_connect(conninfo, replication, logical, must_use_password, appname, err) \
	WalReceiverFunctions->walrcv_connect(conninfo, replication, logical, must_use_password, appname, err)
#define walrcv_check_conninfo(conninfo, must_use_password) \
	WalReceiverFunctions->walrcv_check_conninfo(conninfo, must_use_password)
#define walrcv_get_conninfo(conn) \
	WalReceiverFunctions->walrcv_get_conninfo(conn)
#define walrcv_get_senderinfo(conn, sender_host, sender_port) \
	WalReceiverFunctions->walrcv_get_senderinfo(conn, sender_host, sender_port)
#define walrcv_identify_system(conn, primary_tli) \
	WalReceiverFunctions->walrcv_identify_system(conn, primary_tli)
#define walrcv_get_dbname_from_conninfo(conninfo) \
	WalReceiverFunctions->walrcv_get_dbname_from_conninfo(conninfo)
#define walrcv_server_version(conn) \
	WalReceiverFunctions->walrcv_server_version(conn)
#define walrcv_readtimelinehistoryfile(conn, tli, filename, content, size) \
	WalReceiverFunctions->walrcv_readtimelinehistoryfile(conn, tli, filename, content, size)
#define walrcv_startstreaming(conn, options) \
	WalReceiverFunctions->walrcv_startstreaming(conn, options)
#define walrcv_endstreaming(conn, next_tli) \
	WalReceiverFunctions->walrcv_endstreaming(conn, next_tli)
#define walrcv_receive(conn, buffer, wait_fd) \
	WalReceiverFunctions->walrcv_receive(conn, buffer, wait_fd)
#define walrcv_send(conn, buffer, nbytes) \
	WalReceiverFunctions->walrcv_send(conn, buffer, nbytes)
#define walrcv_create_slot(conn, slotname, temporary, two_phase, failover, snapshot_action, lsn) \
	WalReceiverFunctions->walrcv_create_slot(conn, slotname, temporary, two_phase, failover, snapshot_action, lsn)
#define walrcv_alter_slot(conn, slotname, failover) \
	WalReceiverFunctions->walrcv_alter_slot(conn, slotname, failover)
#define walrcv_get_backend_pid(conn) \
	WalReceiverFunctions->walrcv_get_backend_pid(conn)
#define walrcv_exec(conn, exec, nRetTypes, retTypes) \
	WalReceiverFunctions->walrcv_exec(conn, exec, nRetTypes, retTypes)
#define walrcv_disconnect(conn) \
	WalReceiverFunctions->walrcv_disconnect(conn)
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2017-03-23 13:36:36 +01:00
|
|
|
static inline void
|
|
|
|
walrcv_clear_result(WalRcvExecResult *walres)
|
|
|
|
{
|
|
|
|
if (!walres)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (walres->err)
|
|
|
|
pfree(walres->err);
|
|
|
|
|
|
|
|
if (walres->tuplestore)
|
|
|
|
tuplestore_end(walres->tuplestore);
|
|
|
|
|
|
|
|
if (walres->tupledesc)
|
|
|
|
FreeTupleDesc(walres->tupledesc);
|
|
|
|
|
|
|
|
pfree(walres);
|
|
|
|
}
|
|
|
|
|
2010-09-13 12:14:25 +02:00
|
|
|
/* prototypes for functions in walreceiver.c */
|
2024-03-18 10:35:08 +01:00
|
|
|
extern void WalReceiverMain(char *startup_data, size_t startup_data_len) pg_attribute_noreturn();
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
extern void ProcessWalRcvInterrupts(void);
|
2022-08-04 10:36:21 +02:00
|
|
|
extern void WalRcvForceReply(void);
|
2010-09-13 12:14:25 +02:00
|
|
|
|
|
|
|
/* prototypes for functions in walreceiverfuncs.c */
|
2010-01-15 10:19:10 +01:00
|
|
|
extern Size WalRcvShmemSize(void);
|
|
|
|
extern void WalRcvShmemInit(void);
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
extern void ShutdownWalRcv(void);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
extern bool WalRcvStreaming(void);
|
|
|
|
extern bool WalRcvRunning(void);
|
2014-02-01 04:45:17 +01:00
|
|
|
extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr,
|
2020-03-27 20:04:52 +01:00
|
|
|
const char *conninfo, const char *slotname,
|
|
|
|
bool create_temp_slot);
|
2020-04-08 13:45:09 +02:00
|
|
|
extern XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI);
|
|
|
|
extern XLogRecPtr GetWalRcvWriteRecPtr(void);
|
2011-12-31 14:30:26 +01:00
|
|
|
extern int GetReplicationApplyDelay(void);
|
|
|
|
extern int GetReplicationTransferLatency(void);
|
2010-01-15 10:19:10 +01:00
|
|
|
|
|
|
|
#endif /* _WALRECEIVER_H */
|