/*-------------------------------------------------------------------------
 *
 * walreceiverfuncs.c
 *
 * This file contains functions used by the startup process to communicate
 * with the walreceiver process. Functions implementing walreceiver itself
 * are in walreceiver.c.
 *
 * Portions Copyright (c) 2010-2017, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/replication/walreceiverfuncs.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <sys/stat.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>

#include "access/xlog_internal.h"
#include "postmaster/startup.h"
#include "replication/walreceiver.h"
#include "storage/pmsignal.h"
#include "storage/shmem.h"
#include "utils/timestamp.h"

WalRcvData *WalRcv = NULL;

/*
 * How long to wait for walreceiver to start up after requesting
 * postmaster to launch it. In seconds.
 */
#define WALRCV_STARTUP_TIMEOUT 10

/* Report shared memory space needed by WalRcvShmemInit */
Size
WalRcvShmemSize(void)
{
	Size		size = 0;

	size = add_size(size, sizeof(WalRcvData));

	return size;
}

/* Allocate and initialize walreceiver-related shared memory */
void
WalRcvShmemInit(void)
{
	bool		found;

	WalRcv = (WalRcvData *)
		ShmemInitStruct("Wal Receiver Ctl", WalRcvShmemSize(), &found);

	if (!found)
	{
		/* First time through, so initialize */
		MemSet(WalRcv, 0, WalRcvShmemSize());
		WalRcv->walRcvState = WALRCV_STOPPED;
		SpinLockInit(&WalRcv->mutex);
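		/*
		 * The latch is owned by the walreceiver process: walreceiver.c
		 * points it at the process's own latch once the process attaches,
		 * and resets it to NULL again on exit.
		 */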
		WalRcv->latch = NULL;
	}
}

/* Is walreceiver running (or starting up)? */
bool
WalRcvRunning(void)
{
	WalRcvData *walrcv = WalRcv;
	WalRcvState state;
	pg_time_t	startTime;

	SpinLockAcquire(&walrcv->mutex);

	state = walrcv->walRcvState;
	startTime = walrcv->startTime;

	SpinLockRelease(&walrcv->mutex);

	/*
	 * If it has taken too long for walreceiver to start up, give up. Setting
	 * the state to STOPPED ensures that if walreceiver later does start up
	 * after all, it will see that it's not supposed to be running and die
	 * without doing anything.
	 */
	if (state == WALRCV_STARTING)
	{
		pg_time_t	now = (pg_time_t) time(NULL);

		if ((now - startTime) > WALRCV_STARTUP_TIMEOUT)
		{
			SpinLockAcquire(&walrcv->mutex);
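
			/*
			 * Re-check under the mutex before giving up: the state may have
			 * advanced past WALRCV_STARTING since we sampled it above, in
			 * which case the startup has not actually timed out.
			 */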
			if (walrcv->walRcvState == WALRCV_STARTING)
				state = walrcv->walRcvState = WALRCV_STOPPED;

			SpinLockRelease(&walrcv->mutex);
		}
	}

	if (state != WALRCV_STOPPED)
		return true;
	else
		return false;
}

/*
 * Is walreceiver running and streaming (or at least attempting to connect,
 * or starting up)?
 */
bool
WalRcvStreaming(void)
{
	WalRcvData *walrcv = WalRcv;
	WalRcvState state;
	pg_time_t	startTime;

	SpinLockAcquire(&walrcv->mutex);

	state = walrcv->walRcvState;
	startTime = walrcv->startTime;

	SpinLockRelease(&walrcv->mutex);

	/*
	 * If it has taken too long for walreceiver to start up, give up. Setting
	 * the state to STOPPED ensures that if walreceiver later does start up
	 * after all, it will see that it's not supposed to be running and die
	 * without doing anything.
	 */
	if (state == WALRCV_STARTING)
	{
		pg_time_t	now = (pg_time_t) time(NULL);

		if ((now - startTime) > WALRCV_STARTUP_TIMEOUT)
		{
			SpinLockAcquire(&walrcv->mutex);

			if (walrcv->walRcvState == WALRCV_STARTING)
				state = walrcv->walRcvState = WALRCV_STOPPED;

			SpinLockRelease(&walrcv->mutex);
		}
	}

	if (state == WALRCV_STREAMING || state == WALRCV_STARTING ||
		state == WALRCV_RESTARTING)
		return true;
	else
		return false;
}

/*
 * Stop walreceiver (if running) and wait for it to die.
 * Executed by the Startup process.
 */
void
ShutdownWalRcv(void)
{
	WalRcvData *walrcv = WalRcv;
	pid_t		walrcvpid = 0;

	/*
	 * Request walreceiver to stop. Walreceiver will switch to WALRCV_STOPPED
	 * mode once it's finished, and will also request postmaster to not
	 * restart itself.
	 */
	SpinLockAcquire(&walrcv->mutex);
	switch (walrcv->walRcvState)
	{
		case WALRCV_STOPPED:
			break;
		case WALRCV_STARTING:
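			/*
			 * Walreceiver was requested but has not attached yet; setting
			 * the state back to STOPPED is enough, since walreceiver checks
			 * the state when it starts and exits if it is not supposed to
			 * be running.
			 */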
			walrcv->walRcvState = WALRCV_STOPPED;
			break;

		case WALRCV_STREAMING:
		case WALRCV_WAITING:
		case WALRCV_RESTARTING:
			walrcv->walRcvState = WALRCV_STOPPING;
			/* fall through */
		case WALRCV_STOPPING:
			walrcvpid = walrcv->pid;
			break;
	}
	SpinLockRelease(&walrcv->mutex);

	/*
	 * Signal walreceiver process if it was still running.
	 */
	if (walrcvpid != 0)
		kill(walrcvpid, SIGTERM);

	/*
	 * Wait for walreceiver to acknowledge its death by setting state to
	 * WALRCV_STOPPED.
	 */
	while (WalRcvRunning())
	{
		/*
		 * This possibly-long loop needs to handle interrupts of startup
		 * process.
		 */
		HandleStartupProcInterrupts();

		pg_usleep(100000);		/* 100ms */
	}
}

/*
 * Request postmaster to start walreceiver.
 *
 * recptr indicates the position where streaming should begin, conninfo
 * is a libpq connection string to use, and slotname is, optionally, the name
 * of a replication slot to acquire.
 */
void
RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr, const char *conninfo,
					 const char *slotname)
{
	WalRcvData *walrcv = WalRcv;
	bool		launch = false;
	pg_time_t	now = (pg_time_t) time(NULL);
	Latch	   *latch;

	/*
	 * We always start at the beginning of the segment. That prevents a broken
	 * segment (i.e., with no records in the first half of a segment) from
	 * being created by XLOG streaming, which might cause trouble later on if
	 * the segment is e.g. archived.
	 */
	if (XLogSegmentOffset(recptr, wal_segment_size) != 0)
		recptr -= XLogSegmentOffset(recptr, wal_segment_size);
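
	/*
	 * For example, assuming the default 16MB (0x1000000) segment size, a
	 * recptr of 0/12345678 has a segment offset of 0x345678 and is rounded
	 * down to 0/12000000, the first byte of its segment.
	 */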

	SpinLockAcquire(&walrcv->mutex);

	/* It better be stopped if we try to restart it */
	Assert(walrcv->walRcvState == WALRCV_STOPPED ||
		   walrcv->walRcvState == WALRCV_WAITING);
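
	/*
	 * WALRCV_WAITING means an existing walreceiver has finished streaming
	 * one timeline and is waiting for new orders; in that case we only
	 * update the shared start position below and wake the process, rather
	 * than launching a new one.
	 */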
|
2010-01-15 10:19:10 +01:00
|
|
|
|
|
|
|
if (conninfo != NULL)
|
|
|
|
strlcpy((char *) walrcv->conninfo, conninfo, MAXCONNINFO);
|
|
|
|
else
|
|
|
|
walrcv->conninfo[0] = '\0';
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
|
2014-02-01 04:45:17 +01:00
|
|
|
if (slotname != NULL)
|
|
|
|
strlcpy((char *) walrcv->slotname, slotname, NAMEDATALEN);
|
|
|
|
else
|
|
|
|
walrcv->slotname[0] = '\0';
|
|
|
|
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
if (walrcv->walRcvState == WALRCV_STOPPED)
|
|
|
|
{
|
|
|
|
launch = true;
|
|
|
|
walrcv->walRcvState = WALRCV_STARTING;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
walrcv->walRcvState = WALRCV_RESTARTING;
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
walrcv->startTime = now;
|
|
|
|
|
2011-03-01 19:46:57 +01:00
|
|
|
/*
|
Fix walsender failure at promotion.
If a standby server has a cascading standby server connected to it, it's
possible that WAL has already been sent up to the next WAL page boundary,
splitting a WAL record in the middle, when the first standby server is
promoted. Don't throw an assertion failure or error in walsender if that
happens.
Also, fix a variant of the same bug in pg_receivexlog: if it had already
received WAL on previous timeline up to a segment boundary, when the
upstream standby server is promoted so that the timeline switch record falls
on the previous segment, pg_receivexlog would miss the segment containing
the timeline switch. To fix that, have walsender send the position of the
timeline switch at end-of-streaming, in addition to the next timeline's ID.
It was previously assumed that the switch happened exactly where the
streaming stopped.
Note: this is an incompatible change in the streaming protocol. You might
get an error if you try to stream over timeline switches, if the client is
running 9.3beta1 and the server is more recent. It should be fine after a
reconnect, however.
Reported by Fujii Masao.
2013-05-08 19:10:17 +02:00
|
|
|
* If this is the first startup of walreceiver (on this timeline),
|
|
|
|
* initialize receivedUpto and latestChunkStart to the starting point.
|
2011-03-01 19:46:57 +01:00
|
|
|
*/
|
Fix walsender failure at promotion.
If a standby server has a cascading standby server connected to it, it's
possible that WAL has already been sent up to the next WAL page boundary,
splitting a WAL record in the middle, when the first standby server is
promoted. Don't throw an assertion failure or error in walsender if that
happens.
Also, fix a variant of the same bug in pg_receivexlog: if it had already
received WAL on previous timeline up to a segment boundary, when the
upstream standby server is promoted so that the timeline switch record falls
on the previous segment, pg_receivexlog would miss the segment containing
the timeline switch. To fix that, have walsender send the position of the
timeline switch at end-of-streaming, in addition to the next timeline's ID.
It was previously assumed that the switch happened exactly where the
streaming stopped.
Note: this is an incompatible change in the streaming protocol. You might
get an error if you try to stream over timeline switches, if the client is
running 9.3beta1 and the server is more recent. It should be fine after a
reconnect, however.
Reported by Fujii Masao.
2013-05-08 19:10:17 +02:00
|
|
|
if (walrcv->receiveStart == 0 || walrcv->receivedTLI != tli)
|
2011-03-01 19:46:57 +01:00
|
|
|
{
|
|
|
|
walrcv->receivedUpto = recptr;
|
Fix walsender failure at promotion.
If a standby server has a cascading standby server connected to it, it's
possible that WAL has already been sent up to the next WAL page boundary,
splitting a WAL record in the middle, when the first standby server is
promoted. Don't throw an assertion failure or error in walsender if that
happens.
Also, fix a variant of the same bug in pg_receivexlog: if it had already
received WAL on previous timeline up to a segment boundary, when the
upstream standby server is promoted so that the timeline switch record falls
on the previous segment, pg_receivexlog would miss the segment containing
the timeline switch. To fix that, have walsender send the position of the
timeline switch at end-of-streaming, in addition to the next timeline's ID.
It was previously assumed that the switch happened exactly where the
streaming stopped.
Note: this is an incompatible change in the streaming protocol. You might
get an error if you try to stream over timeline switches, if the client is
running 9.3beta1 and the server is more recent. It should be fine after a
reconnect, however.
Reported by Fujii Masao.
2013-05-08 19:10:17 +02:00
|
|
|
walrcv->receivedTLI = tli;
|
2011-03-01 19:46:57 +01:00
|
|
|
walrcv->latestChunkStart = recptr;
|
|
|
|
}
|
|
|
|
walrcv->receiveStart = recptr;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline-related logic in streaming replication
now. First of all, when a standby connects to the primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in the standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of the timeline is reached, putting the
walsender back into the mode where it accepts replication commands.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
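To make the two new commands concrete, here is a small, hypothetical sketch (not part of this file) of how a standby-side client might use them over a connection opened with replication=true; the timeline ID and start position are made-up example values.

#include "libpq-fe.h"

/* Hypothetical sketch: fetch a missing history file, then stream a chosen timeline */
static void
follow_timeline(PGconn *conn)
{
	PGresult   *res;

	/* TIMELINE_HISTORY returns one row: the file's name and its content */
	res = PQexec(conn, "TIMELINE_HISTORY 2");
	if (PQresultStatus(res) == PGRES_TUPLES_OK)
	{
		/* ... write PQgetvalue(res, 0, 1) into pg_xlog ... */
	}
	PQclear(res);

	/* Ask for WAL from an explicitly chosen timeline, even a historic one */
	res = PQexec(conn, "START_REPLICATION 0/3000000 TIMELINE 1");
	if (PQresultStatus(res) == PGRES_COPY_BOTH)
	{
		/* ... consume WAL with PQgetCopyData() until the timeline ends ... */
	}
	PQclear(res);
}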
|
|
|
walrcv->receiveStartTLI = tli;
|
2010-07-03 22:43:58 +02:00
|
|
|
|
2017-10-03 20:00:56 +02:00
|
|
|
latch = walrcv->latch;
|
|
|
|
|
2010-01-15 10:19:10 +01:00
|
|
|
SpinLockRelease(&walrcv->mutex);
|
|
|
|
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master and two
standbys, and you promote one of the standbys to become the new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline-related logic in streaming replication
now. First of all, when a standby connects to the primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in the standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of the timeline is reached, putting the
walsender back into the mode where it accepts replication commands.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
if (launch)
|
|
|
|
SendPostmasterSignal(PMSIGNAL_START_WALRECEIVER);
|
2017-10-03 20:00:56 +02:00
|
|
|
else if (latch)
|
|
|
|
SetLatch(latch);
|
2010-01-15 10:19:10 +01:00
|
|
|
}
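A brief, hypothetical usage sketch (not part of this file) for the function that ends above, RequestXLogStreaming(): the startup process calls it roughly like this when it wants WAL streamed. The connection string, slot name, and start position are made up, and the four-argument signature assumed here is the one used since replication slots were introduced.

	TimeLineID	tli = 1;
	XLogRecPtr	startpoint = (XLogRecPtr) 0x3000000;	/* 0/3000000 */

	/* Ask the postmaster to start (or nudge) a walreceiver from this point */
	RequestXLogStreaming(tli, startpoint, "host=primary port=5432",
						 "example_slot");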
|
|
|
|
|
|
|
|
/*
|
2010-07-03 22:43:58 +02:00
|
|
|
* Returns the last+1 byte position that walreceiver has written.
|
|
|
|
*
|
|
|
|
* Optionally, returns the previous chunk start, that is the first byte
|
2014-05-06 18:12:18 +02:00
|
|
|
* written in the most recent walreceiver flush cycle. Callers not
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master and two
standbys, and you promote one of the standbys to become the new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline-related logic in streaming replication
now. First of all, when a standby connects to the primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in the standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of the timeline is reached, putting the
walsender back into the mode where it accepts replication commands.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
* interested in that value may pass NULL for latestChunkStart. Same for
|
|
|
|
* receiveTLI.
|
2010-01-15 10:19:10 +01:00
|
|
|
*/
|
|
|
|
XLogRecPtr
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master and two
standbys, and you promote one of the standbys to become the new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline-related logic in streaming replication
now. First of all, when a standby connects to the primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in the standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of the timeline is reached, putting the
walsender back into the mode where it accepts replication commands.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
|
2010-01-15 10:19:10 +01:00
|
|
|
{
|
2015-10-06 21:45:02 +02:00
|
|
|
WalRcvData *walrcv = WalRcv;
|
2010-01-15 10:19:10 +01:00
|
|
|
XLogRecPtr recptr;
|
|
|
|
|
|
|
|
SpinLockAcquire(&walrcv->mutex);
|
|
|
|
recptr = walrcv->receivedUpto;
|
2010-07-03 22:43:58 +02:00
|
|
|
if (latestChunkStart)
|
|
|
|
*latestChunkStart = walrcv->latestChunkStart;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master and two
standbys, and you promote one of the standbys to become the new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline-related logic in streaming replication
now. First of all, when a standby connects to the primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in the standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of the timeline is reached, putting the
walsender back into the mode where it accepts replication commands.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
if (receiveTLI)
|
|
|
|
*receiveTLI = walrcv->receivedTLI;
|
2010-01-15 10:19:10 +01:00
|
|
|
SpinLockRelease(&walrcv->mutex);
|
|
|
|
|
|
|
|
return recptr;
|
|
|
|
}
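A short, hypothetical usage sketch (not part of this file) of the function above, showing both the full call and the NULL-argument form its comment permits:

	XLogRecPtr	latestChunkStart;
	TimeLineID	receiveTLI;
	XLogRecPtr	writePtr;

	/* Fetch the write pointer plus the chunk start and the receive timeline */
	writePtr = GetWalRcvWriteRecPtr(&latestChunkStart, &receiveTLI);

	/* Callers that only want the write pointer can pass NULL for both */
	writePtr = GetWalRcvWriteRecPtr(NULL, NULL);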
|
2011-12-31 14:30:26 +01:00
|
|
|
|
|
|
|
/*
|
2015-03-14 00:16:50 +01:00
|
|
|
* Returns the replication apply delay in ms or -1
|
|
|
|
* if the apply delay info is not available.
|
2011-12-31 14:30:26 +01:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
GetReplicationApplyDelay(void)
|
|
|
|
{
|
2015-10-06 21:45:02 +02:00
|
|
|
WalRcvData *walrcv = WalRcv;
|
2011-12-31 14:30:26 +01:00
|
|
|
XLogRecPtr receivePtr;
|
|
|
|
XLogRecPtr replayPtr;
|
|
|
|
|
2012-06-10 21:20:04 +02:00
|
|
|
long secs;
|
|
|
|
int usecs;
|
2011-12-31 14:30:26 +01:00
|
|
|
|
2017-03-14 17:57:10 +01:00
|
|
|
TimestampTz chunkReplayStartTime;
|
2015-03-14 00:16:50 +01:00
|
|
|
|
2011-12-31 14:30:26 +01:00
|
|
|
SpinLockAcquire(&walrcv->mutex);
|
|
|
|
receivePtr = walrcv->receivedUpto;
|
|
|
|
SpinLockRelease(&walrcv->mutex);
|
|
|
|
|
Follow TLI of last replayed record, not recovery target TLI, in walsenders.
Most of the time, the last replayed record comes from the recovery target
timeline, but there is a corner case where it makes a difference. When
the startup process scans for a new timeline, and decides to change the recovery
target timeline, there is a window where the recovery target TLI has already
been bumped, but there are no WAL segments from the new timeline in pg_xlog
yet. For example, if we have just replayed up to point 0/30002D8, on
timeline 1, there is a WAL file called 000000010000000000000003 in pg_xlog
that contains the WAL up to that point. When recovery switches recovery
target timeline to 2, a walsender can immediately try to read WAL from
0/30002D8, from timeline 2, so it will try to open WAL file
000000020000000000000003. However, that doesn't exist yet - the startup
process hasn't copied that file from the archive yet nor has the walreceiver
streamed it yet, so walsender fails with error "requested WAL segment
000000020000000000000003 has already been removed". That's harmless, in that
the standby will try to reconnect later and by that time the segment is
already created, but error messages that should be ignored are not good.
To fix that, have walsender track the TLI of the last replayed record,
instead of the recovery target timeline. That way walsender will not try to
read anything from timeline 2, until the WAL segment has been created and at
least one record has been replayed from it. The recovery target timeline is
now xlog.c's internal affair; it doesn't need to be exposed in shared memory
anymore.
This fixes the error reported by Thom Brown. depesz reported the same error message,
but I'm not sure if this fixes his scenario.
2012-12-20 13:23:31 +01:00
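For readers unfamiliar with the segment names quoted above, here is a small, self-contained sketch (not part of this file) of how they are formed for the default 16 MB segment size: 8 hex digits of timeline ID followed by the segment number split into two 8-digit halves, mirroring XLogFileName() in xlog_internal.h.

#include <stdio.h>

int
main(void)
{
	unsigned int tli = 2;				/* timeline after the switch */
	unsigned long long segno = 3;		/* segment containing 0/30002D8 */
	unsigned long long segs_per_id = 0x100000000ULL / (16 * 1024 * 1024);

	/* prints 000000020000000000000003 */
	printf("%08X%08X%08X\n", tli,
		   (unsigned int) (segno / segs_per_id),
		   (unsigned int) (segno % segs_per_id));
	return 0;
}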
|
|
|
replayPtr = GetXLogReplayRecPtr(NULL);
|
2011-12-31 14:30:26 +01:00
|
|
|
|
2012-12-28 17:06:15 +01:00
|
|
|
if (receivePtr == replayPtr)
|
2011-12-31 14:30:26 +01:00
|
|
|
return 0;
|
|
|
|
|
2017-03-14 17:57:10 +01:00
|
|
|
chunkReplayStartTime = GetCurrentChunkReplayStartTime();
|
2015-03-14 00:16:50 +01:00
|
|
|
|
2017-03-14 17:57:10 +01:00
|
|
|
if (chunkReplayStartTime == 0)
|
2015-03-14 00:16:50 +01:00
|
|
|
return -1;
|
|
|
|
|
2017-03-14 17:57:10 +01:00
|
|
|
TimestampDifference(chunkReplayStartTime,
|
2011-12-31 14:30:26 +01:00
|
|
|
GetCurrentTimestamp(),
|
|
|
|
&secs, &usecs);
|
|
|
|
|
|
|
|
return (((int) secs * 1000) + (usecs / 1000));
|
|
|
|
}
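A hypothetical caller's view (not part of this file) of the three cases the function above can report; elog() assumes a backend context:

	int			delay_ms = GetReplicationApplyDelay();

	if (delay_ms < 0)
		elog(LOG, "apply delay not available yet");
	else if (delay_ms == 0)
		elog(LOG, "standby has applied everything it has received");
	else
		elog(LOG, "standby is %d ms behind in applying WAL", delay_ms);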
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns the network latency in ms. Note that this includes any
|
|
|
|
* difference in clock settings between the servers, as well as any
* difference in timezone settings.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
GetReplicationTransferLatency(void)
|
|
|
|
{
|
2015-10-06 21:45:02 +02:00
|
|
|
WalRcvData *walrcv = WalRcv;
|
2011-12-31 14:30:26 +01:00
|
|
|
|
|
|
|
TimestampTz lastMsgSendTime;
|
|
|
|
TimestampTz lastMsgReceiptTime;
|
|
|
|
|
2012-06-10 21:20:04 +02:00
|
|
|
long secs = 0;
|
|
|
|
int usecs = 0;
|
|
|
|
int ms;
|
2011-12-31 14:30:26 +01:00
|
|
|
|
|
|
|
SpinLockAcquire(&walrcv->mutex);
|
|
|
|
lastMsgSendTime = walrcv->lastMsgSendTime;
|
|
|
|
lastMsgReceiptTime = walrcv->lastMsgReceiptTime;
|
|
|
|
SpinLockRelease(&walrcv->mutex);
|
|
|
|
|
|
|
|
TimestampDifference(lastMsgSendTime,
|
|
|
|
lastMsgReceiptTime,
|
|
|
|
&secs, &usecs);
|
|
|
|
|
|
|
|
ms = ((int) secs * 1000) + (usecs / 1000);
|
|
|
|
|
|
|
|
return ms;
|
|
|
|
}
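And a matching hypothetical sketch (not part of this file) for the latency function, with the caveat from its comment spelled out; again elog() assumes a backend context:

	int			latency_ms = GetReplicationTransferLatency();

	/*
	 * Because this compares the primary's send timestamp with the standby's
	 * receipt timestamp, any clock skew between the two servers shows up
	 * directly in the result; it can even be negative.
	 */
	elog(LOG, "last message took %d ms to arrive", latency_ms);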
|