/*-------------------------------------------------------------------------
 *
 * walreceiverfuncs.c
 *
 * This file contains functions used by the startup process to communicate
 * with the walreceiver process. Functions implementing walreceiver itself
 * are in walreceiver.c.
 *
 * Portions Copyright (c) 2010-2011, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/replication/walreceiverfuncs.c
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
#include <sys/time.h>
|
|
|
|
#include <time.h>
|
2010-01-15 10:19:10 +01:00
|
|
|
#include <unistd.h>
|
|
|
|
#include <signal.h>
|
|
|
|
|
|
|
|
#include "access/xlog_internal.h"
|
2011-11-02 15:25:01 +01:00
|
|
|
#include "postmaster/startup.h"
|
2010-01-15 10:19:10 +01:00
|
|
|
#include "replication/walreceiver.h"
|
|
|
|
#include "storage/pmsignal.h"
|
2011-09-04 07:13:16 +02:00
|
|
|
#include "storage/shmem.h"
|
2010-01-15 10:19:10 +01:00
|
|
|
|
|
|
|
WalRcvData *WalRcv = NULL;
|
|
|
|
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
/*
|
|
|
|
* How long to wait for walreceiver to start up after requesting
|
|
|
|
* postmaster to launch it. In seconds.
|
|
|
|
*/
|
|
|
|
#define WALRCV_STARTUP_TIMEOUT 10
|
2010-01-15 10:19:10 +01:00
|
|
|
|
|
|
|
/* Report shared memory space needed by WalRcvShmemInit */
|
|
|
|
Size
|
|
|
|
WalRcvShmemSize(void)
|
|
|
|
{
|
2010-02-26 03:01:40 +01:00
|
|
|
Size size = 0;
|
2010-01-15 10:19:10 +01:00
|
|
|
|
|
|
|
size = add_size(size, sizeof(WalRcvData));
|
|
|
|
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allocate and initialize walreceiver-related shared memory */
|
|
|
|
void
|
|
|
|
WalRcvShmemInit(void)
|
|
|
|
{
|
2010-02-26 03:01:40 +01:00
|
|
|
bool found;
|
2010-01-15 10:19:10 +01:00
|
|
|
|
|
|
|
WalRcv = (WalRcvData *)
|
|
|
|
ShmemInitStruct("Wal Receiver Ctl", WalRcvShmemSize(), &found);
|
|
|
|
|
2010-04-28 18:54:16 +02:00
|
|
|
if (!found)
|
|
|
|
{
|
|
|
|
/* First time through, so initialize */
|
|
|
|
MemSet(WalRcv, 0, WalRcvShmemSize());
|
|
|
|
WalRcv->walRcvState = WALRCV_STOPPED;
|
|
|
|
SpinLockInit(&WalRcv->mutex);
|
|
|
|
}
|
2010-01-15 10:19:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Is walreceiver in progress (or starting up)? */
|
|
|
|
bool
|
|
|
|
WalRcvInProgress(void)
|
|
|
|
{
|
|
|
|
/* use volatile pointer to prevent code rearrangement */
|
|
|
|
volatile WalRcvData *walrcv = WalRcv;
|
|
|
|
WalRcvState state;
|
2010-02-26 03:01:40 +01:00
|
|
|
pg_time_t startTime;
|
2010-01-15 10:19:10 +01:00
|
|
|
|
|
|
|
SpinLockAcquire(&walrcv->mutex);
|
|
|
|
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
state = walrcv->walRcvState;
|
|
|
|
startTime = walrcv->startTime;
|
2010-01-15 10:19:10 +01:00
|
|
|
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
SpinLockRelease(&walrcv->mutex);
|
2010-01-15 10:19:10 +01:00
|
|
|
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* If it has taken too long for walreceiver to start up, give up. Setting
|
|
|
|
* the state to STOPPED ensures that if walreceiver later does start up
|
|
|
|
* after all, it will see that it's not supposed to be running and die
|
|
|
|
* without doing anything.
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
*/
|
|
|
|
if (state == WALRCV_STARTING)
|
2010-01-15 10:19:10 +01:00
|
|
|
{
|
2010-02-26 03:01:40 +01:00
|
|
|
pg_time_t now = (pg_time_t) time(NULL);
|
2010-01-15 10:19:10 +01:00
|
|
|
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
if ((now - startTime) > WALRCV_STARTUP_TIMEOUT)
|
2010-01-15 10:19:10 +01:00
|
|
|
{
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
SpinLockAcquire(&walrcv->mutex);
|
2010-01-15 10:19:10 +01:00
|
|
|
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
if (walrcv->walRcvState == WALRCV_STARTING)
|
|
|
|
state = walrcv->walRcvState = WALRCV_STOPPED;
|
2010-01-15 10:19:10 +01:00
|
|
|
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
SpinLockRelease(&walrcv->mutex);
|
|
|
|
}
|
2010-01-15 10:19:10 +01:00
|
|
|
}
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
|
|
|
|
if (state != WALRCV_STOPPED)
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return false;
|
2010-01-15 10:19:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
* Stop walreceiver (if running) and wait for it to die.
|
2011-11-02 15:25:01 +01:00
|
|
|
* Executed by the Startup process.
|
2010-01-15 10:19:10 +01:00
|
|
|
*/
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
void
|
2010-01-15 10:19:10 +01:00
|
|
|
ShutdownWalRcv(void)
|
|
|
|
{
|
|
|
|
/* use volatile pointer to prevent code rearrangement */
|
|
|
|
volatile WalRcvData *walrcv = WalRcv;
|
2010-02-26 03:01:40 +01:00
|
|
|
pid_t walrcvpid = 0;
|
2010-01-15 10:19:10 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Request walreceiver to stop. Walreceiver will switch to WALRCV_STOPPED
|
|
|
|
* mode once it's finished, and will also request postmaster to not
|
|
|
|
* restart itself.
|
|
|
|
*/
|
|
|
|
SpinLockAcquire(&walrcv->mutex);
|
2010-02-26 03:01:40 +01:00
|
|
|
switch (walrcv->walRcvState)
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
{
|
|
|
|
case WALRCV_STOPPED:
|
|
|
|
break;
|
|
|
|
case WALRCV_STARTING:
|
|
|
|
walrcv->walRcvState = WALRCV_STOPPED;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case WALRCV_RUNNING:
|
|
|
|
walrcv->walRcvState = WALRCV_STOPPING;
|
|
|
|
/* fall through */
|
|
|
|
case WALRCV_STOPPING:
|
|
|
|
walrcvpid = walrcv->pid;
|
|
|
|
break;
|
|
|
|
}
|
2010-01-15 10:19:10 +01:00
|
|
|
SpinLockRelease(&walrcv->mutex);
|
|
|
|
|
|
|
|
/*
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
* Signal walreceiver process if it was still running.
|
2010-01-15 10:19:10 +01:00
|
|
|
*/
|
|
|
|
if (walrcvpid != 0)
|
|
|
|
kill(walrcvpid, SIGTERM);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Wait for walreceiver to acknowledge its death by setting state to
|
|
|
|
* WALRCV_STOPPED.
|
|
|
|
*/
|
|
|
|
while (WalRcvInProgress())
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* This possibly-long loop needs to handle interrupts of startup
|
|
|
|
* process.
|
|
|
|
*/
|
|
|
|
HandleStartupProcInterrupts();
|
|
|
|
|
|
|
|
pg_usleep(100000); /* 100ms */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Request postmaster to start walreceiver.
|
|
|
|
*
|
|
|
|
* recptr indicates the position where streaming should begin, and conninfo
|
|
|
|
* is a libpq connection string to use.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo)
|
|
|
|
{
|
|
|
|
/* use volatile pointer to prevent code rearrangement */
|
|
|
|
volatile WalRcvData *walrcv = WalRcv;
|
2010-02-26 03:01:40 +01:00
|
|
|
pg_time_t now = (pg_time_t) time(NULL);
|
2010-01-15 10:19:10 +01:00
|
|
|
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* We always start at the beginning of the segment. That prevents a broken
|
|
|
|
* segment (i.e., with no records in the first half of a segment) from
|
|
|
|
* being created by XLOG streaming, which might cause trouble later on if
|
|
|
|
* the segment is e.g archived.
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
*/
|
|
|
|
if (recptr.xrecoff % XLogSegSize != 0)
|
|
|
|
recptr.xrecoff -= recptr.xrecoff % XLogSegSize;
|
|
|
|
|
2010-07-03 22:43:58 +02:00
|
|
|
SpinLockAcquire(&walrcv->mutex);
|
|
|
|
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
/* It better be stopped before we try to restart it */
|
|
|
|
Assert(walrcv->walRcvState == WALRCV_STOPPED);
|
2010-01-15 10:19:10 +01:00
|
|
|
|
|
|
|
if (conninfo != NULL)
|
|
|
|
strlcpy((char *) walrcv->conninfo, conninfo, MAXCONNINFO);
|
|
|
|
else
|
|
|
|
walrcv->conninfo[0] = '\0';
|
Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and standby falls behind so much that the required
WAL segments have been archived and deleted in the master.
This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.
To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.
This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
2010-01-27 16:27:51 +01:00
|
|
|
walrcv->walRcvState = WALRCV_STARTING;
|
|
|
|
walrcv->startTime = now;
|
|
|
|
|
2011-03-01 19:46:57 +01:00
|
|
|
/*
|
2011-04-10 17:42:00 +02:00
|
|
|
* If this is the first startup of walreceiver, we initialize receivedUpto
|
|
|
|
* and latestChunkStart to receiveStart.
|
2011-03-01 19:46:57 +01:00
|
|
|
*/
|
|
|
|
if (walrcv->receiveStart.xlogid == 0 &&
|
|
|
|
walrcv->receiveStart.xrecoff == 0)
|
|
|
|
{
|
|
|
|
walrcv->receivedUpto = recptr;
|
|
|
|
walrcv->latestChunkStart = recptr;
|
|
|
|
}
|
|
|
|
walrcv->receiveStart = recptr;
|
2010-07-03 22:43:58 +02:00
|
|
|
|
2010-01-15 10:19:10 +01:00
|
|
|
SpinLockRelease(&walrcv->mutex);
|
|
|
|
|
|
|
|
SendPostmasterSignal(PMSIGNAL_START_WALRECEIVER);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2010-07-03 22:43:58 +02:00
|
|
|
* Returns the last+1 byte position that walreceiver has written.
|
|
|
|
*
|
|
|
|
* Optionally, returns the previous chunk start, that is the first byte
|
2010-07-06 21:19:02 +02:00
|
|
|
* written in the most recent walreceiver flush cycle. Callers not
|
2010-07-03 22:43:58 +02:00
|
|
|
* interested in that value may pass NULL for latestChunkStart.
|
2010-01-15 10:19:10 +01:00
|
|
|
*/
|
|
|
|
XLogRecPtr
|
2010-07-03 22:43:58 +02:00
|
|
|
GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart)
|
2010-01-15 10:19:10 +01:00
|
|
|
{
|
|
|
|
/* use volatile pointer to prevent code rearrangement */
|
|
|
|
volatile WalRcvData *walrcv = WalRcv;
|
|
|
|
XLogRecPtr recptr;
|
|
|
|
|
|
|
|
SpinLockAcquire(&walrcv->mutex);
|
|
|
|
recptr = walrcv->receivedUpto;
|
2010-07-03 22:43:58 +02:00
|
|
|
if (latestChunkStart)
|
|
|
|
*latestChunkStart = walrcv->latestChunkStart;
|
2010-01-15 10:19:10 +01:00
|
|
|
SpinLockRelease(&walrcv->mutex);
|
|
|
|
|
|
|
|
return recptr;
|
|
|
|
}
|