2010-01-20 10:16:24 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* libpqwalreceiver.c
|
|
|
|
*
|
|
|
|
* This file contains the libpq-specific parts of walreceiver. It's
|
|
|
|
* loaded as a dynamic module to avoid linking the main server binary with
|
|
|
|
* libpq.
|
|
|
|
*
|
2016-01-02 19:33:40 +01:00
|
|
|
* Portions Copyright (c) 2010-2016, PostgreSQL Global Development Group
|
2010-01-20 10:16:24 +01:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
|
2010-01-20 10:16:24 +01:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <sys/time.h>
|
|
|
|
|
|
|
|
#include "libpq-fe.h"
|
2016-06-29 22:57:17 +02:00
|
|
|
#include "pqexpbuffer.h"
|
2010-01-20 10:16:24 +01:00
|
|
|
#include "access/xlog.h"
|
|
|
|
#include "miscadmin.h"
|
2016-11-30 18:00:00 +01:00
|
|
|
#include "pgstat.h"
|
2010-01-20 10:16:24 +01:00
|
|
|
#include "replication/walreceiver.h"
|
2016-11-30 18:00:00 +01:00
|
|
|
#include "storage/proc.h"
|
2010-01-20 10:16:24 +01:00
|
|
|
#include "utils/builtins.h"
|
|
|
|
|
|
|
|
/* Magic block identifying this file as a loadable server module. */
PG_MODULE_MAGIC;

/* Module initialization entry point; defined further down in this file. */
void		_PG_init(void);
|
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
struct WalReceiverConn
|
|
|
|
{
|
|
|
|
/* Current connection to the primary, if any */
|
|
|
|
PGconn *streamConn;
|
|
|
|
/* Used to remember if the connection is logical or physical */
|
|
|
|
bool logical;
|
|
|
|
/* Buffer for currently read records */
|
|
|
|
char *recvBuf;
|
|
|
|
};
|
2010-01-20 10:16:24 +01:00
|
|
|
|
|
|
|
/* Prototypes for interface functions */
|
2016-11-30 18:00:00 +01:00
|
|
|
static WalReceiverConn *libpqrcv_connect(const char *conninfo,
|
|
|
|
bool logical, const char *appname);
|
|
|
|
static char *libpqrcv_get_conninfo(WalReceiverConn *conn);
|
|
|
|
static char *libpqrcv_identify_system(WalReceiverConn *conn,
|
|
|
|
TimeLineID *primary_tli);
|
|
|
|
static void libpqrcv_readtimelinehistoryfile(WalReceiverConn *conn,
|
|
|
|
TimeLineID tli, char **filename,
|
|
|
|
char **content, int *len);
|
|
|
|
static bool libpqrcv_startstreaming(WalReceiverConn *conn,
|
|
|
|
TimeLineID tli, XLogRecPtr startpoint,
|
|
|
|
const char *slotname);
|
|
|
|
static void libpqrcv_endstreaming(WalReceiverConn *conn,
|
|
|
|
TimeLineID *next_tli);
|
|
|
|
static int libpqrcv_receive(WalReceiverConn *conn, char **buffer,
|
|
|
|
pgsocket *wait_fd);
|
|
|
|
static void libpqrcv_send(WalReceiverConn *conn, const char *buffer,
|
|
|
|
int nbytes);
|
|
|
|
static void libpqrcv_disconnect(WalReceiverConn *conn);
|
|
|
|
|
|
|
|
static WalReceiverFunctionsType PQWalReceiverFunctions = {
|
|
|
|
libpqrcv_connect,
|
|
|
|
libpqrcv_get_conninfo,
|
|
|
|
libpqrcv_identify_system,
|
|
|
|
libpqrcv_readtimelinehistoryfile,
|
|
|
|
libpqrcv_startstreaming,
|
|
|
|
libpqrcv_endstreaming,
|
|
|
|
libpqrcv_receive,
|
|
|
|
libpqrcv_send,
|
|
|
|
libpqrcv_disconnect
|
|
|
|
};
|
2010-01-20 10:16:24 +01:00
|
|
|
|
|
|
|
/* Prototypes for private functions */
|
2016-11-30 18:00:00 +01:00
|
|
|
static PGresult *libpqrcv_PQexec(PGconn *streamConn, const char *query);
|
2010-01-20 10:16:24 +01:00
|
|
|
|
|
|
|
/*
|
2016-11-30 18:00:00 +01:00
|
|
|
* Module initialization function
|
2010-01-20 10:16:24 +01:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
_PG_init(void)
|
|
|
|
{
|
2016-11-30 18:00:00 +01:00
|
|
|
if (WalReceiverFunctions != NULL)
|
2010-01-20 10:16:24 +01:00
|
|
|
elog(ERROR, "libpqwalreceiver already loaded");
|
2016-11-30 18:00:00 +01:00
|
|
|
WalReceiverFunctions = &PQWalReceiverFunctions;
|
2010-01-20 10:16:24 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Establish the connection to the primary server for XLOG streaming
|
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
static WalReceiverConn *
|
|
|
|
libpqrcv_connect(const char *conninfo, bool logical, const char *appname)
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
2016-11-30 18:00:00 +01:00
|
|
|
WalReceiverConn *conn;
|
2015-05-24 03:35:49 +02:00
|
|
|
const char *keys[5];
|
|
|
|
const char *vals[5];
|
2016-11-30 18:00:00 +01:00
|
|
|
int i = 0;
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2010-06-11 12:13:09 +02:00
|
|
|
/*
|
2015-05-24 03:35:49 +02:00
|
|
|
* We use the expand_dbname parameter to process the connection string (or
|
|
|
|
* URI), and pass some extra options. The deliberately undocumented
|
|
|
|
* parameter "replication=true" makes it a replication connection. The
|
|
|
|
* database name is ignored by the server in replication mode, but specify
|
|
|
|
* "replication" for .pgpass lookup.
|
2010-06-11 12:13:09 +02:00
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
keys[i] = "dbname";
|
|
|
|
vals[i] = conninfo;
|
|
|
|
keys[++i] = "replication";
|
|
|
|
vals[i] = logical ? "database" : "true";
|
|
|
|
if (!logical)
|
|
|
|
{
|
|
|
|
keys[++i] = "dbname";
|
|
|
|
vals[i] = "replication";
|
|
|
|
}
|
|
|
|
keys[++i] = "fallback_application_name";
|
|
|
|
vals[i] = appname;
|
|
|
|
keys[++i] = NULL;
|
|
|
|
vals[i] = NULL;
|
|
|
|
|
|
|
|
conn = palloc0(sizeof(WalReceiverConn));
|
|
|
|
conn->streamConn = PQconnectdbParams(keys, vals, /* expand_dbname = */ true);
|
|
|
|
if (PQstatus(conn->streamConn) != CONNECTION_OK)
|
2010-01-20 10:16:24 +01:00
|
|
|
ereport(ERROR,
|
2010-04-21 05:32:53 +02:00
|
|
|
(errmsg("could not connect to the primary server: %s",
|
2016-11-30 18:00:00 +01:00
|
|
|
PQerrorMessage(conn->streamConn))));
|
|
|
|
conn->logical = logical;
|
|
|
|
|
|
|
|
return conn;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
}
|
|
|
|
|
2016-06-29 22:57:17 +02:00
|
|
|
/*
|
|
|
|
* Return a user-displayable conninfo string. Any security-sensitive fields
|
|
|
|
* are obfuscated.
|
|
|
|
*/
|
|
|
|
static char *
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_get_conninfo(WalReceiverConn *conn)
|
2016-06-29 22:57:17 +02:00
|
|
|
{
|
|
|
|
PQconninfoOption *conn_opts;
|
|
|
|
PQconninfoOption *conn_opt;
|
2016-08-15 19:42:51 +02:00
|
|
|
PQExpBufferData buf;
|
2016-06-29 22:57:17 +02:00
|
|
|
char *retval;
|
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
Assert(conn->streamConn != NULL);
|
2016-06-29 22:57:17 +02:00
|
|
|
|
|
|
|
initPQExpBuffer(&buf);
|
2016-11-30 18:00:00 +01:00
|
|
|
conn_opts = PQconninfo(conn->streamConn);
|
2016-06-29 22:57:17 +02:00
|
|
|
|
|
|
|
if (conn_opts == NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("could not parse connection string: %s",
|
|
|
|
_("out of memory"))));
|
|
|
|
|
|
|
|
/* build a clean connection string from pieces */
|
|
|
|
for (conn_opt = conn_opts; conn_opt->keyword != NULL; conn_opt++)
|
|
|
|
{
|
2016-08-15 19:42:51 +02:00
|
|
|
bool obfuscate;
|
2016-06-29 22:57:17 +02:00
|
|
|
|
|
|
|
/* Skip debug and empty options */
|
|
|
|
if (strchr(conn_opt->dispchar, 'D') ||
|
|
|
|
conn_opt->val == NULL ||
|
|
|
|
conn_opt->val[0] == '\0')
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Obfuscate security-sensitive options */
|
|
|
|
obfuscate = strchr(conn_opt->dispchar, '*') != NULL;
|
|
|
|
|
|
|
|
appendPQExpBuffer(&buf, "%s%s=%s",
|
|
|
|
buf.len == 0 ? "" : " ",
|
|
|
|
conn_opt->keyword,
|
|
|
|
obfuscate ? "********" : conn_opt->val);
|
|
|
|
}
|
|
|
|
|
|
|
|
PQconninfoFree(conn_opts);
|
|
|
|
|
|
|
|
retval = PQExpBufferDataBroken(buf) ? NULL : pstrdup(buf.data);
|
|
|
|
termPQExpBuffer(&buf);
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
/*
|
|
|
|
* Check that primary's system identifier matches ours, and fetch the current
|
|
|
|
* timeline ID of the primary.
|
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
static char *
|
|
|
|
libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli)
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
{
|
|
|
|
PGresult *res;
|
|
|
|
char *primary_sysid;
|
2010-01-20 10:16:24 +01:00
|
|
|
|
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* Get the system identifier and timeline ID as a DataRow message from the
|
|
|
|
* primary server.
|
2010-01-20 10:16:24 +01:00
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
res = libpqrcv_PQexec(conn->streamConn, "IDENTIFY_SYSTEM");
|
2010-01-20 10:16:24 +01:00
|
|
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
2010-02-26 03:01:40 +01:00
|
|
|
{
|
2010-01-20 10:16:24 +01:00
|
|
|
PQclear(res);
|
|
|
|
ereport(ERROR,
|
2010-03-21 01:17:59 +01:00
|
|
|
(errmsg("could not receive database system identifier and timeline ID from "
|
2010-01-20 10:16:24 +01:00
|
|
|
"the primary server: %s",
|
2016-11-30 18:00:00 +01:00
|
|
|
PQerrorMessage(conn->streamConn))));
|
2010-02-26 03:01:40 +01:00
|
|
|
}
|
2014-08-19 11:30:38 +02:00
|
|
|
if (PQnfields(res) < 3 || PQntuples(res) != 1)
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
2010-02-26 03:01:40 +01:00
|
|
|
int ntuples = PQntuples(res);
|
|
|
|
int nfields = PQnfields(res);
|
|
|
|
|
2010-01-20 10:16:24 +01:00
|
|
|
PQclear(res);
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("invalid response from primary server"),
|
2014-05-15 13:49:11 +02:00
|
|
|
errdetail("Could not identify system: got %d rows and %d fields, expected %d rows and %d or more fields.",
|
2014-08-19 11:30:38 +02:00
|
|
|
ntuples, nfields, 3, 1)));
|
2010-01-20 10:16:24 +01:00
|
|
|
}
|
2016-11-30 18:00:00 +01:00
|
|
|
primary_sysid = pstrdup(PQgetvalue(res, 0, 0));
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
*primary_tli = pg_atoi(PQgetvalue(res, 0, 1), 4, 0);
|
2010-01-20 10:16:24 +01:00
|
|
|
PQclear(res);
|
2016-11-30 18:00:00 +01:00
|
|
|
|
|
|
|
return primary_sysid;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Start streaming WAL data from given startpoint and timeline.
|
|
|
|
*
|
|
|
|
* Returns true if we switched successfully to copy-both mode. False
|
|
|
|
* means the server received the command and executed it successfully, but
|
|
|
|
* didn't switch to copy-mode. That means that there was no WAL on the
|
|
|
|
* requested timeline and starting point, because the server switched to
|
|
|
|
* another timeline at or before the requested starting point. On failure,
|
|
|
|
* throws an ERROR.
|
|
|
|
*/
|
|
|
|
static bool
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_startstreaming(WalReceiverConn *conn,
|
|
|
|
TimeLineID tli, XLogRecPtr startpoint,
|
|
|
|
const char *slotname)
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
{
|
2016-11-30 18:00:00 +01:00
|
|
|
StringInfoData cmd;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
PGresult *res;
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
Assert(!conn->logical);
|
|
|
|
|
|
|
|
initStringInfo(&cmd);
|
|
|
|
|
2010-01-20 10:16:24 +01:00
|
|
|
/* Start streaming from the point requested by startup process */
|
2014-02-01 04:45:17 +01:00
|
|
|
if (slotname != NULL)
|
2016-11-30 18:00:00 +01:00
|
|
|
appendStringInfo(&cmd,
|
|
|
|
"START_REPLICATION SLOT \"%s\" %X/%X TIMELINE %u",
|
|
|
|
slotname,
|
|
|
|
(uint32) (startpoint >> 32), (uint32) startpoint,
|
|
|
|
tli);
|
2014-02-01 04:45:17 +01:00
|
|
|
else
|
2016-11-30 18:00:00 +01:00
|
|
|
appendStringInfo(&cmd, "START_REPLICATION %X/%X TIMELINE %u",
|
|
|
|
(uint32) (startpoint >> 32), (uint32) startpoint,
|
|
|
|
tli);
|
|
|
|
res = libpqrcv_PQexec(conn->streamConn, cmd.data);
|
|
|
|
pfree(cmd.data);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
|
|
|
|
if (PQresultStatus(res) == PGRES_COMMAND_OK)
|
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
else if (PQresultStatus(res) != PGRES_COPY_BOTH)
|
2010-04-19 16:10:45 +02:00
|
|
|
{
|
|
|
|
PQclear(res);
|
2010-01-20 10:16:24 +01:00
|
|
|
ereport(ERROR,
|
2010-03-21 01:17:59 +01:00
|
|
|
(errmsg("could not start WAL streaming: %s",
|
2016-11-30 18:00:00 +01:00
|
|
|
PQerrorMessage(conn->streamConn))));
|
2010-04-19 16:10:45 +02:00
|
|
|
}
|
2010-01-20 10:16:24 +01:00
|
|
|
PQclear(res);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2013-01-18 10:48:29 +01:00
|
|
|
* Stop streaming WAL data. Returns the next timeline's ID in *next_tli, as
|
|
|
|
* reported by the server, or 0 if it did not report it.
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
*/
|
|
|
|
static void
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_endstreaming(WalReceiverConn *conn, TimeLineID *next_tli)
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
{
|
|
|
|
PGresult *res;
|
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
if (PQputCopyEnd(conn->streamConn, NULL) <= 0 ||
|
|
|
|
PQflush(conn->streamConn))
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
ereport(ERROR,
|
2013-05-29 22:58:43 +02:00
|
|
|
(errmsg("could not send end-of-streaming message to primary: %s",
|
2016-11-30 18:00:00 +01:00
|
|
|
PQerrorMessage(conn->streamConn))));
|
|
|
|
|
|
|
|
*next_tli = 0;
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2013-01-18 10:48:29 +01:00
|
|
|
/*
|
|
|
|
* After COPY is finished, we should receive a result set indicating the
|
2013-05-29 22:58:43 +02:00
|
|
|
* next timeline's ID, or just CommandComplete if the server was shut
|
|
|
|
* down.
|
2013-01-18 10:48:29 +01:00
|
|
|
*
|
|
|
|
* If we had not yet received CopyDone from the backend, PGRES_COPY_IN
|
|
|
|
* would also be possible. However, at the moment this function is only
|
|
|
|
* called after receiving CopyDone from the backend - the walreceiver
|
|
|
|
* never terminates replication on its own initiative.
|
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
res = PQgetResult(conn->streamConn);
|
2013-01-18 10:48:29 +01:00
|
|
|
if (PQresultStatus(res) == PGRES_TUPLES_OK)
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
{
|
Fix walsender failure at promotion.
If a standby server has a cascading standby server connected to it, it's
possible that WAL has already been sent up to the next WAL page boundary,
splitting a WAL record in the middle, when the first standby server is
promoted. Don't throw an assertion failure or error in walsender if that
happens.
Also, fix a variant of the same bug in pg_receivexlog: if it had already
received WAL on previous timeline up to a segment boundary, when the
upstream standby server is promoted so that the timeline switch record falls
on the previous segment, pg_receivexlog would miss the segment containing
the timeline switch. To fix that, have walsender send the position of the
timeline switch at end-of-streaming, in addition to the next timeline's ID.
It was previously assumed that the switch happened exactly where the
streaming stopped.
Note: this is an incompatible change in the streaming protocol. You might
get an error if you try to stream over timeline switches, if the client is
running 9.3beta1 and the server is more recent. It should be fine after a
reconnect, however.
Reported by Fujii Masao.
2013-05-08 19:10:17 +02:00
|
|
|
/*
|
|
|
|
* Read the next timeline's ID. The server also sends the timeline's
|
|
|
|
* starting point, but it is ignored.
|
|
|
|
*/
|
|
|
|
if (PQnfields(res) < 2 || PQntuples(res) != 1)
|
2013-01-18 10:48:29 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("unexpected result set after end-of-streaming")));
|
|
|
|
*next_tli = pg_atoi(PQgetvalue(res, 0, 0), sizeof(uint32), 0);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
PQclear(res);
|
2013-01-18 10:48:29 +01:00
|
|
|
|
|
|
|
/* the result set should be followed by CommandComplete */
|
2016-11-30 18:00:00 +01:00
|
|
|
res = PQgetResult(conn->streamConn);
|
|
|
|
}
|
|
|
|
else if (PQresultStatus(res) == PGRES_COPY_OUT)
|
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
|
|
|
|
/* End the copy */
|
|
|
|
PQendcopy(conn->streamConn);
|
|
|
|
|
|
|
|
/* CommandComplete should follow */
|
|
|
|
res = PQgetResult(conn->streamConn);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
}
|
2013-01-18 10:48:29 +01:00
|
|
|
|
|
|
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("error reading result of streaming command: %s",
|
2016-11-30 18:00:00 +01:00
|
|
|
PQerrorMessage(conn->streamConn))));
|
2015-02-12 01:20:49 +01:00
|
|
|
PQclear(res);
|
2013-01-18 10:48:29 +01:00
|
|
|
|
|
|
|
/* Verify that there are no more results */
|
2016-11-30 18:00:00 +01:00
|
|
|
res = PQgetResult(conn->streamConn);
|
2013-01-18 10:48:29 +01:00
|
|
|
if (res != NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("unexpected result after CommandComplete: %s",
|
2016-11-30 18:00:00 +01:00
|
|
|
PQerrorMessage(conn->streamConn))));
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fetch the timeline history file for 'tli' from primary.
|
|
|
|
*/
|
|
|
|
static void
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_readtimelinehistoryfile(WalReceiverConn *conn,
|
|
|
|
TimeLineID tli, char **filename,
|
|
|
|
char **content, int *len)
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
{
|
|
|
|
PGresult *res;
|
|
|
|
char cmd[64];
|
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
Assert(!conn->logical);
|
|
|
|
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
/*
|
|
|
|
* Request the primary to send over the history file for given timeline.
|
|
|
|
*/
|
|
|
|
snprintf(cmd, sizeof(cmd), "TIMELINE_HISTORY %u", tli);
|
2016-11-30 18:00:00 +01:00
|
|
|
res = libpqrcv_PQexec(conn->streamConn, cmd);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("could not receive timeline history file from "
|
|
|
|
"the primary server: %s",
|
2016-11-30 18:00:00 +01:00
|
|
|
PQerrorMessage(conn->streamConn))));
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
}
|
|
|
|
if (PQnfields(res) != 2 || PQntuples(res) != 1)
|
|
|
|
{
|
|
|
|
int ntuples = PQntuples(res);
|
|
|
|
int nfields = PQnfields(res);
|
|
|
|
|
|
|
|
PQclear(res);
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("invalid response from primary server"),
|
|
|
|
errdetail("Expected 1 tuple with 2 fields, got %d tuples with %d fields.",
|
|
|
|
ntuples, nfields)));
|
|
|
|
}
|
|
|
|
*filename = pstrdup(PQgetvalue(res, 0, 0));
|
|
|
|
|
|
|
|
*len = PQgetlength(res, 0, 1);
|
|
|
|
*content = palloc(*len);
|
|
|
|
memcpy(*content, PQgetvalue(res, 0, 1), *len);
|
|
|
|
PQclear(res);
|
2010-01-20 10:16:24 +01:00
|
|
|
}
|
|
|
|
|
2010-04-19 16:10:45 +02:00
|
|
|
/*
|
|
|
|
* Send a query and wait for the results by using the asynchronous libpq
|
2016-12-02 14:15:36 +01:00
|
|
|
* functions and socket readiness events.
|
2010-04-19 16:10:45 +02:00
|
|
|
*
|
|
|
|
* We must not use the regular blocking libpq functions like PQexec()
|
|
|
|
* since they are uninterruptible by signals on some platforms, such as
|
|
|
|
* Windows.
|
|
|
|
*
|
|
|
|
* The function is modeled on PQexec() in libpq, but only implements
|
|
|
|
* those parts that are in use in the walreceiver.
|
|
|
|
*
|
|
|
|
* Queries are always executed on the connection in streamConn.
|
|
|
|
*/
|
|
|
|
static PGresult *
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_PQexec(PGconn *streamConn, const char *query)
|
2010-04-19 16:10:45 +02:00
|
|
|
{
|
2010-07-06 21:19:02 +02:00
|
|
|
PGresult *result = NULL;
|
|
|
|
PGresult *lastResult = NULL;
|
2010-04-19 16:10:45 +02:00
|
|
|
|
|
|
|
/*
|
2010-07-06 21:19:02 +02:00
|
|
|
* PQexec() silently discards any prior query results on the connection.
|
|
|
|
* This is not required for walreceiver since it's expected that walsender
|
|
|
|
* won't generate any such junk results.
|
2010-04-19 16:10:45 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
2010-07-06 21:19:02 +02:00
|
|
|
* Submit a query. Since we don't use non-blocking mode, this also can
|
|
|
|
* block. But its risk is relatively small, so we ignore that for now.
|
2010-04-19 16:10:45 +02:00
|
|
|
*/
|
|
|
|
if (!PQsendQuery(streamConn, query))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
/*
|
2010-07-06 21:19:02 +02:00
|
|
|
* Receive data until PQgetResult is ready to get the result without
|
|
|
|
* blocking.
|
2010-04-19 16:10:45 +02:00
|
|
|
*/
|
|
|
|
while (PQisBusy(streamConn))
|
|
|
|
{
|
2016-11-30 18:00:00 +01:00
|
|
|
int rc;
|
|
|
|
|
2010-04-19 16:10:45 +02:00
|
|
|
/*
|
|
|
|
* We don't need to break down the sleep into smaller increments,
|
2016-11-30 18:00:00 +01:00
|
|
|
* since we'll get interrupted by signals and can either handle
|
|
|
|
* interrupts here or elog(FATAL) within SIGTERM signal handler if
|
|
|
|
* the signal arrives in the middle of establishment of
|
|
|
|
* replication connection.
|
2010-04-19 16:10:45 +02:00
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
ResetLatch(&MyProc->procLatch);
|
|
|
|
rc = WaitLatchOrSocket(&MyProc->procLatch,
|
|
|
|
WL_POSTMASTER_DEATH | WL_SOCKET_READABLE |
|
|
|
|
WL_LATCH_SET,
|
|
|
|
PQsocket(streamConn),
|
|
|
|
0,
|
|
|
|
WAIT_EVENT_LIBPQWALRECEIVER_READ);
|
|
|
|
if (rc & WL_POSTMASTER_DEATH)
|
|
|
|
exit(1);
|
|
|
|
|
|
|
|
/* interrupted */
|
|
|
|
if (rc & WL_LATCH_SET)
|
|
|
|
{
|
|
|
|
CHECK_FOR_INTERRUPTS();
|
|
|
|
continue;
|
|
|
|
}
|
2010-04-19 16:10:45 +02:00
|
|
|
if (PQconsumeInput(streamConn) == 0)
|
|
|
|
return NULL; /* trouble */
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2010-07-06 21:19:02 +02:00
|
|
|
* Emulate the PQexec()'s behavior of returning the last result when
|
|
|
|
* there are many. Since walsender will never generate multiple
|
|
|
|
* results, we skip the concatenation of error messages.
|
2010-04-19 16:10:45 +02:00
|
|
|
*/
|
|
|
|
result = PQgetResult(streamConn);
|
|
|
|
if (result == NULL)
|
|
|
|
break; /* query is complete */
|
|
|
|
|
|
|
|
PQclear(lastResult);
|
|
|
|
lastResult = result;
|
|
|
|
|
|
|
|
if (PQresultStatus(lastResult) == PGRES_COPY_IN ||
|
|
|
|
PQresultStatus(lastResult) == PGRES_COPY_OUT ||
|
2010-12-11 15:27:37 +01:00
|
|
|
PQresultStatus(lastResult) == PGRES_COPY_BOTH ||
|
2010-04-19 16:10:45 +02:00
|
|
|
PQstatus(streamConn) == CONNECTION_BAD)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return lastResult;
|
|
|
|
}
|
|
|
|
|
2010-01-20 10:16:24 +01:00
|
|
|
/*
|
|
|
|
* Disconnect connection to primary, if any.
|
|
|
|
*/
|
|
|
|
static void
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_disconnect(WalReceiverConn *conn)
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
2016-11-30 18:00:00 +01:00
|
|
|
PQfinish(conn->streamConn);
|
|
|
|
if (conn->recvBuf != NULL)
|
|
|
|
PQfreemem(conn->recvBuf);
|
|
|
|
pfree(conn);
|
2010-01-20 10:16:24 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2016-03-30 03:16:12 +02:00
|
|
|
* Receive a message available from XLOG stream.
|
2010-01-20 10:16:24 +01:00
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
*
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
* If data was received, returns the length of the data. *buffer is set to
|
|
|
|
* point to a buffer holding the received message. The buffer is only valid
|
|
|
|
* until the next libpqrcv_* call.
|
2010-01-20 10:16:24 +01:00
|
|
|
*
|
2016-03-30 03:16:12 +02:00
|
|
|
* If no data was available immediately, returns 0, and *wait_fd is set to a
|
2016-04-14 19:49:37 +02:00
|
|
|
* socket descriptor which can be waited on before trying again.
|
2010-01-20 10:16:24 +01:00
|
|
|
*
|
2013-05-29 22:58:43 +02:00
|
|
|
* -1 if the server ended the COPY.
|
2010-01-20 10:16:24 +01:00
|
|
|
*
|
|
|
|
* ereports on error.
|
|
|
|
*/
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
static int
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_receive(WalReceiverConn *conn, char **buffer,
|
|
|
|
pgsocket *wait_fd)
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
|
|
|
int rawlen;
|
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
if (conn->recvBuf != NULL)
|
|
|
|
PQfreemem(conn->recvBuf);
|
|
|
|
conn->recvBuf = NULL;
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2011-01-13 16:51:28 +01:00
|
|
|
/* Try to receive a CopyData message */
|
2016-11-30 18:00:00 +01:00
|
|
|
rawlen = PQgetCopyData(conn->streamConn, &conn->recvBuf, 1);
|
2011-01-13 16:51:28 +01:00
|
|
|
if (rawlen == 0)
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
2016-03-30 03:16:12 +02:00
|
|
|
/* Try consuming some data. */
|
2016-11-30 18:00:00 +01:00
|
|
|
if (PQconsumeInput(conn->streamConn) == 0)
|
2010-01-20 10:16:24 +01:00
|
|
|
ereport(ERROR,
|
2010-03-21 01:17:59 +01:00
|
|
|
(errmsg("could not receive data from WAL stream: %s",
|
2016-11-30 18:00:00 +01:00
|
|
|
PQerrorMessage(conn->streamConn))));
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2011-01-13 16:51:28 +01:00
|
|
|
/* Now that we've consumed some input, try again */
|
2016-11-30 18:00:00 +01:00
|
|
|
rawlen = PQgetCopyData(conn->streamConn, &conn->recvBuf, 1);
|
2011-01-13 16:51:28 +01:00
|
|
|
if (rawlen == 0)
|
2016-03-30 03:16:12 +02:00
|
|
|
{
|
|
|
|
/* Tell caller to try again when our socket is ready. */
|
2016-11-30 18:00:00 +01:00
|
|
|
*wait_fd = PQsocket(conn->streamConn);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
return 0;
|
2016-03-30 03:16:12 +02:00
|
|
|
}
|
2011-01-13 16:51:28 +01:00
|
|
|
}
|
2010-02-26 03:01:40 +01:00
|
|
|
if (rawlen == -1) /* end-of-streaming or error */
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
2010-02-26 03:01:40 +01:00
|
|
|
PGresult *res;
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
res = PQgetResult(conn->streamConn);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
if (PQresultStatus(res) == PGRES_COMMAND_OK ||
|
|
|
|
PQresultStatus(res) == PGRES_COPY_IN)
|
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
else
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
ereport(ERROR,
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
(errmsg("could not receive data from WAL stream: %s",
|
2016-11-30 18:00:00 +01:00
|
|
|
PQerrorMessage(conn->streamConn))));
|
2010-01-20 10:16:24 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (rawlen < -1)
|
|
|
|
ereport(ERROR,
|
2010-03-21 01:17:59 +01:00
|
|
|
(errmsg("could not receive data from WAL stream: %s",
|
2016-11-30 18:00:00 +01:00
|
|
|
PQerrorMessage(conn->streamConn))));
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2010-02-03 10:47:19 +01:00
|
|
|
/* Return received messages to caller */
|
2016-11-30 18:00:00 +01:00
|
|
|
*buffer = conn->recvBuf;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
return rawlen;
|
2010-01-20 10:16:24 +01:00
|
|
|
}
|
2010-12-11 15:27:37 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Send a message to XLOG stream.
|
|
|
|
*
|
|
|
|
* ereports on error.
|
|
|
|
*/
|
|
|
|
static void
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_send(WalReceiverConn *conn, const char *buffer, int nbytes)
|
2010-12-11 15:27:37 +01:00
|
|
|
{
|
2016-11-30 18:00:00 +01:00
|
|
|
if (PQputCopyData(conn->streamConn, buffer, nbytes) <= 0 ||
|
|
|
|
PQflush(conn->streamConn))
|
2010-12-11 15:27:37 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("could not send data to WAL stream: %s",
|
2016-11-30 18:00:00 +01:00
|
|
|
PQerrorMessage(conn->streamConn))));
|
2010-12-11 15:27:37 +01:00
|
|
|
}
|