2010-01-20 10:16:24 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* libpqwalreceiver.c
|
|
|
|
*
|
|
|
|
* This file contains the libpq-specific parts of walreceiver. It's
|
|
|
|
* loaded as a dynamic module to avoid linking the main server binary with
|
|
|
|
* libpq.
|
|
|
|
*
|
2020-01-01 18:21:45 +01:00
|
|
|
* Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group
|
2010-01-20 10:16:24 +01:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
|
2010-01-20 10:16:24 +01:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <sys/time.h>
|
|
|
|
|
|
|
|
#include "access/xlog.h"
|
2017-03-23 13:36:36 +01:00
|
|
|
#include "catalog/pg_type.h"
|
2020-08-10 18:22:54 +02:00
|
|
|
#include "common/connect.h"
|
2017-03-23 13:36:36 +01:00
|
|
|
#include "funcapi.h"
|
2019-11-12 04:00:16 +01:00
|
|
|
#include "libpq-fe.h"
|
2017-02-23 17:27:59 +01:00
|
|
|
#include "mb/pg_wchar.h"
|
2010-01-20 10:16:24 +01:00
|
|
|
#include "miscadmin.h"
|
2016-11-30 18:00:00 +01:00
|
|
|
#include "pgstat.h"
|
2019-11-12 04:00:16 +01:00
|
|
|
#include "pqexpbuffer.h"
|
2010-01-20 10:16:24 +01:00
|
|
|
#include "replication/walreceiver.h"
|
|
|
|
#include "utils/builtins.h"
|
2017-03-23 13:36:36 +01:00
|
|
|
#include "utils/memutils.h"
|
2017-01-19 18:00:00 +01:00
|
|
|
#include "utils/pg_lsn.h"
|
2017-03-23 13:36:36 +01:00
|
|
|
#include "utils/tuplestore.h"
|
2010-01-20 10:16:24 +01:00
|
|
|
|
|
|
|
PG_MODULE_MAGIC;
|
|
|
|
|
|
|
|
void _PG_init(void);
|
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
struct WalReceiverConn
|
|
|
|
{
|
|
|
|
/* Current connection to the primary, if any */
|
2017-05-17 22:31:56 +02:00
|
|
|
PGconn *streamConn;
|
2016-11-30 18:00:00 +01:00
|
|
|
/* Used to remember if the connection is logical or physical */
|
2017-05-17 22:31:56 +02:00
|
|
|
bool logical;
|
2016-11-30 18:00:00 +01:00
|
|
|
/* Buffer for currently read records */
|
2017-05-17 22:31:56 +02:00
|
|
|
char *recvBuf;
|
2016-11-30 18:00:00 +01:00
|
|
|
};
|
2010-01-20 10:16:24 +01:00
|
|
|
|
|
|
|
/* Prototypes for interface functions */
|
2016-11-30 18:00:00 +01:00
|
|
|
static WalReceiverConn *libpqrcv_connect(const char *conninfo,
|
2019-05-22 19:04:48 +02:00
|
|
|
bool logical, const char *appname,
|
|
|
|
char **err);
|
2017-01-19 18:00:00 +01:00
|
|
|
static void libpqrcv_check_conninfo(const char *conninfo);
|
2016-11-30 18:00:00 +01:00
|
|
|
static char *libpqrcv_get_conninfo(WalReceiverConn *conn);
|
2018-03-31 00:51:22 +02:00
|
|
|
static void libpqrcv_get_senderinfo(WalReceiverConn *conn,
|
2019-05-22 19:04:48 +02:00
|
|
|
char **sender_host, int *sender_port);
|
2016-11-30 18:00:00 +01:00
|
|
|
static char *libpqrcv_identify_system(WalReceiverConn *conn,
|
2019-05-22 19:04:48 +02:00
|
|
|
TimeLineID *primary_tli);
|
2019-05-22 18:55:34 +02:00
|
|
|
static int libpqrcv_server_version(WalReceiverConn *conn);
|
2016-11-30 18:00:00 +01:00
|
|
|
static void libpqrcv_readtimelinehistoryfile(WalReceiverConn *conn,
|
2019-05-22 19:04:48 +02:00
|
|
|
TimeLineID tli, char **filename,
|
|
|
|
char **content, int *len);
|
2016-11-30 18:00:00 +01:00
|
|
|
static bool libpqrcv_startstreaming(WalReceiverConn *conn,
|
2019-05-22 19:04:48 +02:00
|
|
|
const WalRcvStreamOptions *options);
|
2016-11-30 18:00:00 +01:00
|
|
|
static void libpqrcv_endstreaming(WalReceiverConn *conn,
|
2019-05-22 19:04:48 +02:00
|
|
|
TimeLineID *next_tli);
|
|
|
|
static int libpqrcv_receive(WalReceiverConn *conn, char **buffer,
|
|
|
|
pgsocket *wait_fd);
|
2016-11-30 18:00:00 +01:00
|
|
|
static void libpqrcv_send(WalReceiverConn *conn, const char *buffer,
|
2019-05-22 19:04:48 +02:00
|
|
|
int nbytes);
|
2017-01-19 18:00:00 +01:00
|
|
|
static char *libpqrcv_create_slot(WalReceiverConn *conn,
|
2019-05-22 19:04:48 +02:00
|
|
|
const char *slotname,
|
|
|
|
bool temporary,
|
|
|
|
CRSSnapshotAction snapshot_action,
|
|
|
|
XLogRecPtr *lsn);
|
2020-01-14 14:05:25 +01:00
|
|
|
static pid_t libpqrcv_get_backend_pid(WalReceiverConn *conn);
|
2017-03-23 13:36:36 +01:00
|
|
|
static WalRcvExecResult *libpqrcv_exec(WalReceiverConn *conn,
|
2019-05-22 19:04:48 +02:00
|
|
|
const char *query,
|
|
|
|
const int nRetTypes,
|
|
|
|
const Oid *retTypes);
|
2016-11-30 18:00:00 +01:00
|
|
|
static void libpqrcv_disconnect(WalReceiverConn *conn);
|
|
|
|
|
|
|
|
static WalReceiverFunctionsType PQWalReceiverFunctions = {
|
|
|
|
libpqrcv_connect,
|
2017-01-19 18:00:00 +01:00
|
|
|
libpqrcv_check_conninfo,
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_get_conninfo,
|
2018-03-31 00:51:22 +02:00
|
|
|
libpqrcv_get_senderinfo,
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_identify_system,
|
2019-03-15 10:16:26 +01:00
|
|
|
libpqrcv_server_version,
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_readtimelinehistoryfile,
|
|
|
|
libpqrcv_startstreaming,
|
|
|
|
libpqrcv_endstreaming,
|
|
|
|
libpqrcv_receive,
|
|
|
|
libpqrcv_send,
|
2017-01-19 18:00:00 +01:00
|
|
|
libpqrcv_create_slot,
|
2020-01-14 14:05:25 +01:00
|
|
|
libpqrcv_get_backend_pid,
|
2017-03-23 13:36:36 +01:00
|
|
|
libpqrcv_exec,
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_disconnect
|
|
|
|
};
|
2010-01-20 10:16:24 +01:00
|
|
|
|
|
|
|
/* Prototypes for private functions */
|
2016-11-30 18:00:00 +01:00
|
|
|
static PGresult *libpqrcv_PQexec(PGconn *streamConn, const char *query);
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
static PGresult *libpqrcv_PQgetResult(PGconn *streamConn);
|
2017-01-19 18:00:00 +01:00
|
|
|
static char *stringlist_to_identifierstr(PGconn *conn, List *strings);
|
2010-01-20 10:16:24 +01:00
|
|
|
|
|
|
|
/*
|
2016-11-30 18:00:00 +01:00
|
|
|
* Module initialization function
|
2010-01-20 10:16:24 +01:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
_PG_init(void)
|
|
|
|
{
|
2016-11-30 18:00:00 +01:00
|
|
|
if (WalReceiverFunctions != NULL)
|
2010-01-20 10:16:24 +01:00
|
|
|
elog(ERROR, "libpqwalreceiver already loaded");
|
2016-11-30 18:00:00 +01:00
|
|
|
WalReceiverFunctions = &PQWalReceiverFunctions;
|
2010-01-20 10:16:24 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Establish the connection to the primary server for XLOG streaming
|
2017-01-19 18:00:00 +01:00
|
|
|
*
|
|
|
|
* Returns NULL on error and fills the err with palloc'ed error message.
|
2010-01-20 10:16:24 +01:00
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
static WalReceiverConn *
|
2017-01-19 18:00:00 +01:00
|
|
|
libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
|
|
|
|
char **err)
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
2016-11-30 18:00:00 +01:00
|
|
|
WalReceiverConn *conn;
|
2017-03-03 15:07:22 +01:00
|
|
|
PostgresPollingStatusType status;
|
2015-05-24 03:35:49 +02:00
|
|
|
const char *keys[5];
|
|
|
|
const char *vals[5];
|
2016-11-30 18:00:00 +01:00
|
|
|
int i = 0;
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2010-06-11 12:13:09 +02:00
|
|
|
/*
|
2015-05-24 03:35:49 +02:00
|
|
|
* We use the expand_dbname parameter to process the connection string (or
|
2018-04-14 16:04:36 +02:00
|
|
|
* URI), and pass some extra options.
|
2010-06-11 12:13:09 +02:00
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
keys[i] = "dbname";
|
|
|
|
vals[i] = conninfo;
|
|
|
|
keys[++i] = "replication";
|
|
|
|
vals[i] = logical ? "database" : "true";
|
|
|
|
if (!logical)
|
|
|
|
{
|
2018-04-14 16:04:36 +02:00
|
|
|
/*
|
|
|
|
* The database name is ignored by the server in replication mode, but
|
|
|
|
* specify "replication" for .pgpass lookup.
|
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
keys[++i] = "dbname";
|
|
|
|
vals[i] = "replication";
|
|
|
|
}
|
|
|
|
keys[++i] = "fallback_application_name";
|
|
|
|
vals[i] = appname;
|
2017-02-23 17:27:59 +01:00
|
|
|
if (logical)
|
|
|
|
{
|
|
|
|
keys[++i] = "client_encoding";
|
|
|
|
vals[i] = GetDatabaseEncodingName();
|
|
|
|
}
|
2016-11-30 18:00:00 +01:00
|
|
|
keys[++i] = NULL;
|
|
|
|
vals[i] = NULL;
|
|
|
|
|
2017-02-23 17:27:59 +01:00
|
|
|
Assert(i < sizeof(keys));
|
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
conn = palloc0(sizeof(WalReceiverConn));
|
2017-03-03 15:07:22 +01:00
|
|
|
conn->streamConn = PQconnectStartParams(keys, vals,
|
2017-05-17 22:31:56 +02:00
|
|
|
/* expand_dbname = */ true);
|
2017-03-03 15:07:22 +01:00
|
|
|
if (PQstatus(conn->streamConn) == CONNECTION_BAD)
|
|
|
|
{
|
|
|
|
*err = pchomp(PQerrorMessage(conn->streamConn));
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2017-03-06 15:33:26 +01:00
|
|
|
/*
|
|
|
|
* Poll connection until we have OK or FAILED status.
|
|
|
|
*
|
2017-03-15 18:26:26 +01:00
|
|
|
* Per spec for PQconnectPoll, first wait till socket is write-ready.
|
2017-03-06 15:33:26 +01:00
|
|
|
*/
|
2017-03-15 18:26:26 +01:00
|
|
|
status = PGRES_POLLING_WRITING;
|
|
|
|
do
|
2017-03-03 15:07:22 +01:00
|
|
|
{
|
2017-03-15 18:26:26 +01:00
|
|
|
int io_flag;
|
|
|
|
int rc;
|
|
|
|
|
Distinguish wait-for-connection from wait-for-write-ready on Windows.
The API for WaitLatch and friends followed the Unix convention in which
waiting for a socket connection to complete is identical to waiting for
the socket to accept a write. While Windows provides a select(2)
emulation that agrees with that, the native WaitForMultipleObjects API
treats them as quite different --- and for some bizarre reason, it will
report a not-yet-connected socket as write-ready. libpq itself has so
far escaped dealing with this because it waits with select(), but in
libpqwalreceiver.c we want to wait using WaitLatchOrSocket. The semantics
mismatch resulted in replication connection failures on Windows, but only
for remote connections (apparently, localhost connections complete
immediately, or at least too fast for anyone to have noticed the problem
in single-machine testing).
To fix, introduce an additional WL_SOCKET_CONNECTED wait flag for
WaitLatchOrSocket, which is identical to WL_SOCKET_WRITEABLE on
non-Windows, but results in waiting for FD_CONNECT events on Windows.
Ideally, we would also distinguish the two conditions in the API for
PQconnectPoll(), but changing that API at this point seems infeasible.
Instead, cheat by checking for PQstatus() == CONNECTION_STARTED to
determine that we're still waiting for the connection to complete.
(This is a cheat mainly because CONNECTION_STARTED is documented as an
internal state rather than something callers should rely on. Perhaps
we ought to change the documentation ... but this patch doesn't.)
Per reports from Jobin Augustine and Igor Neyman. Back-patch to v10
where commit 1e8a85009 exposed this longstanding shortcoming.
Andres Freund, minor fix and some code review/beautification by me
Discussion: https://postgr.es/m/CAHBggj8g2T+ZDcACZ2FmzX9CTxkWjKBsHd6NkYB4i9Ojf6K1Fw@mail.gmail.com
2017-08-15 17:07:52 +02:00
|
|
|
if (status == PGRES_POLLING_READING)
|
|
|
|
io_flag = WL_SOCKET_READABLE;
|
|
|
|
#ifdef WIN32
|
|
|
|
/* Windows needs a different test while waiting for connection-made */
|
|
|
|
else if (PQstatus(conn->streamConn) == CONNECTION_STARTED)
|
|
|
|
io_flag = WL_SOCKET_CONNECTED;
|
|
|
|
#endif
|
|
|
|
else
|
|
|
|
io_flag = WL_SOCKET_WRITEABLE;
|
2017-03-15 18:26:26 +01:00
|
|
|
|
2017-06-07 01:13:00 +02:00
|
|
|
rc = WaitLatchOrSocket(MyLatch,
|
Add WL_EXIT_ON_PM_DEATH pseudo-event.
Users of the WaitEventSet and WaitLatch() APIs can now choose between
asking for WL_POSTMASTER_DEATH and then handling it explicitly, or asking
for WL_EXIT_ON_PM_DEATH to trigger immediate exit on postmaster death.
This reduces code duplication, since almost all callers want the latter.
Repair all code that was previously ignoring postmaster death completely,
or requesting the event but ignoring it, or requesting the event but then
doing an unconditional PostmasterIsAlive() call every time through its
event loop (which is an expensive syscall on platforms for which we don't
have USE_POSTMASTER_DEATH_SIGNAL support).
Assert that callers of WaitLatchXXX() under the postmaster remember to
ask for either WL_POSTMASTER_DEATH or WL_EXIT_ON_PM_DEATH, to prevent
future bugs.
The only process that doesn't handle postmaster death is syslogger. It
waits until all backends holding the write end of the syslog pipe
(including the postmaster) have closed it by exiting, to be sure to
capture any parting messages. By using the WaitEventSet API directly
it avoids the new assertion, and as a by-product it may be slightly
more efficient on platforms that have epoll().
Author: Thomas Munro
Reviewed-by: Kyotaro Horiguchi, Heikki Linnakangas, Tom Lane
Discussion: https://postgr.es/m/CAEepm%3D1TCviRykkUb69ppWLr_V697rzd1j3eZsRMmbXvETfqbQ%40mail.gmail.com,
https://postgr.es/m/CAEepm=2LqHzizbe7muD7-2yHUbTOoF7Q+qkSD5Q41kuhttRTwA@mail.gmail.com
2018-11-23 08:16:41 +01:00
|
|
|
WL_EXIT_ON_PM_DEATH | WL_LATCH_SET | io_flag,
|
2017-03-15 18:26:26 +01:00
|
|
|
PQsocket(conn->streamConn),
|
|
|
|
0,
|
2017-08-08 21:37:44 +02:00
|
|
|
WAIT_EVENT_LIBPQWALRECEIVER_CONNECT);
|
2017-03-15 18:26:26 +01:00
|
|
|
|
|
|
|
/* Interrupted? */
|
|
|
|
if (rc & WL_LATCH_SET)
|
2017-03-03 15:07:22 +01:00
|
|
|
{
|
2017-06-07 01:13:00 +02:00
|
|
|
ResetLatch(MyLatch);
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
ProcessWalRcvInterrupts();
|
2017-03-03 15:07:22 +01:00
|
|
|
}
|
2017-03-15 18:26:26 +01:00
|
|
|
|
|
|
|
/* If socket is ready, advance the libpq state machine */
|
|
|
|
if (rc & io_flag)
|
|
|
|
status = PQconnectPoll(conn->streamConn);
|
|
|
|
} while (status != PGRES_POLLING_OK && status != PGRES_POLLING_FAILED);
|
2017-03-03 15:07:22 +01:00
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
if (PQstatus(conn->streamConn) != CONNECTION_OK)
|
2017-01-19 18:00:00 +01:00
|
|
|
{
|
2017-02-27 14:30:06 +01:00
|
|
|
*err = pchomp(PQerrorMessage(conn->streamConn));
|
2017-01-19 18:00:00 +01:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-08-10 18:22:54 +02:00
|
|
|
if (logical)
|
|
|
|
{
|
|
|
|
PGresult *res;
|
|
|
|
|
|
|
|
res = libpqrcv_PQexec(conn->streamConn,
|
|
|
|
ALWAYS_SECURE_SEARCH_PATH_SQL);
|
|
|
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("could not clear search path: %s",
|
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
|
|
|
}
|
|
|
|
PQclear(res);
|
|
|
|
}
|
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
conn->logical = logical;
|
|
|
|
|
|
|
|
return conn;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
}
|
|
|
|
|
2017-01-19 18:00:00 +01:00
|
|
|
/*
|
|
|
|
* Validate connection info string (just try to parse it)
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
libpqrcv_check_conninfo(const char *conninfo)
|
|
|
|
{
|
2017-05-17 22:31:56 +02:00
|
|
|
PQconninfoOption *opts = NULL;
|
|
|
|
char *err = NULL;
|
2017-01-19 18:00:00 +01:00
|
|
|
|
|
|
|
opts = PQconninfoParse(conninfo, &err);
|
|
|
|
if (opts == NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("invalid connection string syntax: %s", err)));
|
|
|
|
|
|
|
|
PQconninfoFree(opts);
|
|
|
|
}
|
|
|
|
|
2016-06-29 22:57:17 +02:00
|
|
|
/*
|
|
|
|
* Return a user-displayable conninfo string. Any security-sensitive fields
|
|
|
|
* are obfuscated.
|
|
|
|
*/
|
|
|
|
static char *
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_get_conninfo(WalReceiverConn *conn)
|
2016-06-29 22:57:17 +02:00
|
|
|
{
|
|
|
|
PQconninfoOption *conn_opts;
|
|
|
|
PQconninfoOption *conn_opt;
|
2016-08-15 19:42:51 +02:00
|
|
|
PQExpBufferData buf;
|
2016-06-29 22:57:17 +02:00
|
|
|
char *retval;
|
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
Assert(conn->streamConn != NULL);
|
2016-06-29 22:57:17 +02:00
|
|
|
|
|
|
|
initPQExpBuffer(&buf);
|
2016-11-30 18:00:00 +01:00
|
|
|
conn_opts = PQconninfo(conn->streamConn);
|
2016-06-29 22:57:17 +02:00
|
|
|
|
|
|
|
if (conn_opts == NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("could not parse connection string: %s",
|
|
|
|
_("out of memory"))));
|
|
|
|
|
|
|
|
/* build a clean connection string from pieces */
|
|
|
|
for (conn_opt = conn_opts; conn_opt->keyword != NULL; conn_opt++)
|
|
|
|
{
|
2016-08-15 19:42:51 +02:00
|
|
|
bool obfuscate;
|
2016-06-29 22:57:17 +02:00
|
|
|
|
|
|
|
/* Skip debug and empty options */
|
|
|
|
if (strchr(conn_opt->dispchar, 'D') ||
|
|
|
|
conn_opt->val == NULL ||
|
|
|
|
conn_opt->val[0] == '\0')
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Obfuscate security-sensitive options */
|
|
|
|
obfuscate = strchr(conn_opt->dispchar, '*') != NULL;
|
|
|
|
|
|
|
|
appendPQExpBuffer(&buf, "%s%s=%s",
|
|
|
|
buf.len == 0 ? "" : " ",
|
|
|
|
conn_opt->keyword,
|
|
|
|
obfuscate ? "********" : conn_opt->val);
|
|
|
|
}
|
|
|
|
|
|
|
|
PQconninfoFree(conn_opts);
|
|
|
|
|
|
|
|
retval = PQExpBufferDataBroken(buf) ? NULL : pstrdup(buf.data);
|
|
|
|
termPQExpBuffer(&buf);
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
2018-03-31 00:51:22 +02:00
|
|
|
/*
|
|
|
|
* Provides information of sender this WAL receiver is connected to.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
libpqrcv_get_senderinfo(WalReceiverConn *conn, char **sender_host,
|
2018-04-26 20:47:16 +02:00
|
|
|
int *sender_port)
|
2018-03-31 00:51:22 +02:00
|
|
|
{
|
2018-04-26 20:47:16 +02:00
|
|
|
char *ret = NULL;
|
2018-03-31 00:51:22 +02:00
|
|
|
|
|
|
|
*sender_host = NULL;
|
|
|
|
*sender_port = 0;
|
|
|
|
|
|
|
|
Assert(conn->streamConn != NULL);
|
|
|
|
|
|
|
|
ret = PQhost(conn->streamConn);
|
|
|
|
if (ret && strlen(ret) != 0)
|
|
|
|
*sender_host = pstrdup(ret);
|
|
|
|
|
|
|
|
ret = PQport(conn->streamConn);
|
|
|
|
if (ret && strlen(ret) != 0)
|
|
|
|
*sender_port = atoi(ret);
|
|
|
|
}
|
|
|
|
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
/*
|
|
|
|
* Check that primary's system identifier matches ours, and fetch the current
|
|
|
|
* timeline ID of the primary.
|
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
static char *
|
2019-03-15 10:16:26 +01:00
|
|
|
libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli)
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
{
|
|
|
|
PGresult *res;
|
|
|
|
char *primary_sysid;
|
2010-01-20 10:16:24 +01:00
|
|
|
|
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* Get the system identifier and timeline ID as a DataRow message from the
|
|
|
|
* primary server.
|
2010-01-20 10:16:24 +01:00
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
res = libpqrcv_PQexec(conn->streamConn, "IDENTIFY_SYSTEM");
|
2010-01-20 10:16:24 +01:00
|
|
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
2010-02-26 03:01:40 +01:00
|
|
|
{
|
2010-01-20 10:16:24 +01:00
|
|
|
PQclear(res);
|
|
|
|
ereport(ERROR,
|
2010-03-21 01:17:59 +01:00
|
|
|
(errmsg("could not receive database system identifier and timeline ID from "
|
2010-01-20 10:16:24 +01:00
|
|
|
"the primary server: %s",
|
2017-02-27 14:30:06 +01:00
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
2010-02-26 03:01:40 +01:00
|
|
|
}
|
2014-08-19 11:30:38 +02:00
|
|
|
if (PQnfields(res) < 3 || PQntuples(res) != 1)
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
2010-02-26 03:01:40 +01:00
|
|
|
int ntuples = PQntuples(res);
|
|
|
|
int nfields = PQnfields(res);
|
|
|
|
|
2010-01-20 10:16:24 +01:00
|
|
|
PQclear(res);
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("invalid response from primary server"),
|
2014-05-15 13:49:11 +02:00
|
|
|
errdetail("Could not identify system: got %d rows and %d fields, expected %d rows and %d or more fields.",
|
2014-08-19 11:30:38 +02:00
|
|
|
ntuples, nfields, 3, 1)));
|
2010-01-20 10:16:24 +01:00
|
|
|
}
|
2016-11-30 18:00:00 +01:00
|
|
|
primary_sysid = pstrdup(PQgetvalue(res, 0, 0));
|
2018-07-22 23:58:01 +02:00
|
|
|
*primary_tli = pg_strtoint32(PQgetvalue(res, 0, 1));
|
2010-01-20 10:16:24 +01:00
|
|
|
PQclear(res);
|
2016-11-30 18:00:00 +01:00
|
|
|
|
|
|
|
return primary_sysid;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
}
|
|
|
|
|
2019-03-15 10:16:26 +01:00
|
|
|
/*
|
|
|
|
* Thin wrapper around libpq to obtain server version.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
libpqrcv_server_version(WalReceiverConn *conn)
|
|
|
|
{
|
|
|
|
return PQserverVersion(conn->streamConn);
|
|
|
|
}
|
|
|
|
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of the timeline is reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
/*
|
2017-01-19 18:00:00 +01:00
|
|
|
* Start streaming WAL data from given streaming options.
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
*
|
|
|
|
* Returns true if we switched successfully to copy-both mode. False
|
|
|
|
* means the server received the command and executed it successfully, but
|
|
|
|
* didn't switch to copy-mode. That means that there was no WAL on the
|
|
|
|
* requested timeline and starting point, because the server switched to
|
|
|
|
* another timeline at or before the requested starting point. On failure,
|
|
|
|
* throws an ERROR.
|
|
|
|
*/
|
|
|
|
static bool
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_startstreaming(WalReceiverConn *conn,
|
2017-01-19 18:00:00 +01:00
|
|
|
const WalRcvStreamOptions *options)
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
{
|
2016-11-30 18:00:00 +01:00
|
|
|
StringInfoData cmd;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
PGresult *res;
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2017-01-19 18:00:00 +01:00
|
|
|
Assert(options->logical == conn->logical);
|
|
|
|
Assert(options->slotname || !options->logical);
|
2016-11-30 18:00:00 +01:00
|
|
|
|
|
|
|
initStringInfo(&cmd);
|
|
|
|
|
2017-01-19 18:00:00 +01:00
|
|
|
/* Build the command. */
|
|
|
|
appendStringInfoString(&cmd, "START_REPLICATION");
|
|
|
|
if (options->slotname != NULL)
|
|
|
|
appendStringInfo(&cmd, " SLOT \"%s\"",
|
|
|
|
options->slotname);
|
|
|
|
|
|
|
|
if (options->logical)
|
2017-08-16 05:34:39 +02:00
|
|
|
appendStringInfoString(&cmd, " LOGICAL");
|
2017-01-19 18:00:00 +01:00
|
|
|
|
|
|
|
appendStringInfo(&cmd, " %X/%X",
|
|
|
|
(uint32) (options->startpoint >> 32),
|
|
|
|
(uint32) options->startpoint);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Additional options are different depending on if we are doing logical
|
|
|
|
* or physical replication.
|
|
|
|
*/
|
|
|
|
if (options->logical)
|
|
|
|
{
|
2017-05-17 22:31:56 +02:00
|
|
|
char *pubnames_str;
|
|
|
|
List *pubnames;
|
|
|
|
char *pubnames_literal;
|
2017-01-19 18:00:00 +01:00
|
|
|
|
|
|
|
appendStringInfoString(&cmd, " (");
|
2017-01-23 17:06:30 +01:00
|
|
|
|
2017-01-19 18:00:00 +01:00
|
|
|
appendStringInfo(&cmd, "proto_version '%u'",
|
|
|
|
options->proto.logical.proto_version);
|
2017-01-23 17:06:30 +01:00
|
|
|
|
Add support for streaming to built-in logical replication.
To add support for streaming of in-progress transactions into the
built-in logical replication, we need to do three things:
* Extend the logical replication protocol, so identify in-progress
transactions, and allow adding additional bits of information (e.g.
XID of subtransactions).
* Modify the output plugin (pgoutput) to implement the new stream
API callbacks, by leveraging the extended replication protocol.
* Modify the replication apply worker, to properly handle streamed
in-progress transaction by spilling the data to disk and then
replaying them on commit.
We however must explicitly disable streaming replication during
replication slot creation, even if the plugin supports it. We
don't need to replicate the changes accumulated during this phase,
and moreover we don't have a replication connection open so we
don't have where to send the data anyway.
Author: Tomas Vondra, Dilip Kumar and Amit Kapila
Reviewed-by: Amit Kapila, Kuntal Ghosh and Ajin Cherian
Tested-by: Neha Sharma, Mahendra Singh Thalor and Ajin Cherian
Discussion: https://postgr.es/m/688b0b7f-2f6c-d827-c27b-216a8e3ea700@2ndquadrant.com
2020-09-03 04:24:07 +02:00
|
|
|
if (options->proto.logical.streaming &&
|
|
|
|
PQserverVersion(conn->streamConn) >= 140000)
|
2020-10-15 09:35:17 +02:00
|
|
|
appendStringInfoString(&cmd, ", streaming 'on'");
|
Add support for streaming to built-in logical replication.
To add support for streaming of in-progress transactions into the
built-in logical replication, we need to do three things:
* Extend the logical replication protocol, so identify in-progress
transactions, and allow adding additional bits of information (e.g.
XID of subtransactions).
* Modify the output plugin (pgoutput) to implement the new stream
API callbacks, by leveraging the extended replication protocol.
* Modify the replication apply worker, to properly handle streamed
in-progress transaction by spilling the data to disk and then
replaying them on commit.
We however must explicitly disable streaming replication during
replication slot creation, even if the plugin supports it. We
don't need to replicate the changes accumulated during this phase,
and moreover we don't have a replication connection open so we
don't have where to send the data anyway.
Author: Tomas Vondra, Dilip Kumar and Amit Kapila
Reviewed-by: Amit Kapila, Kuntal Ghosh and Ajin Cherian
Tested-by: Neha Sharma, Mahendra Singh Thalor and Ajin Cherian
Discussion: https://postgr.es/m/688b0b7f-2f6c-d827-c27b-216a8e3ea700@2ndquadrant.com
2020-09-03 04:24:07 +02:00
|
|
|
|
2017-01-19 18:00:00 +01:00
|
|
|
pubnames = options->proto.logical.publication_names;
|
|
|
|
pubnames_str = stringlist_to_identifierstr(conn->streamConn, pubnames);
|
2017-01-23 17:06:30 +01:00
|
|
|
if (!pubnames_str)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("could not start WAL streaming: %s",
|
2017-02-27 14:30:06 +01:00
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
2017-01-23 17:06:30 +01:00
|
|
|
pubnames_literal = PQescapeLiteral(conn->streamConn, pubnames_str,
|
|
|
|
strlen(pubnames_str));
|
|
|
|
if (!pubnames_literal)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("could not start WAL streaming: %s",
|
2017-02-27 14:30:06 +01:00
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
2017-01-23 17:06:30 +01:00
|
|
|
appendStringInfo(&cmd, ", publication_names %s", pubnames_literal);
|
|
|
|
PQfreemem(pubnames_literal);
|
2017-01-19 18:00:00 +01:00
|
|
|
pfree(pubnames_str);
|
2017-01-23 17:06:30 +01:00
|
|
|
|
2020-07-18 18:44:51 +02:00
|
|
|
if (options->proto.logical.binary &&
|
|
|
|
PQserverVersion(conn->streamConn) >= 140000)
|
|
|
|
appendStringInfoString(&cmd, ", binary 'true'");
|
|
|
|
|
2017-01-23 17:06:30 +01:00
|
|
|
appendStringInfoChar(&cmd, ')');
|
2017-01-19 18:00:00 +01:00
|
|
|
}
|
2014-02-01 04:45:17 +01:00
|
|
|
else
|
2017-01-19 18:00:00 +01:00
|
|
|
appendStringInfo(&cmd, " TIMELINE %u",
|
|
|
|
options->proto.physical.startpointTLI);
|
|
|
|
|
|
|
|
/* Start streaming. */
|
2016-11-30 18:00:00 +01:00
|
|
|
res = libpqrcv_PQexec(conn->streamConn, cmd.data);
|
|
|
|
pfree(cmd.data);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
|
|
|
|
if (PQresultStatus(res) == PGRES_COMMAND_OK)
|
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
else if (PQresultStatus(res) != PGRES_COPY_BOTH)
|
2010-04-19 16:10:45 +02:00
|
|
|
{
|
|
|
|
PQclear(res);
|
2010-01-20 10:16:24 +01:00
|
|
|
ereport(ERROR,
|
2010-03-21 01:17:59 +01:00
|
|
|
(errmsg("could not start WAL streaming: %s",
|
2017-02-27 14:30:06 +01:00
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
2010-04-19 16:10:45 +02:00
|
|
|
}
|
2010-01-20 10:16:24 +01:00
|
|
|
PQclear(res);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2013-01-18 10:48:29 +01:00
|
|
|
* Stop streaming WAL data. Returns the next timeline's ID in *next_tli, as
|
|
|
|
* reported by the server, or 0 if it did not report it.
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of the timeline is reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
*/
|
|
|
|
static void
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_endstreaming(WalReceiverConn *conn, TimeLineID *next_tli)
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of the timeline is reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
{
|
|
|
|
PGresult *res;
|
|
|
|
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
/*
|
|
|
|
* Send copy-end message. As in libpqrcv_PQexec, this could theoretically
|
|
|
|
* block, but the risk seems small.
|
|
|
|
*/
|
2016-11-30 18:00:00 +01:00
|
|
|
if (PQputCopyEnd(conn->streamConn, NULL) <= 0 ||
|
|
|
|
PQflush(conn->streamConn))
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of the timeline is reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
ereport(ERROR,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
(errmsg("could not send end-of-streaming message to primary: %s",
|
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
2016-11-30 18:00:00 +01:00
|
|
|
|
|
|
|
*next_tli = 0;
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2013-01-18 10:48:29 +01:00
|
|
|
/*
|
|
|
|
* After COPY is finished, we should receive a result set indicating the
|
2013-05-29 22:58:43 +02:00
|
|
|
* next timeline's ID, or just CommandComplete if the server was shut
|
|
|
|
* down.
|
2013-01-18 10:48:29 +01:00
|
|
|
*
|
2017-05-17 22:31:56 +02:00
|
|
|
* If we had not yet received CopyDone from the backend, PGRES_COPY_OUT is
|
|
|
|
* also possible in case we aborted the copy in mid-stream.
|
2013-01-18 10:48:29 +01:00
|
|
|
*/
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
res = libpqrcv_PQgetResult(conn->streamConn);
|
2013-01-18 10:48:29 +01:00
|
|
|
if (PQresultStatus(res) == PGRES_TUPLES_OK)
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of the timeline is reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
{
|
Fix walsender failure at promotion.
If a standby server has a cascading standby server connected to it, it's
possible that WAL has already been sent up to the next WAL page boundary,
splitting a WAL record in the middle, when the first standby server is
promoted. Don't throw an assertion failure or error in walsender if that
happens.
Also, fix a variant of the same bug in pg_receivexlog: if it had already
received WAL on previous timeline up to a segment boundary, when the
upstream standby server is promoted so that the timeline switch record falls
on the previous segment, pg_receivexlog would miss the segment containing
the timeline switch. To fix that, have walsender send the position of the
timeline switch at end-of-streaming, in addition to the next timeline's ID.
It was previously assumed that the switch happened exactly where the
streaming stopped.
Note: this is an incompatible change in the streaming protocol. You might
get an error if you try to stream over timeline switches, if the client is
running 9.3beta1 and the server is more recent. It should be fine after a
reconnect, however.
Reported by Fujii Masao.
2013-05-08 19:10:17 +02:00
|
|
|
/*
|
|
|
|
* Read the next timeline's ID. The server also sends the timeline's
|
|
|
|
* starting point, but it is ignored.
|
|
|
|
*/
|
|
|
|
if (PQnfields(res) < 2 || PQntuples(res) != 1)
|
2013-01-18 10:48:29 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("unexpected result set after end-of-streaming")));
|
2018-07-22 23:58:01 +02:00
|
|
|
*next_tli = pg_strtoint32(PQgetvalue(res, 0, 0));
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline is reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
PQclear(res);
|
2013-01-18 10:48:29 +01:00
|
|
|
|
|
|
|
/* the result set should be followed by CommandComplete */
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
res = libpqrcv_PQgetResult(conn->streamConn);
|
2016-11-30 18:00:00 +01:00
|
|
|
}
|
|
|
|
else if (PQresultStatus(res) == PGRES_COPY_OUT)
|
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
|
|
|
|
/* End the copy */
|
2017-06-30 18:22:33 +02:00
|
|
|
if (PQendcopy(conn->streamConn))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("error while shutting down streaming COPY: %s",
|
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
2016-11-30 18:00:00 +01:00
|
|
|
|
|
|
|
/* CommandComplete should follow */
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
res = libpqrcv_PQgetResult(conn->streamConn);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline is reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
}
|
2013-01-18 10:48:29 +01:00
|
|
|
|
|
|
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("error reading result of streaming command: %s",
|
2017-02-27 14:30:06 +01:00
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
2015-02-12 01:20:49 +01:00
|
|
|
PQclear(res);
|
2013-01-18 10:48:29 +01:00
|
|
|
|
|
|
|
/* Verify that there are no more results */
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
res = libpqrcv_PQgetResult(conn->streamConn);
|
2013-01-18 10:48:29 +01:00
|
|
|
if (res != NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("unexpected result after CommandComplete: %s",
|
2017-02-27 14:30:06 +01:00
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline is reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fetch the timeline history file for 'tli' from primary.
|
|
|
|
*/
|
|
|
|
static void
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_readtimelinehistoryfile(WalReceiverConn *conn,
|
|
|
|
TimeLineID tli, char **filename,
|
|
|
|
char **content, int *len)
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
{
|
|
|
|
PGresult *res;
|
|
|
|
char cmd[64];
|
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
Assert(!conn->logical);
|
|
|
|
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
/*
|
|
|
|
* Request the primary to send over the history file for given timeline.
|
|
|
|
*/
|
|
|
|
snprintf(cmd, sizeof(cmd), "TIMELINE_HISTORY %u", tli);
|
2016-11-30 18:00:00 +01:00
|
|
|
res = libpqrcv_PQexec(conn->streamConn, cmd);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("could not receive timeline history file from "
|
|
|
|
"the primary server: %s",
|
2017-02-27 14:30:06 +01:00
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
}
|
|
|
|
if (PQnfields(res) != 2 || PQntuples(res) != 1)
|
|
|
|
{
|
|
|
|
int ntuples = PQntuples(res);
|
|
|
|
int nfields = PQnfields(res);
|
|
|
|
|
|
|
|
PQclear(res);
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("invalid response from primary server"),
|
|
|
|
errdetail("Expected 1 tuple with 2 fields, got %d tuples with %d fields.",
|
|
|
|
ntuples, nfields)));
|
|
|
|
}
|
|
|
|
*filename = pstrdup(PQgetvalue(res, 0, 0));
|
|
|
|
|
|
|
|
*len = PQgetlength(res, 0, 1);
|
|
|
|
*content = palloc(*len);
|
|
|
|
memcpy(*content, PQgetvalue(res, 0, 1), *len);
|
|
|
|
PQclear(res);
|
2010-01-20 10:16:24 +01:00
|
|
|
}
|
|
|
|
|
2010-04-19 16:10:45 +02:00
|
|
|
/*
|
|
|
|
* Send a query and wait for the results by using the asynchronous libpq
|
2016-12-02 14:15:36 +01:00
|
|
|
* functions and socket readiness events.
|
2010-04-19 16:10:45 +02:00
|
|
|
*
|
|
|
|
* We must not use the regular blocking libpq functions like PQexec()
|
|
|
|
* since they are uninterruptible by signals on some platforms, such as
|
|
|
|
* Windows.
|
|
|
|
*
|
|
|
|
* The function is modeled on PQexec() in libpq, but only implements
|
2017-03-23 13:36:36 +01:00
|
|
|
* those parts that are in use in the walreceiver api.
|
2010-04-19 16:10:45 +02:00
|
|
|
*
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
* May return NULL, rather than an error result, on failure.
|
2010-04-19 16:10:45 +02:00
|
|
|
*/
|
|
|
|
static PGresult *
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_PQexec(PGconn *streamConn, const char *query)
|
2010-04-19 16:10:45 +02:00
|
|
|
{
|
2010-07-06 21:19:02 +02:00
|
|
|
PGresult *lastResult = NULL;
|
2010-04-19 16:10:45 +02:00
|
|
|
|
|
|
|
/*
|
2010-07-06 21:19:02 +02:00
|
|
|
* PQexec() silently discards any prior query results on the connection.
|
2017-05-17 22:31:56 +02:00
|
|
|
* This is not required for this function as it's expected that the caller
|
|
|
|
* (which is this library in all cases) will behave correctly and we don't
|
|
|
|
* have to be backwards compatible with old libpq.
|
2010-04-19 16:10:45 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
* Submit the query. Since we don't use non-blocking mode, this could
|
|
|
|
* theoretically block. In practice, since we don't send very long query
|
|
|
|
* strings, the risk seems negligible.
|
2010-04-19 16:10:45 +02:00
|
|
|
*/
|
|
|
|
if (!PQsendQuery(streamConn, query))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
for (;;)
|
|
|
|
{
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
/* Wait for, and collect, the next PGresult. */
|
|
|
|
PGresult *result;
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 21:03:39 +02:00
|
|
|
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
result = libpqrcv_PQgetResult(streamConn);
|
|
|
|
if (result == NULL)
|
|
|
|
break; /* query is complete, or failure */
|
2010-04-19 16:10:45 +02:00
|
|
|
|
|
|
|
/*
|
Fix low-probability leaks of PGresult objects in the backend.
We had three occurrences of essentially the same coding pattern
wherein we tried to retrieve a query result from a libpq connection
without blocking. In the case where PQconsumeInput failed (typically
indicating a lost connection), all three loops simply gave up and
returned, forgetting to clear any previously-collected PGresult
object. Since those are malloc'd not palloc'd, the oversight results
in a process-lifespan memory leak.
One instance, in libpqwalreceiver, is of little significance because
the walreceiver process would just quit anyway if its connection fails.
But we might as well fix it.
The other two instances, in postgres_fdw, are somewhat more worrisome
because at least in principle the scenario could be repeated, allowing
the amount of memory leaked to build up to something worth worrying
about. Moreover, in these cases the loops contain CHECK_FOR_INTERRUPTS
calls, as well as other calls that could potentially elog(ERROR),
providing another way to exit without having cleared the PGresult.
Here we need to add PG_TRY logic similar to what exists in quite a
few other places in postgres_fdw.
Coverity noted the libpqwalreceiver bug; I found the other two cases
by checking all calls of PQconsumeInput.
Back-patch to all supported versions as appropriate (9.2 lacks
postgres_fdw, so this is really quite unexciting for that branch).
Discussion: https://postgr.es/m/22620.1497486981@sss.pgh.pa.us
2017-06-15 21:03:39 +02:00
|
|
|
* Emulate PQexec()'s behavior of returning the last result when there
|
|
|
|
* are many. We are fine with returning just last error message.
|
2010-04-19 16:10:45 +02:00
|
|
|
*/
|
|
|
|
PQclear(lastResult);
|
|
|
|
lastResult = result;
|
|
|
|
|
|
|
|
if (PQresultStatus(lastResult) == PGRES_COPY_IN ||
|
|
|
|
PQresultStatus(lastResult) == PGRES_COPY_OUT ||
|
2010-12-11 15:27:37 +01:00
|
|
|
PQresultStatus(lastResult) == PGRES_COPY_BOTH ||
|
2010-04-19 16:10:45 +02:00
|
|
|
PQstatus(streamConn) == CONNECTION_BAD)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return lastResult;
|
|
|
|
}
|
|
|
|
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
/*
|
|
|
|
* Perform the equivalent of PQgetResult(), but watch for interrupts.
|
|
|
|
*/
|
|
|
|
static PGresult *
|
|
|
|
libpqrcv_PQgetResult(PGconn *streamConn)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Collect data until PQgetResult is ready to get the result without
|
|
|
|
* blocking.
|
|
|
|
*/
|
|
|
|
while (PQisBusy(streamConn))
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't need to break down the sleep into smaller increments,
|
|
|
|
* since we'll get interrupted by signals and can handle any
|
|
|
|
* interrupts here.
|
|
|
|
*/
|
|
|
|
rc = WaitLatchOrSocket(MyLatch,
|
|
|
|
WL_EXIT_ON_PM_DEATH | WL_SOCKET_READABLE |
|
|
|
|
WL_LATCH_SET,
|
|
|
|
PQsocket(streamConn),
|
|
|
|
0,
|
|
|
|
WAIT_EVENT_LIBPQWALRECEIVER_RECEIVE);
|
|
|
|
|
|
|
|
/* Interrupted? */
|
|
|
|
if (rc & WL_LATCH_SET)
|
|
|
|
{
|
|
|
|
ResetLatch(MyLatch);
|
|
|
|
ProcessWalRcvInterrupts();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Consume whatever data is available from the socket */
|
|
|
|
if (PQconsumeInput(streamConn) == 0)
|
|
|
|
{
|
|
|
|
/* trouble; return NULL */
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now we can collect and return the next PGresult */
|
|
|
|
return PQgetResult(streamConn);
|
|
|
|
}
|
|
|
|
|
2010-01-20 10:16:24 +01:00
|
|
|
/*
|
|
|
|
* Disconnect connection to primary, if any.
|
|
|
|
*/
|
|
|
|
static void
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_disconnect(WalReceiverConn *conn)
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
2016-11-30 18:00:00 +01:00
|
|
|
PQfinish(conn->streamConn);
|
|
|
|
if (conn->recvBuf != NULL)
|
|
|
|
PQfreemem(conn->recvBuf);
|
|
|
|
pfree(conn);
|
2010-01-20 10:16:24 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2016-03-30 03:16:12 +02:00
|
|
|
* Receive a message available from XLOG stream.
|
2010-01-20 10:16:24 +01:00
|
|
|
*
|
|
|
|
* Returns:
|
|
|
|
*
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
* If data was received, returns the length of the data. *buffer is set to
|
|
|
|
* point to a buffer holding the received message. The buffer is only valid
|
|
|
|
* until the next libpqrcv_* call.
|
2010-01-20 10:16:24 +01:00
|
|
|
*
|
2016-03-30 03:16:12 +02:00
|
|
|
* If no data was available immediately, returns 0, and *wait_fd is set to a
|
2016-04-14 19:49:37 +02:00
|
|
|
* socket descriptor which can be waited on before trying again.
|
2010-01-20 10:16:24 +01:00
|
|
|
*
|
2013-05-29 22:58:43 +02:00
|
|
|
* -1 if the server ended the COPY.
|
2010-01-20 10:16:24 +01:00
|
|
|
*
|
|
|
|
* ereports on error.
|
|
|
|
*/
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
static int
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_receive(WalReceiverConn *conn, char **buffer,
|
|
|
|
pgsocket *wait_fd)
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
|
|
|
int rawlen;
|
|
|
|
|
2016-11-30 18:00:00 +01:00
|
|
|
if (conn->recvBuf != NULL)
|
|
|
|
PQfreemem(conn->recvBuf);
|
|
|
|
conn->recvBuf = NULL;
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2011-01-13 16:51:28 +01:00
|
|
|
/* Try to receive a CopyData message */
|
2016-11-30 18:00:00 +01:00
|
|
|
rawlen = PQgetCopyData(conn->streamConn, &conn->recvBuf, 1);
|
2011-01-13 16:51:28 +01:00
|
|
|
if (rawlen == 0)
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
2016-03-30 03:16:12 +02:00
|
|
|
/* Try consuming some data. */
|
2016-11-30 18:00:00 +01:00
|
|
|
if (PQconsumeInput(conn->streamConn) == 0)
|
2010-01-20 10:16:24 +01:00
|
|
|
ereport(ERROR,
|
2010-03-21 01:17:59 +01:00
|
|
|
(errmsg("could not receive data from WAL stream: %s",
|
2017-02-27 14:30:06 +01:00
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2011-01-13 16:51:28 +01:00
|
|
|
/* Now that we've consumed some input, try again */
|
2016-11-30 18:00:00 +01:00
|
|
|
rawlen = PQgetCopyData(conn->streamConn, &conn->recvBuf, 1);
|
2011-01-13 16:51:28 +01:00
|
|
|
if (rawlen == 0)
|
2016-03-30 03:16:12 +02:00
|
|
|
{
|
|
|
|
/* Tell caller to try again when our socket is ready. */
|
2016-11-30 18:00:00 +01:00
|
|
|
*wait_fd = PQsocket(conn->streamConn);
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
return 0;
|
2016-03-30 03:16:12 +02:00
|
|
|
}
|
2011-01-13 16:51:28 +01:00
|
|
|
}
|
2010-02-26 03:01:40 +01:00
|
|
|
if (rawlen == -1) /* end-of-streaming or error */
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
2010-02-26 03:01:40 +01:00
|
|
|
PGresult *res;
|
2010-01-20 10:16:24 +01:00
|
|
|
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
res = libpqrcv_PQgetResult(conn->streamConn);
|
2017-03-23 13:36:36 +01:00
|
|
|
if (PQresultStatus(res) == PGRES_COMMAND_OK)
|
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
|
2017-06-08 23:42:18 +02:00
|
|
|
/* Verify that there are no more results. */
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
res = libpqrcv_PQgetResult(conn->streamConn);
|
2017-03-23 13:36:36 +01:00
|
|
|
if (res != NULL)
|
2017-06-08 23:42:18 +02:00
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the other side closed the connection orderly (otherwise
|
|
|
|
* we'd seen an error, or PGRES_COPY_IN) don't report an error
|
|
|
|
* here, but let callers deal with it.
|
|
|
|
*/
|
|
|
|
if (PQstatus(conn->streamConn) == CONNECTION_BAD)
|
|
|
|
return -1;
|
|
|
|
|
2017-03-23 13:36:36 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("unexpected result after CommandComplete: %s",
|
|
|
|
PQerrorMessage(conn->streamConn))));
|
2017-06-08 23:42:18 +02:00
|
|
|
}
|
|
|
|
|
2017-03-23 13:36:36 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
else if (PQresultStatus(res) == PGRES_COPY_IN)
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
else
|
2010-01-20 10:16:24 +01:00
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
ereport(ERROR,
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
(errmsg("could not receive data from WAL stream: %s",
|
2017-02-27 14:30:06 +01:00
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
2010-01-20 10:16:24 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (rawlen < -1)
|
|
|
|
ereport(ERROR,
|
2010-03-21 01:17:59 +01:00
|
|
|
(errmsg("could not receive data from WAL stream: %s",
|
2017-02-27 14:30:06 +01:00
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
2010-01-20 10:16:24 +01:00
|
|
|
|
2010-02-03 10:47:19 +01:00
|
|
|
/* Return received messages to caller */
|
2016-11-30 18:00:00 +01:00
|
|
|
*buffer = conn->recvBuf;
|
Allow a streaming replication standby to follow a timeline switch.
Before this patch, streaming replication would refuse to start replicating
if the timeline in the primary doesn't exactly match the standby. The
situation where it doesn't match is when you have a master, and two
standbys, and you promote one of the standbys to become new master.
Promoting bumps up the timeline ID, and after that bump, the other standby
would refuse to continue.
There's significantly more timeline related logic in streaming replication
now. First of all, when a standby connects to primary, it will ask the
primary for any timeline history files that are missing from the standby.
The missing files are sent using a new replication command TIMELINE_HISTORY,
and stored in standby's pg_xlog directory. Using the timeline history files,
the standby can follow the latest timeline present in the primary
(recovery_target_timeline='latest'), just as it can follow new timelines
appearing in an archive directory.
START_REPLICATION now takes a TIMELINE parameter, to specify exactly which
timeline to stream WAL from. This allows the standby to request the primary
to send over WAL that precedes the promotion. The replication protocol is
changed slightly (in a backwards-compatible way although there's little hope
of streaming replication working across major versions anyway), to allow
replication to stop when the end of timeline reached, putting the walsender
back into accepting a replication command.
Many thanks to Amit Kapila for testing and reviewing various versions of
this patch.
2012-12-13 18:00:00 +01:00
|
|
|
return rawlen;
|
2010-01-20 10:16:24 +01:00
|
|
|
}
|
2010-12-11 15:27:37 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Send a message to XLOG stream.
|
|
|
|
*
|
|
|
|
* ereports on error.
|
|
|
|
*/
|
|
|
|
static void
|
2016-11-30 18:00:00 +01:00
|
|
|
libpqrcv_send(WalReceiverConn *conn, const char *buffer, int nbytes)
|
2010-12-11 15:27:37 +01:00
|
|
|
{
|
2016-11-30 18:00:00 +01:00
|
|
|
if (PQputCopyData(conn->streamConn, buffer, nbytes) <= 0 ||
|
|
|
|
PQflush(conn->streamConn))
|
2010-12-11 15:27:37 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("could not send data to WAL stream: %s",
|
2017-02-27 14:30:06 +01:00
|
|
|
pchomp(PQerrorMessage(conn->streamConn)))));
|
2010-12-11 15:27:37 +01:00
|
|
|
}
|
2017-01-19 18:00:00 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Create new replication slot.
|
|
|
|
* Returns the name of the exported snapshot for logical slot or NULL for
|
|
|
|
* physical slot.
|
|
|
|
*/
|
|
|
|
static char *
|
|
|
|
libpqrcv_create_slot(WalReceiverConn *conn, const char *slotname,
|
2017-03-23 13:36:36 +01:00
|
|
|
bool temporary, CRSSnapshotAction snapshot_action,
|
|
|
|
XLogRecPtr *lsn)
|
2017-01-19 18:00:00 +01:00
|
|
|
{
|
2017-05-17 22:31:56 +02:00
|
|
|
PGresult *res;
|
|
|
|
StringInfoData cmd;
|
|
|
|
char *snapshot;
|
2017-01-19 18:00:00 +01:00
|
|
|
|
|
|
|
initStringInfo(&cmd);
|
|
|
|
|
2017-03-14 22:13:56 +01:00
|
|
|
appendStringInfo(&cmd, "CREATE_REPLICATION_SLOT \"%s\"", slotname);
|
2017-01-19 18:00:00 +01:00
|
|
|
|
|
|
|
if (temporary)
|
2017-08-16 05:34:39 +02:00
|
|
|
appendStringInfoString(&cmd, " TEMPORARY");
|
2017-01-19 18:00:00 +01:00
|
|
|
|
|
|
|
if (conn->logical)
|
2017-03-14 22:13:56 +01:00
|
|
|
{
|
2017-08-16 05:34:39 +02:00
|
|
|
appendStringInfoString(&cmd, " LOGICAL pgoutput");
|
2017-03-23 13:36:36 +01:00
|
|
|
switch (snapshot_action)
|
|
|
|
{
|
|
|
|
case CRS_EXPORT_SNAPSHOT:
|
2017-08-16 05:34:39 +02:00
|
|
|
appendStringInfoString(&cmd, " EXPORT_SNAPSHOT");
|
2017-03-23 13:36:36 +01:00
|
|
|
break;
|
|
|
|
case CRS_NOEXPORT_SNAPSHOT:
|
2017-08-16 05:34:39 +02:00
|
|
|
appendStringInfoString(&cmd, " NOEXPORT_SNAPSHOT");
|
2017-03-23 13:36:36 +01:00
|
|
|
break;
|
|
|
|
case CRS_USE_SNAPSHOT:
|
2017-08-16 05:34:39 +02:00
|
|
|
appendStringInfoString(&cmd, " USE_SNAPSHOT");
|
2017-03-23 13:36:36 +01:00
|
|
|
break;
|
|
|
|
}
|
2017-03-14 22:13:56 +01:00
|
|
|
}
|
2020-01-14 14:07:11 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
appendStringInfoString(&cmd, " PHYSICAL RESERVE_WAL");
|
|
|
|
}
|
2017-01-19 18:00:00 +01:00
|
|
|
|
|
|
|
res = libpqrcv_PQexec(conn->streamConn, cmd.data);
|
|
|
|
pfree(cmd.data);
|
|
|
|
|
|
|
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
|
|
|
{
|
|
|
|
PQclear(res);
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("could not create replication slot \"%s\": %s",
|
2017-02-27 14:30:06 +01:00
|
|
|
slotname, pchomp(PQerrorMessage(conn->streamConn)))));
|
2017-01-19 18:00:00 +01:00
|
|
|
}
|
|
|
|
|
2020-01-11 09:00:19 +01:00
|
|
|
if (lsn)
|
|
|
|
*lsn = DatumGetLSN(DirectFunctionCall1Coll(pg_lsn_in, InvalidOid,
|
|
|
|
CStringGetDatum(PQgetvalue(res, 0, 1))));
|
|
|
|
|
2017-01-19 18:00:00 +01:00
|
|
|
if (!PQgetisnull(res, 0, 2))
|
|
|
|
snapshot = pstrdup(PQgetvalue(res, 0, 2));
|
|
|
|
else
|
|
|
|
snapshot = NULL;
|
|
|
|
|
|
|
|
PQclear(res);
|
|
|
|
|
|
|
|
return snapshot;
|
|
|
|
}
|
|
|
|
|
2020-01-14 14:05:25 +01:00
|
|
|
/*
|
|
|
|
* Return PID of remote backend process.
|
|
|
|
*/
|
|
|
|
static pid_t
|
|
|
|
libpqrcv_get_backend_pid(WalReceiverConn *conn)
|
|
|
|
{
|
|
|
|
return PQbackendPID(conn->streamConn);
|
|
|
|
}
|
|
|
|
|
2017-01-19 18:00:00 +01:00
|
|
|
/*
|
2017-03-23 13:36:36 +01:00
|
|
|
* Convert tuple query result to tuplestore.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
libpqrcv_processTuples(PGresult *pgres, WalRcvExecResult *walres,
|
2017-05-17 22:31:56 +02:00
|
|
|
const int nRetTypes, const Oid *retTypes)
|
2017-03-23 13:36:36 +01:00
|
|
|
{
|
2017-05-17 22:31:56 +02:00
|
|
|
int tupn;
|
|
|
|
int coln;
|
|
|
|
int nfields = PQnfields(pgres);
|
|
|
|
HeapTuple tuple;
|
|
|
|
AttInMetadata *attinmeta;
|
|
|
|
MemoryContext rowcontext;
|
|
|
|
MemoryContext oldcontext;
|
2017-03-23 13:36:36 +01:00
|
|
|
|
|
|
|
/* Make sure we got expected number of fields. */
|
|
|
|
if (nfields != nRetTypes)
|
|
|
|
ereport(ERROR,
|
2017-06-05 10:38:26 +02:00
|
|
|
(errmsg("invalid query response"),
|
2017-03-23 13:36:36 +01:00
|
|
|
errdetail("Expected %d fields, got %d fields.",
|
|
|
|
nRetTypes, nfields)));
|
|
|
|
|
|
|
|
walres->tuplestore = tuplestore_begin_heap(true, false, work_mem);
|
|
|
|
|
|
|
|
/* Create tuple descriptor corresponding to expected result. */
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring an pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to get merge this
now. It's painful to maintain externally, too complicated to commit
after the code code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
|
|
|
walres->tupledesc = CreateTemplateTupleDesc(nRetTypes);
|
2017-03-23 13:36:36 +01:00
|
|
|
for (coln = 0; coln < nRetTypes; coln++)
|
|
|
|
TupleDescInitEntry(walres->tupledesc, (AttrNumber) coln + 1,
|
|
|
|
PQfname(pgres, coln), retTypes[coln], -1, 0);
|
|
|
|
attinmeta = TupleDescGetAttInMetadata(walres->tupledesc);
|
|
|
|
|
2017-03-24 13:41:32 +01:00
|
|
|
/* No point in doing more here if there were no tuples returned. */
|
|
|
|
if (PQntuples(pgres) == 0)
|
|
|
|
return;
|
|
|
|
|
2017-03-23 13:36:36 +01:00
|
|
|
/* Create temporary context for local allocations. */
|
|
|
|
rowcontext = AllocSetContextCreate(CurrentMemoryContext,
|
|
|
|
"libpqrcv query result context",
|
|
|
|
ALLOCSET_DEFAULT_SIZES);
|
|
|
|
|
|
|
|
/* Process returned rows. */
|
|
|
|
for (tupn = 0; tupn < PQntuples(pgres); tupn++)
|
|
|
|
{
|
2017-05-17 22:31:56 +02:00
|
|
|
char *cstrs[MaxTupleAttributeNumber];
|
2017-03-23 13:36:36 +01:00
|
|
|
|
In walreceiver, don't try to do ereport() in a signal handler.
This is quite unsafe, even for the case of ereport(FATAL) where we won't
return control to the interrupted code, and despite this code's use of
a flag to restrict the areas where we'd try to do it. It's possible
for example that we interrupt malloc or free while that's holding a lock
that's meant to protect against cross-thread interference. Then, any
attempt to do malloc or free within ereport() will result in a deadlock,
preventing the walreceiver process from exiting in response to SIGTERM.
We hypothesize that this explains some hard-to-reproduce failures seen
in the buildfarm.
Hence, get rid of the immediate-exit code in WalRcvShutdownHandler,
as well as the logic associated with WalRcvImmediateInterruptOK.
Instead, we need to take care that potentially-blocking operations
in the walreceiver's data transmission logic (libpqwalreceiver.c)
will respond reasonably promptly to the process's latch becoming
set and then call ProcessWalRcvInterrupts. Much of the needed code
for that was already present in libpqwalreceiver.c. I refactored
things a bit so that all the uses of PQgetResult use latch-aware
waiting, but didn't need to do much more.
These changes should be enough to ensure that libpqwalreceiver.c
will respond promptly to SIGTERM whenever it's waiting to receive
data. In principle, it could block for a long time while waiting
to send data too, and this patch does nothing to guard against that.
I think that that hazard is mostly theoretical though: such blocking
should occur only if we fill the kernel's data transmission buffers,
and we don't generally send enough data to make that happen without
waiting for input. If we find out that the hazard isn't just
theoretical, we could fix it by using PQsetnonblocking, but that
would require more ticklish changes than I care to make now.
This is a bug fix, but it seems like too big a change to push into
the back branches without much more testing than there's time for
right now. Perhaps we'll back-patch once we have more confidence
in the change.
Patch by me; thanks to Thomas Munro for review.
Discussion: https://postgr.es/m/20190416070119.GK2673@paquier.xyz
2019-04-29 18:26:07 +02:00
|
|
|
ProcessWalRcvInterrupts();
|
2017-03-23 13:36:36 +01:00
|
|
|
|
|
|
|
/* Do the allocations in temporary context. */
|
|
|
|
oldcontext = MemoryContextSwitchTo(rowcontext);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fill cstrs with null-terminated strings of column values.
|
|
|
|
*/
|
|
|
|
for (coln = 0; coln < nfields; coln++)
|
|
|
|
{
|
|
|
|
if (PQgetisnull(pgres, tupn, coln))
|
|
|
|
cstrs[coln] = NULL;
|
|
|
|
else
|
|
|
|
cstrs[coln] = PQgetvalue(pgres, tupn, coln);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Convert row to a tuple, and add it to the tuplestore */
|
|
|
|
tuple = BuildTupleFromCStrings(attinmeta, cstrs);
|
|
|
|
tuplestore_puttuple(walres->tuplestore, tuple);
|
|
|
|
|
|
|
|
/* Clean up */
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
MemoryContextReset(rowcontext);
|
|
|
|
}
|
|
|
|
|
|
|
|
MemoryContextDelete(rowcontext);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Public interface for sending generic queries (and commands).
|
2017-01-19 18:00:00 +01:00
|
|
|
*
|
2017-03-23 13:36:36 +01:00
|
|
|
* This can only be called from process connected to database.
|
2017-01-19 18:00:00 +01:00
|
|
|
*/
|
2017-03-23 13:36:36 +01:00
|
|
|
static WalRcvExecResult *
|
|
|
|
libpqrcv_exec(WalReceiverConn *conn, const char *query,
|
|
|
|
const int nRetTypes, const Oid *retTypes)
|
2017-01-19 18:00:00 +01:00
|
|
|
{
|
2017-03-23 13:36:36 +01:00
|
|
|
PGresult *pgres = NULL;
|
|
|
|
WalRcvExecResult *walres = palloc0(sizeof(WalRcvExecResult));
|
2017-01-19 18:00:00 +01:00
|
|
|
|
2017-03-23 13:36:36 +01:00
|
|
|
if (MyDatabaseId == InvalidOid)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
errmsg("the query interface requires a database connection")));
|
2017-01-19 18:00:00 +01:00
|
|
|
|
2017-03-23 13:36:36 +01:00
|
|
|
pgres = libpqrcv_PQexec(conn->streamConn, query);
|
|
|
|
|
|
|
|
switch (PQresultStatus(pgres))
|
2017-01-19 18:00:00 +01:00
|
|
|
{
|
2017-03-23 13:36:36 +01:00
|
|
|
case PGRES_SINGLE_TUPLE:
|
|
|
|
case PGRES_TUPLES_OK:
|
|
|
|
walres->status = WALRCV_OK_TUPLES;
|
|
|
|
libpqrcv_processTuples(pgres, walres, nRetTypes, retTypes);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PGRES_COPY_IN:
|
|
|
|
walres->status = WALRCV_OK_COPY_IN;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PGRES_COPY_OUT:
|
|
|
|
walres->status = WALRCV_OK_COPY_OUT;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PGRES_COPY_BOTH:
|
|
|
|
walres->status = WALRCV_OK_COPY_BOTH;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PGRES_COMMAND_OK:
|
|
|
|
walres->status = WALRCV_OK_COMMAND;
|
|
|
|
break;
|
|
|
|
|
2017-05-17 22:31:56 +02:00
|
|
|
/* Empty query is considered error. */
|
2017-03-23 13:36:36 +01:00
|
|
|
case PGRES_EMPTY_QUERY:
|
|
|
|
walres->status = WALRCV_ERROR;
|
|
|
|
walres->err = _("empty query");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PGRES_NONFATAL_ERROR:
|
|
|
|
case PGRES_FATAL_ERROR:
|
|
|
|
case PGRES_BAD_RESPONSE:
|
|
|
|
walres->status = WALRCV_ERROR;
|
|
|
|
walres->err = pchomp(PQerrorMessage(conn->streamConn));
|
|
|
|
break;
|
2017-01-19 18:00:00 +01:00
|
|
|
}
|
|
|
|
|
2017-03-23 13:36:36 +01:00
|
|
|
PQclear(pgres);
|
2017-01-19 18:00:00 +01:00
|
|
|
|
2017-03-23 13:36:36 +01:00
|
|
|
return walres;
|
2017-01-19 18:00:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Given a List of strings, return it as single comma separated
|
|
|
|
* string, quoting identifiers as needed.
|
|
|
|
*
|
|
|
|
* This is essentially the reverse of SplitIdentifierString.
|
|
|
|
*
|
|
|
|
* The caller should free the result.
|
|
|
|
*/
|
|
|
|
static char *
|
|
|
|
stringlist_to_identifierstr(PGconn *conn, List *strings)
|
|
|
|
{
|
2017-05-17 22:31:56 +02:00
|
|
|
ListCell *lc;
|
2017-01-19 18:00:00 +01:00
|
|
|
StringInfoData res;
|
2017-05-17 22:31:56 +02:00
|
|
|
bool first = true;
|
2017-01-19 18:00:00 +01:00
|
|
|
|
|
|
|
initStringInfo(&res);
|
|
|
|
|
2017-05-17 22:31:56 +02:00
|
|
|
foreach(lc, strings)
|
2017-01-19 18:00:00 +01:00
|
|
|
{
|
2017-05-17 22:31:56 +02:00
|
|
|
char *val = strVal(lfirst(lc));
|
|
|
|
char *val_escaped;
|
2017-01-19 18:00:00 +01:00
|
|
|
|
|
|
|
if (first)
|
|
|
|
first = false;
|
|
|
|
else
|
|
|
|
appendStringInfoChar(&res, ',');
|
|
|
|
|
2017-01-23 17:06:30 +01:00
|
|
|
val_escaped = PQescapeIdentifier(conn, val, strlen(val));
|
|
|
|
if (!val_escaped)
|
|
|
|
{
|
|
|
|
free(res.data);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
appendStringInfoString(&res, val_escaped);
|
|
|
|
PQfreemem(val_escaped);
|
2017-01-19 18:00:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return res.data;
|
|
|
|
}
|