Restructure libpq's handling of send failures.

Originally, if libpq got a failure (e.g., ECONNRESET) while trying to
send data to the server, it would just report that and wash its hands
of the matter.  It was soon found that that wasn't a very pleasant way
of coping with server-initiated disconnections, so we introduced a hack
(pqHandleSendFailure) in the code that sends queries to make it peek
ahead for server error reports before reporting the send failure.

It now emerges that related cases can occur during connection setup;
in particular, as of TLS 1.3 it's unsafe to assume that SSL connection
failures will be reported by SSL_connect rather than during our first
send attempt.  We could have fixed that in a hacky way by applying
pqHandleSendFailure after a startup packet send failure, but
(a) pqHandleSendFailure explicitly disclaims suitability for use in any
state except query startup, and (b) the problem still potentially exists
for other send attempts in libpq.

Instead, let's fix this in a more general fashion by eliminating
pqHandleSendFailure altogether, and instead arranging to postpone
all reports of send failures in libpq until after we've made an
attempt to read and process server messages.  The send failure won't
be reported at all if we find a server message or detect input EOF.

(Note: this removes one of the reasons why libpq typically overwrites,
rather than appending to, conn->errorMessage: pqHandleSendFailure needed
that behavior so that the send failure report would be replaced if we
got a server message or read failure report.  Eventually I'd like to get
rid of that overwrite behavior altogether, but today is not that day.
For the moment, pqSendSome is assuming that its callees will overwrite
not append to conn->errorMessage.)

Possibly this change should get back-patched someday; but it needs
testing first, so let's not consider that till after v12 beta.

Discussion: https://postgr.es/m/CAEepm=2n6Nv+5tFfe8YnkUm1fXgvxR0Mm1FoD+QKG-vLNGLyKg@mail.gmail.com
This commit is contained in:
Tom Lane 2019-03-19 16:20:20 -04:00
parent 5e28b778bf
commit 1f39a1c064
6 changed files with 109 additions and 75 deletions

View File

@ -537,6 +537,10 @@ pqDropServerData(PGconn *conn)
conn->last_sqlstate[0] = '\0';
conn->auth_req_received = false;
conn->password_needed = false;
conn->write_failed = false;
if (conn->write_err_msg)
free(conn->write_err_msg);
conn->write_err_msg = NULL;
conn->be_pid = 0;
conn->be_key = 0;
}
@ -3702,6 +3706,8 @@ freePGconn(PGconn *conn)
/* Note that conn->Pfdebug is not ours to close or free */
if (conn->last_query)
free(conn->last_query);
if (conn->write_err_msg)
free(conn->write_err_msg);
if (conn->inBuffer)
free(conn->inBuffer);
if (conn->outBuffer)

View File

@ -790,6 +790,32 @@ pqSaveErrorResult(PGconn *conn)
}
}
/*
* As above, and append conn->write_err_msg to whatever other error we have.
* This is used when we've detected a write failure and have exhausted our
* chances of reporting something else instead.
*/
static void
pqSaveWriteError(PGconn *conn)
{
/*
* Ensure conn->result is an error result, and add anything in
* conn->errorMessage to it.
*/
pqSaveErrorResult(conn);
/*
* Now append write_err_msg to that. If it's null because of previous
* strdup failure, do what we can. (It's likely our machinations here are
* all getting OOM failures as well, but ...)
*/
if (conn->write_err_msg && conn->write_err_msg[0] != '\0')
pqCatenateResultError(conn->result, conn->write_err_msg);
else
pqCatenateResultError(conn->result,
libpq_gettext("write to server failed\n"));
}
/*
* This subroutine prepares an async result object for return to the caller.
* If there is not already an async result object, build an error object
@ -1224,7 +1250,7 @@ PQsendQuery(PGconn *conn, const char *query)
pqPuts(query, conn) < 0 ||
pqPutMsgEnd(conn) < 0)
{
pqHandleSendFailure(conn);
/* error message should be set up already */
return 0;
}
@ -1243,7 +1269,7 @@ PQsendQuery(PGconn *conn, const char *query)
*/
if (pqFlush(conn) < 0)
{
pqHandleSendFailure(conn);
/* error message should be set up already */
return 0;
}
@ -1389,7 +1415,7 @@ PQsendPrepare(PGconn *conn,
return 1;
sendFailed:
pqHandleSendFailure(conn);
/* error message should be set up already */
return 0;
}
@ -1641,39 +1667,10 @@ PQsendQueryGuts(PGconn *conn,
return 1;
sendFailed:
pqHandleSendFailure(conn);
/* error message should be set up already */
return 0;
}
/*
* pqHandleSendFailure: try to clean up after failure to send command.
*
* Primarily, what we want to accomplish here is to process any ERROR or
* NOTICE messages that the backend might have sent just before it died.
* Since we're in IDLE state, all such messages will get sent to the notice
* processor.
*
* NOTE: this routine should only be called in PGASYNC_IDLE state.
*/
void
pqHandleSendFailure(PGconn *conn)
{
/*
* Accept and parse any available input data, ignoring I/O errors. Note
* that if pqReadData decides the backend has closed the channel, it will
* close our side of the socket --- that's just what we want here.
*/
while (pqReadData(conn) > 0)
parseInput(conn);
/*
* Be sure to parse available input messages even if we read no data.
* (Note: calling parseInput within the above loop isn't really necessary,
* but it prevents buffer bloat if there's a lot of data available.)
*/
parseInput(conn);
}
/*
* Select row-by-row processing mode
*/
@ -1763,8 +1760,11 @@ PQisBusy(PGconn *conn)
/* Parse any available data, if our state permits. */
parseInput(conn);
/* PQgetResult will return immediately in all states except BUSY. */
return conn->asyncStatus == PGASYNC_BUSY;
/*
* PQgetResult will return immediately in all states except BUSY, or if we
* had a write failure.
*/
return conn->asyncStatus == PGASYNC_BUSY || conn->write_failed;
}
@ -1804,7 +1804,13 @@ PQgetResult(PGconn *conn)
}
}
/* Wait for some more data, and load it. */
/*
* Wait for some more data, and load it. (Note: if the connection has
* been lost, pqWait should return immediately because the socket
* should be read-ready, either with the last server data or with an
* EOF indication. We expect therefore that this won't result in any
* undue delay in reporting a previous write failure.)
*/
if (flushResult ||
pqWait(true, false, conn) ||
pqReadData(conn) < 0)
@ -1820,6 +1826,17 @@ PQgetResult(PGconn *conn)
/* Parse it. */
parseInput(conn);
/*
* If we had a write error, but nothing above obtained a query result
* or detected a read error, report the write error.
*/
if (conn->write_failed && conn->asyncStatus == PGASYNC_BUSY)
{
pqSaveWriteError(conn);
conn->asyncStatus = PGASYNC_IDLE;
return pqPrepareAsyncResult(conn);
}
}
/* Return the appropriate thing. */
@ -2252,7 +2269,7 @@ PQsendDescribe(PGconn *conn, char desc_type, const char *desc_target)
return 1;
sendFailed:
pqHandleSendFailure(conn);
/* error message should be set up already */
return 0;
}

View File

@ -824,6 +824,13 @@ definitelyFailed:
*
* Return 0 on success, -1 on failure and 1 when not all data could be sent
* because the socket would block and the connection is non-blocking.
*
* Upon write failure, conn->write_failed is set and the error message is
* saved in conn->write_err_msg, but we clear the output buffer and return
* zero anyway; this is because callers should soldier on until it's possible
* to read from the server and check for an error message. write_err_msg
* should be reported only when we are unable to obtain a server error first.
* (Thus, a -1 result is returned only for an internal *read* failure.)
*/
static int
pqSendSome(PGconn *conn, int len)
@ -832,13 +839,32 @@ pqSendSome(PGconn *conn, int len)
int remaining = conn->outCount;
int result = 0;
/*
* If we already had a write failure, we will never again try to send data
* on that connection. Even if the kernel would let us, we've probably
* lost message boundary sync with the server. conn->write_failed
* therefore persists until the connection is reset, and we just discard
* all data presented to be written.
*/
if (conn->write_failed)
{
/* conn->write_err_msg should be set up already */
conn->outCount = 0;
return 0;
}
if (conn->sock == PGINVALID_SOCKET)
{
printfPQExpBuffer(&conn->errorMessage,
libpq_gettext("connection not open\n"));
conn->write_failed = true;
/* Transfer error message to conn->write_err_msg, if possible */
/* (strdup failure is OK, we'll cope later) */
conn->write_err_msg = strdup(conn->errorMessage.data);
resetPQExpBuffer(&conn->errorMessage);
/* Discard queued data; no chance it'll ever be sent */
conn->outCount = 0;
return -1;
return 0;
}
/* while there's still data to send */
@ -876,17 +902,24 @@ pqSendSome(PGconn *conn, int len)
default:
/* pqsecure_write set the error message for us */
conn->write_failed = true;
/*
* We used to close the socket here, but that's a bad idea
* since there might be unread data waiting (typically, a
* NOTICE message from the backend telling us it's
* committing hara-kiri...). Leave the socket open until
* pqReadData finds no more data can be read. But abandon
* attempt to send data.
* Transfer error message to conn->write_err_msg, if
* possible (strdup failure is OK, we'll cope later).
*
* Note: this assumes that pqsecure_write and its children
* will overwrite not append to conn->errorMessage. If
* that's ever changed, we could remember the length of
* conn->errorMessage at entry to this routine, and then
* save and delete just what was appended.
*/
conn->write_err_msg = strdup(conn->errorMessage.data);
resetPQExpBuffer(&conn->errorMessage);
/* Discard queued data; no chance it'll ever be sent */
conn->outCount = 0;
return -1;
return 0;
}
}
else
@ -921,6 +954,9 @@ pqSendSome(PGconn *conn, int len)
* can do, and works pretty well in practice. (The documentation
* used to say that you only need to wait for write-ready, so
* there are still plenty of applications like that out there.)
*
* Note that errors here don't result in write_failed becoming
* set.
*/
if (pqReadData(conn) < 0)
{
@ -956,6 +992,7 @@ pqSendSome(PGconn *conn, int len)
*
* Return 0 on success, -1 on failure and 1 when not all data could be sent
* because the socket would block and the connection is non-blocking.
* (See pqSendSome comments about how failure should be handled.)
*/
int
pqFlush(PGconn *conn)

View File

@ -1450,42 +1450,30 @@ pqFunctionCall2(PGconn *conn, Oid fnid,
pqPutInt(fnid, 4, conn) != 0 || /* function id */
pqPutInt(nargs, 4, conn) != 0) /* # of args */
{
pqHandleSendFailure(conn);
/* error message should be set up already */
return NULL;
}
for (i = 0; i < nargs; ++i)
{ /* len.int4 + contents */
if (pqPutInt(args[i].len, 4, conn))
{
pqHandleSendFailure(conn);
return NULL;
}
if (args[i].isint)
{
if (pqPutInt(args[i].u.integer, 4, conn))
{
pqHandleSendFailure(conn);
return NULL;
}
}
else
{
if (pqPutnchar((char *) args[i].u.ptr, args[i].len, conn))
{
pqHandleSendFailure(conn);
return NULL;
}
}
}
if (pqPutMsgEnd(conn) < 0 ||
pqFlush(conn))
{
pqHandleSendFailure(conn);
return NULL;
}
for (;;)
{

View File

@ -1926,50 +1926,35 @@ pqFunctionCall3(PGconn *conn, Oid fnid,
pqPutInt(1, 2, conn) < 0 || /* format code: BINARY */
pqPutInt(nargs, 2, conn) < 0) /* # of args */
{
pqHandleSendFailure(conn);
/* error message should be set up already */
return NULL;
}
for (i = 0; i < nargs; ++i)
{ /* len.int4 + contents */
if (pqPutInt(args[i].len, 4, conn))
{
pqHandleSendFailure(conn);
return NULL;
}
if (args[i].len == -1)
continue; /* it's NULL */
if (args[i].isint)
{
if (pqPutInt(args[i].u.integer, args[i].len, conn))
{
pqHandleSendFailure(conn);
return NULL;
}
}
else
{
if (pqPutnchar((char *) args[i].u.ptr, args[i].len, conn))
{
pqHandleSendFailure(conn);
return NULL;
}
}
}
if (pqPutInt(1, 2, conn) < 0) /* result format code: BINARY */
{
pqHandleSendFailure(conn);
return NULL;
}
if (pqPutMsgEnd(conn) < 0 ||
pqFlush(conn))
{
pqHandleSendFailure(conn);
return NULL;
}
for (;;)
{

View File

@ -410,6 +410,8 @@ struct pg_conn
bool password_needed; /* true if server demanded a password */
bool sigpipe_so; /* have we masked SIGPIPE via SO_NOSIGPIPE? */
bool sigpipe_flag; /* can we mask SIGPIPE via MSG_NOSIGNAL? */
bool write_failed; /* have we had a write failure on sock? */
char *write_err_msg; /* write error message, or NULL if OOM */
/* Transient state needed while establishing connection */
bool try_next_addr; /* time to advance to next address/host? */
@ -585,7 +587,6 @@ extern void pqSaveMessageField(PGresult *res, char code,
extern void pqSaveParameterStatus(PGconn *conn, const char *name,
const char *value);
extern int pqRowProcessor(PGconn *conn, const char **errmsgp);
extern void pqHandleSendFailure(PGconn *conn);
/* === in fe-protocol2.c === */