libpq: Try next host if one of them times out.

If one host in a multi-host connection string times out, move on to
the next specified host instead of giving up entirely.

Patch by Takayuki Tsunakawa, reviewed by Michael Paquier.  I made
a minor adjustment to the documentation.

Discussion: http://postgr.es/m/0A3221C70F24FB45833433255569204D1F6F42F5@G01JPEXMBYT05
This commit is contained in:
Robert Haas 2017-05-19 16:19:51 -04:00
parent aa41bc794c
commit 5f374fe7a8
3 changed files with 33 additions and 13 deletions

View File

@ -1041,9 +1041,10 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
string). Zero or not specified means wait indefinitely. It is not
recommended to use a timeout of less than 2 seconds.
This timeout applies separately to each connection attempt.
For example, if you specify two hosts and both of them are unreachable,
and <literal>connect_timeout</> is 5, the total time spent waiting for a
connection might be up to 10 seconds.
For example, if you specify two hosts and <literal>connect_timeout</>
is 5, each host will time out if no connection is made within 5
seconds, so the total time spent waiting for a connection might be
up to 10 seconds.
</para>
</listitem>
</varlistentry>

View File

@ -1720,6 +1720,7 @@ connectDBComplete(PGconn *conn)
{
PostgresPollingStatusType flag = PGRES_POLLING_WRITING;
time_t finish_time = ((time_t) -1);
int timeout = 0;
if (conn == NULL || conn->status == CONNECTION_BAD)
return 0;
@ -1729,8 +1730,7 @@ connectDBComplete(PGconn *conn)
*/
if (conn->connect_timeout != NULL)
{
int timeout = atoi(conn->connect_timeout);
timeout = atoi(conn->connect_timeout);
if (timeout > 0)
{
/*
@ -1745,6 +1745,8 @@ connectDBComplete(PGconn *conn)
for (;;)
{
int ret = 0;
/*
* Wait, if necessary. Note that the initial state (just after
* PQconnectStart) is to wait for the socket to select for writing.
@ -1761,7 +1763,8 @@ connectDBComplete(PGconn *conn)
return 1; /* success! */
case PGRES_POLLING_READING:
if (pqWaitTimed(1, 0, conn, finish_time))
ret = pqWaitTimed(1, 0, conn, finish_time);
if (ret == -1)
{
conn->status = CONNECTION_BAD;
return 0;
@ -1769,7 +1772,8 @@ connectDBComplete(PGconn *conn)
break;
case PGRES_POLLING_WRITING:
if (pqWaitTimed(0, 1, conn, finish_time))
ret = pqWaitTimed(0, 1, conn, finish_time);
if (ret == -1)
{
conn->status = CONNECTION_BAD;
return 0;
@ -1782,6 +1786,23 @@ connectDBComplete(PGconn *conn)
return 0;
}
if (ret == 1) /* connect_timeout elapsed */
{
/* If there are no more hosts, return (the error message is already set) */
if (++conn->whichhost >= conn->nconnhost)
{
conn->whichhost = 0;
conn->status = CONNECTION_BAD;
return 0;
}
/* Attempt connection to the next host, starting the connect_timeout timer */
pqDropConnection(conn, true);
conn->addr_cur = conn->connhost[conn->whichhost].addrlist;
conn->status = CONNECTION_NEEDED;
if (conn->connect_timeout != NULL)
finish_time = time(NULL) + timeout;
}
/*
* Now try to advance the state machine.
*/

View File

@ -991,11 +991,9 @@ pqWait(int forRead, int forWrite, PGconn *conn)
/*
* pqWaitTimed: wait, but not past finish_time.
*
* If finish_time is exceeded then we return failure (EOF). This is like
* the response for a kernel exception because we don't want the caller
* to try to read/write in that case.
*
* finish_time = ((time_t) -1) disables the wait limit.
*
* Returns -1 on failure, 0 if the socket is readable/writable, 1 if it timed out.
*/
int
pqWaitTimed(int forRead, int forWrite, PGconn *conn, time_t finish_time)
@ -1005,13 +1003,13 @@ pqWaitTimed(int forRead, int forWrite, PGconn *conn, time_t finish_time)
result = pqSocketCheck(conn, forRead, forWrite, finish_time);
if (result < 0)
return EOF; /* errorMessage is already set */
return -1; /* errorMessage is already set */
if (result == 0)
{
printfPQExpBuffer(&conn->errorMessage,
libpq_gettext("timeout expired\n"));
return EOF;
return 1;
}
return 0;