libpq: Try next host if one of them times out.

If one host in a multi-host connection string times out, move on to
the next specified host instead of giving up entirely.

Patch by Takayuki Tsunakawa, reviewed by Michael Paquier.  The committer
made a minor adjustment to the documentation.

Discussion: http://postgr.es/m/0A3221C70F24FB45833433255569204D1F6F42F5@G01JPEXMBYT05
This commit is contained in:
Robert Haas 2017-05-19 16:19:51 -04:00
parent aa41bc794c
commit 5f374fe7a8
3 changed files with 33 additions and 13 deletions

View File

@ -1041,9 +1041,10 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
string). Zero or not specified means wait indefinitely. It is not string). Zero or not specified means wait indefinitely. It is not
recommended to use a timeout of less than 2 seconds. recommended to use a timeout of less than 2 seconds.
This timeout applies separately to each connection attempt. This timeout applies separately to each connection attempt.
For example, if you specify two hosts and both of them are unreachable, For example, if you specify two hosts and <literal>connect_timeout</>
and <literal>connect_timeout</> is 5, the total time spent waiting for a is 5, each host will time out if no connection is made within 5
connection might be up to 10 seconds. seconds, so the total time spent waiting for a connection might be
up to 10 seconds.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>

View File

@ -1720,6 +1720,7 @@ connectDBComplete(PGconn *conn)
{ {
PostgresPollingStatusType flag = PGRES_POLLING_WRITING; PostgresPollingStatusType flag = PGRES_POLLING_WRITING;
time_t finish_time = ((time_t) -1); time_t finish_time = ((time_t) -1);
int timeout = 0;
if (conn == NULL || conn->status == CONNECTION_BAD) if (conn == NULL || conn->status == CONNECTION_BAD)
return 0; return 0;
@ -1729,8 +1730,7 @@ connectDBComplete(PGconn *conn)
*/ */
if (conn->connect_timeout != NULL) if (conn->connect_timeout != NULL)
{ {
int timeout = atoi(conn->connect_timeout); timeout = atoi(conn->connect_timeout);
if (timeout > 0) if (timeout > 0)
{ {
/* /*
@ -1745,6 +1745,8 @@ connectDBComplete(PGconn *conn)
for (;;) for (;;)
{ {
int ret = 0;
/* /*
* Wait, if necessary. Note that the initial state (just after * Wait, if necessary. Note that the initial state (just after
* PQconnectStart) is to wait for the socket to select for writing. * PQconnectStart) is to wait for the socket to select for writing.
@ -1761,7 +1763,8 @@ connectDBComplete(PGconn *conn)
return 1; /* success! */ return 1; /* success! */
case PGRES_POLLING_READING: case PGRES_POLLING_READING:
if (pqWaitTimed(1, 0, conn, finish_time)) ret = pqWaitTimed(1, 0, conn, finish_time);
if (ret == -1)
{ {
conn->status = CONNECTION_BAD; conn->status = CONNECTION_BAD;
return 0; return 0;
@ -1769,7 +1772,8 @@ connectDBComplete(PGconn *conn)
break; break;
case PGRES_POLLING_WRITING: case PGRES_POLLING_WRITING:
if (pqWaitTimed(0, 1, conn, finish_time)) ret = pqWaitTimed(0, 1, conn, finish_time);
if (ret == -1)
{ {
conn->status = CONNECTION_BAD; conn->status = CONNECTION_BAD;
return 0; return 0;
@ -1782,6 +1786,23 @@ connectDBComplete(PGconn *conn)
return 0; return 0;
} }
if (ret == 1) /* connect_timeout elapsed */
{
/* If there are no more hosts, return (the error message is already set) */
if (++conn->whichhost >= conn->nconnhost)
{
conn->whichhost = 0;
conn->status = CONNECTION_BAD;
return 0;
}
/* Attempt connection to the next host, starting the connect_timeout timer */
pqDropConnection(conn, true);
conn->addr_cur = conn->connhost[conn->whichhost].addrlist;
conn->status = CONNECTION_NEEDED;
if (conn->connect_timeout != NULL)
finish_time = time(NULL) + timeout;
}
/* /*
* Now try to advance the state machine. * Now try to advance the state machine.
*/ */

View File

@ -991,11 +991,9 @@ pqWait(int forRead, int forWrite, PGconn *conn)
/* /*
* pqWaitTimed: wait, but not past finish_time. * pqWaitTimed: wait, but not past finish_time.
* *
* If finish_time is exceeded then we return failure (EOF). This is like
* the response for a kernel exception because we don't want the caller
* to try to read/write in that case.
*
* finish_time = ((time_t) -1) disables the wait limit. * finish_time = ((time_t) -1) disables the wait limit.
*
* Returns -1 on failure, 0 if the socket is readable/writable, 1 if it timed out.
*/ */
int int
pqWaitTimed(int forRead, int forWrite, PGconn *conn, time_t finish_time) pqWaitTimed(int forRead, int forWrite, PGconn *conn, time_t finish_time)
@ -1005,13 +1003,13 @@ pqWaitTimed(int forRead, int forWrite, PGconn *conn, time_t finish_time)
result = pqSocketCheck(conn, forRead, forWrite, finish_time); result = pqSocketCheck(conn, forRead, forWrite, finish_time);
if (result < 0) if (result < 0)
return EOF; /* errorMessage is already set */ return -1; /* errorMessage is already set */
if (result == 0) if (result == 0)
{ {
printfPQExpBuffer(&conn->errorMessage, printfPQExpBuffer(&conn->errorMessage,
libpq_gettext("timeout expired\n")); libpq_gettext("timeout expired\n"));
return EOF; return 1;
} }
return 0; return 0;