Copy and store addrinfo in libpq-owned private memory

This refactors libpq to copy addrinfos returned by getaddrinfo to
memory owned by libpq such that future improvements can alter for
example the order of entries.

As a nice side effect of this refactor the mechanism for iteration
over addresses in PQconnectPoll is now identical to its iteration
over hosts.

Author: Jelte Fennema <postgres@jeltef.nl>
Reviewed-by: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-by: Michael Banck <mbanck@gmx.net>
Reviewed-by: Andrey Borodin <amborodin86@gmail.com>
Discussion: https://postgr.es/m/PR3PR83MB04768E2FF04818EEB2179949F7A69@PR3PR83MB0476.EURPRD83.prod.outlook.com
This commit is contained in:
Daniel Gustafsson 2023-03-29 21:41:27 +02:00
parent 8e5eef50c5
commit 44d85ba5a3
4 changed files with 92 additions and 34 deletions

View File

@ -27,6 +27,12 @@ typedef struct
socklen_t salen; socklen_t salen;
} SockAddr; } SockAddr;
typedef struct
{
int family;
SockAddr addr;
} AddrInfo;
/* Configure the UNIX socket location for the well known port. */ /* Configure the UNIX socket location for the well known port. */
#define UNIXSOCK_PATH(path, port, sockdir) \ #define UNIXSOCK_PATH(path, port, sockdir) \

View File

@ -389,6 +389,7 @@ static bool fillPGconn(PGconn *conn, PQconninfoOption *connOptions);
static void freePGconn(PGconn *conn); static void freePGconn(PGconn *conn);
static void closePGconn(PGconn *conn); static void closePGconn(PGconn *conn);
static void release_conn_addrinfo(PGconn *conn); static void release_conn_addrinfo(PGconn *conn);
static int store_conn_addrinfo(PGconn *conn, struct addrinfo *addrlist);
static void sendTerminateConn(PGconn *conn); static void sendTerminateConn(PGconn *conn);
static PQconninfoOption *conninfo_init(PQExpBuffer errorMessage); static PQconninfoOption *conninfo_init(PQExpBuffer errorMessage);
static PQconninfoOption *parse_connection_string(const char *connstr, static PQconninfoOption *parse_connection_string(const char *connstr,
@ -2295,7 +2296,7 @@ connectDBComplete(PGconn *conn)
time_t finish_time = ((time_t) -1); time_t finish_time = ((time_t) -1);
int timeout = 0; int timeout = 0;
int last_whichhost = -2; /* certainly different from whichhost */ int last_whichhost = -2; /* certainly different from whichhost */
struct addrinfo *last_addr_cur = NULL; int last_whichaddr = -2; /* certainly different from whichaddr */
if (conn == NULL || conn->status == CONNECTION_BAD) if (conn == NULL || conn->status == CONNECTION_BAD)
return 0; return 0;
@ -2339,11 +2340,11 @@ connectDBComplete(PGconn *conn)
if (flag != PGRES_POLLING_OK && if (flag != PGRES_POLLING_OK &&
timeout > 0 && timeout > 0 &&
(conn->whichhost != last_whichhost || (conn->whichhost != last_whichhost ||
conn->addr_cur != last_addr_cur)) conn->whichaddr != last_whichaddr))
{ {
finish_time = time(NULL) + timeout; finish_time = time(NULL) + timeout;
last_whichhost = conn->whichhost; last_whichhost = conn->whichhost;
last_addr_cur = conn->addr_cur; last_whichaddr = conn->whichaddr;
} }
/* /*
@ -2490,9 +2491,9 @@ keep_going: /* We will come back to here until there is
/* Time to advance to next address, or next host if no more addresses? */ /* Time to advance to next address, or next host if no more addresses? */
if (conn->try_next_addr) if (conn->try_next_addr)
{ {
if (conn->addr_cur && conn->addr_cur->ai_next) if (conn->whichaddr < conn->naddr)
{ {
conn->addr_cur = conn->addr_cur->ai_next; conn->whichaddr++;
reset_connection_state_machine = true; reset_connection_state_machine = true;
} }
else else
@ -2505,6 +2506,7 @@ keep_going: /* We will come back to here until there is
{ {
pg_conn_host *ch; pg_conn_host *ch;
struct addrinfo hint; struct addrinfo hint;
struct addrinfo *addrlist;
int thisport; int thisport;
int ret; int ret;
char portstr[MAXPGPATH]; char portstr[MAXPGPATH];
@ -2545,7 +2547,7 @@ keep_going: /* We will come back to here until there is
/* Initialize hint structure */ /* Initialize hint structure */
MemSet(&hint, 0, sizeof(hint)); MemSet(&hint, 0, sizeof(hint));
hint.ai_socktype = SOCK_STREAM; hint.ai_socktype = SOCK_STREAM;
conn->addrlist_family = hint.ai_family = AF_UNSPEC; hint.ai_family = AF_UNSPEC;
/* Figure out the port number we're going to use. */ /* Figure out the port number we're going to use. */
if (ch->port == NULL || ch->port[0] == '\0') if (ch->port == NULL || ch->port[0] == '\0')
@ -2568,8 +2570,8 @@ keep_going: /* We will come back to here until there is
{ {
case CHT_HOST_NAME: case CHT_HOST_NAME:
ret = pg_getaddrinfo_all(ch->host, portstr, &hint, ret = pg_getaddrinfo_all(ch->host, portstr, &hint,
&conn->addrlist); &addrlist);
if (ret || !conn->addrlist) if (ret || !addrlist)
{ {
libpq_append_conn_error(conn, "could not translate host name \"%s\" to address: %s", libpq_append_conn_error(conn, "could not translate host name \"%s\" to address: %s",
ch->host, gai_strerror(ret)); ch->host, gai_strerror(ret));
@ -2580,8 +2582,8 @@ keep_going: /* We will come back to here until there is
case CHT_HOST_ADDRESS: case CHT_HOST_ADDRESS:
hint.ai_flags = AI_NUMERICHOST; hint.ai_flags = AI_NUMERICHOST;
ret = pg_getaddrinfo_all(ch->hostaddr, portstr, &hint, ret = pg_getaddrinfo_all(ch->hostaddr, portstr, &hint,
&conn->addrlist); &addrlist);
if (ret || !conn->addrlist) if (ret || !addrlist)
{ {
libpq_append_conn_error(conn, "could not parse network address \"%s\": %s", libpq_append_conn_error(conn, "could not parse network address \"%s\": %s",
ch->hostaddr, gai_strerror(ret)); ch->hostaddr, gai_strerror(ret));
@ -2590,7 +2592,7 @@ keep_going: /* We will come back to here until there is
break; break;
case CHT_UNIX_SOCKET: case CHT_UNIX_SOCKET:
conn->addrlist_family = hint.ai_family = AF_UNIX; hint.ai_family = AF_UNIX;
UNIXSOCK_PATH(portstr, thisport, ch->host); UNIXSOCK_PATH(portstr, thisport, ch->host);
if (strlen(portstr) >= UNIXSOCK_PATH_BUFLEN) if (strlen(portstr) >= UNIXSOCK_PATH_BUFLEN)
{ {
@ -2605,8 +2607,8 @@ keep_going: /* We will come back to here until there is
* name as a Unix-domain socket path. * name as a Unix-domain socket path.
*/ */
ret = pg_getaddrinfo_all(NULL, portstr, &hint, ret = pg_getaddrinfo_all(NULL, portstr, &hint,
&conn->addrlist); &addrlist);
if (ret || !conn->addrlist) if (ret || !addrlist)
{ {
libpq_append_conn_error(conn, "could not translate Unix-domain socket path \"%s\" to address: %s", libpq_append_conn_error(conn, "could not translate Unix-domain socket path \"%s\" to address: %s",
portstr, gai_strerror(ret)); portstr, gai_strerror(ret));
@ -2615,8 +2617,15 @@ keep_going: /* We will come back to here until there is
break; break;
} }
/* OK, scan this addrlist for a working server address */ /*
conn->addr_cur = conn->addrlist; * Store a copy of the addrlist in private memory so we can perform
* randomization for load balancing.
*/
ret = store_conn_addrinfo(conn, addrlist);
pg_freeaddrinfo_all(hint.ai_family, addrlist);
if (ret)
goto error_return; /* message already logged */
reset_connection_state_machine = true; reset_connection_state_machine = true;
conn->try_next_host = false; conn->try_next_host = false;
} }
@ -2673,31 +2682,30 @@ keep_going: /* We will come back to here until there is
{ {
/* /*
* Try to initiate a connection to one of the addresses * Try to initiate a connection to one of the addresses
* returned by pg_getaddrinfo_all(). conn->addr_cur is the * returned by pg_getaddrinfo_all(). conn->whichaddr is the
* next one to try. * next one to try.
* *
* The extra level of braces here is historical. It's not * The extra level of braces here is historical. It's not
* worth reindenting this whole switch case to remove 'em. * worth reindenting this whole switch case to remove 'em.
*/ */
{ {
struct addrinfo *addr_cur = conn->addr_cur;
char host_addr[NI_MAXHOST]; char host_addr[NI_MAXHOST];
int sock_type; int sock_type;
AddrInfo *addr_cur;
/* /*
* Advance to next possible host, if we've tried all of * Advance to next possible host, if we've tried all of
* the addresses for the current host. * the addresses for the current host.
*/ */
if (addr_cur == NULL) if (conn->whichaddr == conn->naddr)
{ {
conn->try_next_host = true; conn->try_next_host = true;
goto keep_going; goto keep_going;
} }
addr_cur = &conn->addr[conn->whichaddr];
/* Remember current address for possible use later */ /* Remember current address for possible use later */
memcpy(&conn->raddr.addr, addr_cur->ai_addr, memcpy(&conn->raddr, &addr_cur->addr, sizeof(SockAddr));
addr_cur->ai_addrlen);
conn->raddr.salen = addr_cur->ai_addrlen;
/* /*
* Set connip, too. Note we purposely ignore strdup * Set connip, too. Note we purposely ignore strdup
@ -2732,7 +2740,7 @@ keep_going: /* We will come back to here until there is
*/ */
sock_type |= SOCK_NONBLOCK; sock_type |= SOCK_NONBLOCK;
#endif #endif
conn->sock = socket(addr_cur->ai_family, sock_type, 0); conn->sock = socket(addr_cur->family, sock_type, 0);
if (conn->sock == PGINVALID_SOCKET) if (conn->sock == PGINVALID_SOCKET)
{ {
int errorno = SOCK_ERRNO; int errorno = SOCK_ERRNO;
@ -2743,7 +2751,7 @@ keep_going: /* We will come back to here until there is
* cases where the address list includes both IPv4 and * cases where the address list includes both IPv4 and
* IPv6 but kernel only accepts one family. * IPv6 but kernel only accepts one family.
*/ */
if (addr_cur->ai_next != NULL || if (conn->whichaddr < conn->naddr ||
conn->whichhost + 1 < conn->nconnhost) conn->whichhost + 1 < conn->nconnhost)
{ {
conn->try_next_addr = true; conn->try_next_addr = true;
@ -2769,7 +2777,7 @@ keep_going: /* We will come back to here until there is
* TCP sockets, nonblock mode, close-on-exec. Try the * TCP sockets, nonblock mode, close-on-exec. Try the
* next address if any of this fails. * next address if any of this fails.
*/ */
if (addr_cur->ai_family != AF_UNIX) if (addr_cur->family != AF_UNIX)
{ {
if (!connectNoDelay(conn)) if (!connectNoDelay(conn))
{ {
@ -2800,7 +2808,7 @@ keep_going: /* We will come back to here until there is
#endif /* F_SETFD */ #endif /* F_SETFD */
#endif #endif
if (addr_cur->ai_family != AF_UNIX) if (addr_cur->family != AF_UNIX)
{ {
#ifndef WIN32 #ifndef WIN32
int on = 1; int on = 1;
@ -2892,8 +2900,8 @@ keep_going: /* We will come back to here until there is
* Start/make connection. This should not block, since we * Start/make connection. This should not block, since we
* are in nonblock mode. If it does, well, too bad. * are in nonblock mode. If it does, well, too bad.
*/ */
if (connect(conn->sock, addr_cur->ai_addr, if (connect(conn->sock, (struct sockaddr *) &addr_cur->addr.addr,
addr_cur->ai_addrlen) < 0) addr_cur->addr.salen) < 0)
{ {
if (SOCK_ERRNO == EINPROGRESS || if (SOCK_ERRNO == EINPROGRESS ||
#ifdef WIN32 #ifdef WIN32
@ -4318,6 +4326,49 @@ freePGconn(PGconn *conn)
free(conn); free(conn);
} }
/*
* store_conn_addrinfo
* - copy addrinfo to PGconn object
*
* Copies the addrinfos from addrlist to the PGconn object such that the
* addrinfos can be manipulated by libpq. Returns a positive integer on
* failure, otherwise zero.
*/
static int
store_conn_addrinfo(PGconn *conn, struct addrinfo *addrlist)
{
struct addrinfo *ai = addrlist;
conn->whichaddr = 0;
conn->naddr = 0;
while (ai)
{
ai = ai->ai_next;
conn->naddr++;
}
conn->addr = calloc(conn->naddr, sizeof(AddrInfo));
if (conn->addr == NULL)
{
libpq_append_conn_error(conn, "out of memory");
return 1;
}
ai = addrlist;
for (int i = 0; i < conn->naddr; i++)
{
conn->addr[i].family = ai->ai_family;
memcpy(&conn->addr[i].addr.addr, ai->ai_addr,
ai->ai_addrlen);
conn->addr[i].addr.salen = ai->ai_addrlen;
ai = ai->ai_next;
}
return 0;
}
/* /*
* release_conn_addrinfo * release_conn_addrinfo
* - Free any addrinfo list in the PGconn. * - Free any addrinfo list in the PGconn.
@ -4325,11 +4376,10 @@ freePGconn(PGconn *conn)
static void static void
release_conn_addrinfo(PGconn *conn) release_conn_addrinfo(PGconn *conn)
{ {
if (conn->addrlist) if (conn->addr)
{ {
pg_freeaddrinfo_all(conn->addrlist_family, conn->addrlist); free(conn->addr);
conn->addrlist = NULL; conn->addr = NULL;
conn->addr_cur = NULL; /* for safety */
} }
} }

View File

@ -471,9 +471,10 @@ struct pg_conn
PGTargetServerType target_server_type; /* desired session properties */ PGTargetServerType target_server_type; /* desired session properties */
bool try_next_addr; /* time to advance to next address/host? */ bool try_next_addr; /* time to advance to next address/host? */
bool try_next_host; /* time to advance to next connhost[]? */ bool try_next_host; /* time to advance to next connhost[]? */
struct addrinfo *addrlist; /* list of addresses for current connhost */ int naddr; /* number of addresses returned by getaddrinfo */
struct addrinfo *addr_cur; /* the one currently being tried */ int whichaddr; /* the address currently being tried */
int addrlist_family; /* needed to know how to free addrlist */ AddrInfo *addr; /* the array of addresses for the currently
* tried host */
bool send_appname; /* okay to send application_name? */ bool send_appname; /* okay to send application_name? */
/* Miscellaneous stuff */ /* Miscellaneous stuff */

View File

@ -26,6 +26,7 @@ AcquireSampleRowsFunc
ActionList ActionList
ActiveSnapshotElt ActiveSnapshotElt
AddForeignUpdateTargets_function AddForeignUpdateTargets_function
AddrInfo
AffixNode AffixNode
AffixNodeData AffixNodeData
AfterTriggerEvent AfterTriggerEvent