From 4bad60e3fd9a5fc6070fd4d1bd820a280e174654 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Tue, 13 Jan 2015 12:58:43 +0100 Subject: [PATCH] Allow latches to wait for socket writability without waiting for readability. So far WaitLatchOrSocket() required to pass in WL_SOCKET_READABLE as that solely was used to indicate error conditions, like EOF. Waiting for WL_SOCKET_WRITEABLE would have meant to busy wait upon socket errors. Adjust the API to signal errors by returning the socket as readable, writable or both, depending on WL_SOCKET_READABLE/WL_SOCKET_WRITEABLE being specified. It would arguably be nicer to return WL_SOCKET_ERROR but that's not possible on platforms and would probably also result in more complex callsites. This previously had explicitly been forbidden in e42a21b9e6c9, as there was no strong use case at that point. We now are looking into making FE/BE communication use latches, so changing this makes sense. There also are some portability concerns because there cases of older platforms where select(2) is known to, in violation of POSIX, not return a socket as writable after the peer has closed it. So far the platforms where that's the case provide a working poll(2). If we find one where that's not the case, we'll need to add a workaround for that platform. Discussion: 20140927191243.GD5423@alap3.anarazel.de Reviewed-By: Heikki Linnakangas, Noah Misch --- src/backend/port/unix_latch.c | 31 ++++++++++++++++++++++++------- src/backend/port/win32_latch.c | 15 ++++++++++----- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/backend/port/unix_latch.c b/src/backend/port/unix_latch.c index ecf01bd22b..92ae780158 100644 --- a/src/backend/port/unix_latch.c +++ b/src/backend/port/unix_latch.c @@ -200,9 +200,9 @@ WaitLatch(volatile Latch *latch, int wakeEvents, long timeout) * Like WaitLatch, but with an extra socket argument for WL_SOCKET_* * conditions. * - * When waiting on a socket, WL_SOCKET_READABLE *must* be included in - * 'wakeEvents'; WL_SOCKET_WRITEABLE is optional. The reason for this is - * that EOF and error conditions are reported only via WL_SOCKET_READABLE. + * When waiting on a socket, EOF and error conditions are reported by + * returning the socket as readable/writable or both, depending on + * WL_SOCKET_READABLE/WL_SOCKET_WRITEABLE being specified. */ int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, @@ -230,8 +230,6 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE); Assert(wakeEvents != 0); /* must have at least one wake event */ - /* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */ - Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE); if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid) elog(ERROR, "cannot wait on a latch owned by another process"); @@ -291,7 +289,16 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, break; } - /* Must wait ... we use poll(2) if available, otherwise select(2) */ + /* + * Must wait ... we use poll(2) if available, otherwise select(2). + * + * On at least older linux kernels select(), in violation of POSIX, + * doesn't reliably return a socket as writable if closed - but we + * rely on that. So far all the known cases of this problem are on + * platforms that also provide a poll() implementation without that + * bug. If we find one where that's not the case, we'll need to add a + * workaround. + */ #ifdef HAVE_POLL nfds = 0; if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) @@ -346,7 +353,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, { /* at least one event occurred, so check revents values */ if ((wakeEvents & WL_SOCKET_READABLE) && - (pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL))) + (pfds[0].revents & POLLIN)) { /* data available in socket, or EOF/error condition */ result |= WL_SOCKET_READABLE; @@ -354,8 +361,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, if ((wakeEvents & WL_SOCKET_WRITEABLE) && (pfds[0].revents & POLLOUT)) { + /* socket is writable */ result |= WL_SOCKET_WRITEABLE; } + if (pfds[0].revents & (POLLHUP | POLLERR | POLLNVAL)) + { + /* EOF/error condition */ + if (wakeEvents & WL_SOCKET_READABLE) + result |= WL_SOCKET_READABLE; + if (wakeEvents & WL_SOCKET_WRITEABLE) + result |= WL_SOCKET_WRITEABLE; + } /* * We expect a POLLHUP when the remote end is closed, but because @@ -439,6 +455,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, } if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask)) { + /* socket is writable, or EOF */ result |= WL_SOCKET_WRITEABLE; } if ((wakeEvents & WL_POSTMASTER_DEATH) && diff --git a/src/backend/port/win32_latch.c b/src/backend/port/win32_latch.c index 112e60ed59..da0657c915 100644 --- a/src/backend/port/win32_latch.c +++ b/src/backend/port/win32_latch.c @@ -117,8 +117,6 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE); Assert(wakeEvents != 0); /* must have at least one wake event */ - /* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */ - Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE); if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid) elog(ERROR, "cannot wait on a latch owned by another process"); @@ -152,10 +150,10 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) { /* Need an event object to represent events on the socket */ - int flags = 0; + int flags = FD_CLOSE; /* always check for errors/EOF */ if (wakeEvents & WL_SOCKET_READABLE) - flags |= (FD_READ | FD_CLOSE); + flags |= FD_READ; if (wakeEvents & WL_SOCKET_WRITEABLE) flags |= FD_WRITE; @@ -232,7 +230,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, elog(ERROR, "failed to enumerate network events: error code %u", WSAGetLastError()); if ((wakeEvents & WL_SOCKET_READABLE) && - (resEvents.lNetworkEvents & (FD_READ | FD_CLOSE))) + (resEvents.lNetworkEvents & FD_READ)) { result |= WL_SOCKET_READABLE; } @@ -241,6 +239,13 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, { result |= WL_SOCKET_WRITEABLE; } + if (resEvents.lNetworkEvents & FD_CLOSE) + { + if (wakeEvents & WL_SOCKET_READABLE) + result |= WL_SOCKET_READABLE; + if (wakeEvents & WL_SOCKET_WRITEABLE) + result |= WL_SOCKET_WRITEABLE; + } } else if ((wakeEvents & WL_POSTMASTER_DEATH) && rc == WAIT_OBJECT_0 + pmdeath_eventno)