Allow latches to wait for socket writability without waiting for readability.

So far WaitLatchOrSocket() required to pass in WL_SOCKET_READABLE as that solely was used to indicate error conditions, like EOF. Waiting for WL_SOCKET_WRITEABLE would have meant to busy wait upon socket errors. Adjust the API to signal errors by returning the socket as readable, writable or both, depending on WL_SOCKET_READABLE/WL_SOCKET_WRITEABLE being specified. It would arguably be nicer to return WL_SOCKET_ERROR but that's not possible on platforms and would probably also result in more complex callsites. This previously had explicitly been forbidden in e42a21b9e6, as there was no strong use case at that point. We now are looking into making FE/BE communication use latches, so changing this makes sense. There also are some portability concerns because there cases of older platforms where select(2) is known to, in violation of POSIX, not return a socket as writable after the peer has closed it. So far the platforms where that's the case provide a working poll(2). If we find one where that's not the case, we'll need to add a workaround for that platform. Discussion: 20140927191243.GD5423@alap3.anarazel.de Reviewed-By: Heikki Linnakangas, Noah Misch
2015-01-13 12:58:43 +01:00 · 2015-01-13 12:58:43 +01:00 · 4bad60e3fd
parent 3dfce37627
commit 4bad60e3fd
2 changed files with 34 additions and 12 deletions
--- a/src/backend/port/unix_latch.c
+++ b/src/backend/port/unix_latch.c
@ -200,9 +200,9 @@ WaitLatch(volatile Latch *latch, int wakeEvents, long timeout)
 * Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
 * conditions.
 *
- * When waiting on a socket, WL_SOCKET_READABLE *must* be included in
- * 'wakeEvents'; WL_SOCKET_WRITEABLE is optional.  The reason for this is
- * that EOF and error conditions are reported only via WL_SOCKET_READABLE.
+ * When waiting on a socket, EOF and error conditions are reported by
+ * returning the socket as readable/writable or both, depending on
+ * WL_SOCKET_READABLE/WL_SOCKET_WRITEABLE being specified.
 */
 int
 WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
@ -230,8 +230,6 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);

 	Assert(wakeEvents != 0);	/* must have at least one wake event */
-	/* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */
-	Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE);

 	if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
 		elog(ERROR, "cannot wait on a latch owned by another process");
@ -291,7 +289,16 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 			break;
 		}

-		/* Must wait ... we use poll(2) if available, otherwise select(2) */
+		/*
+		 * Must wait ... we use poll(2) if available, otherwise select(2).
+		 *
+		 * On at least older linux kernels select(), in violation of POSIX,
+		 * doesn't reliably return a socket as writable if closed - but we
+		 * rely on that. So far all the known cases of this problem are on
+		 * platforms that also provide a poll() implementation without that
+		 * bug.  If we find one where that's not the case, we'll need to add a
+		 * workaround.
+		 */
 #ifdef HAVE_POLL
 		nfds = 0;
 		if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
@ -346,7 +353,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		{
 			/* at least one event occurred, so check revents values */
 			if ((wakeEvents & WL_SOCKET_READABLE) &&
-				(pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
+				(pfds[0].revents & POLLIN))
 			{
 				/* data available in socket, or EOF/error condition */
 				result |= WL_SOCKET_READABLE;
@ -354,8 +361,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 			if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
 				(pfds[0].revents & POLLOUT))
 			{
+				/* socket is writable */
 				result |= WL_SOCKET_WRITEABLE;
 			}
+			if (pfds[0].revents & (POLLHUP | POLLERR | POLLNVAL))
+			{
+				/* EOF/error condition */
+				if (wakeEvents & WL_SOCKET_READABLE)
+					result |= WL_SOCKET_READABLE;
+				if (wakeEvents & WL_SOCKET_WRITEABLE)
+					result |= WL_SOCKET_WRITEABLE;
+			}

 			/*
 			 * We expect a POLLHUP when the remote end is closed, but because
@ -439,6 +455,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 			}
 			if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
 			{
+				/* socket is writable, or EOF */
 				result |= WL_SOCKET_WRITEABLE;
 			}
 			if ((wakeEvents & WL_POSTMASTER_DEATH) &&
--- a/src/backend/port/win32_latch.c
+++ b/src/backend/port/win32_latch.c
@ -117,8 +117,6 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);

 	Assert(wakeEvents != 0);	/* must have at least one wake event */
-	/* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */
-	Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE);

 	if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
 		elog(ERROR, "cannot wait on a latch owned by another process");
@ -152,10 +150,10 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 	if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
 	{
 		/* Need an event object to represent events on the socket */
-		int			flags = 0;
+		int			flags = FD_CLOSE; /* always check for errors/EOF */

 		if (wakeEvents & WL_SOCKET_READABLE)
-			flags |= (FD_READ | FD_CLOSE);
+			flags |= FD_READ;
 		if (wakeEvents & WL_SOCKET_WRITEABLE)
 			flags |= FD_WRITE;

@ -232,7 +230,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 				elog(ERROR, "failed to enumerate network events: error code %u",
 					 WSAGetLastError());
 			if ((wakeEvents & WL_SOCKET_READABLE) &&
-				(resEvents.lNetworkEvents & (FD_READ | FD_CLOSE)))
+				(resEvents.lNetworkEvents & FD_READ))
 			{
 				result |= WL_SOCKET_READABLE;
 			}
@ -241,6 +239,13 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 			{
 				result |= WL_SOCKET_WRITEABLE;
 			}
+			if (resEvents.lNetworkEvents & FD_CLOSE)
+			{
+				if (wakeEvents & WL_SOCKET_READABLE)
+					result |= WL_SOCKET_READABLE;
+				if (wakeEvents & WL_SOCKET_WRITEABLE)
+					result |= WL_SOCKET_WRITEABLE;
+			}
 		}
 		else if ((wakeEvents & WL_POSTMASTER_DEATH) &&
 				 rc == WAIT_OBJECT_0 + pmdeath_eventno)