Fix WaitLatch() to return promptly when the requested timeout expires.

If the sleep is interrupted by a signal, we must recompute the remaining time to wait; otherwise, a steady stream of non-wait-terminating interrupts could delay return from WaitLatch indefinitely. This has been shown to be a problem for the autovacuum launcher, and there may well be other places now or in the future with similar issues. So we'd better make the function robust, even though this'll add at least one gettimeofday call per wait. Back-patch to 9.2. We might eventually need to fix 9.1 as well, but the code is quite different there, and the usage of WaitLatch in 9.1 is so limited that it's not clearly important to do so. Reported and diagnosed by Jeff Janes, though I rewrote his patch rather heavily.
2012-11-08 20:04:48 -05:00 · 2012-11-08 20:04:48 -05:00 · 3e7fdcffd6
parent dcc55dd21a
commit 3e7fdcffd6
3 changed files with 156 additions and 90 deletions
--- a/src/backend/port/unix_latch.c
+++ b/src/backend/port/unix_latch.c
@ -48,6 +48,7 @@
 #endif

 #include "miscadmin.h"
+#include "portability/instr_time.h"
 #include "postmaster/postmaster.h"
 #include "storage/latch.h"
 #include "storage/pmsignal.h"
@ -176,12 +177,8 @@ DisownLatch(volatile Latch *latch)
 * function returns immediately.
 *
 * The 'timeout' is given in milliseconds. It must be >= 0 if WL_TIMEOUT flag
- * is given.  On some platforms, signals do not interrupt the wait, or even
- * cause the timeout to be restarted, so beware that the function can sleep
- * for several times longer than the requested timeout.  However, this
- * difficulty is not so great as it seems, because the signal handlers for any
- * signals that the caller should respond to ought to be programmed to end the
- * wait by calling SetLatch.  Ideally, the timeout parameter is vestigial.
+ * is given.  Note that some extra overhead is incurred when WL_TIMEOUT is
+ * given, so avoid using a timeout if possible.
 *
 * The latch must be owned by the current process, ie. it must be a
 * backend-local latch initialized with InitLatch, or a shared latch
@ -211,13 +208,16 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 {
 	int			result = 0;
 	int			rc;
+	instr_time	start_time,
+				cur_time;
+	long		cur_timeout;

 #ifdef HAVE_POLL
 	struct pollfd pfds[3];
 	int			nfds;
 #else
 	struct timeval tv,
-			   *tvp = NULL;
+			   *tvp;
 	fd_set		input_mask;
 	fd_set		output_mask;
 	int			hifd;
@ -234,21 +234,30 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 	if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
 		elog(ERROR, "cannot wait on a latch owned by another process");

-	/* Initialize timeout */
+	/*
+	 * Initialize timeout if requested.  We must record the current time so
+	 * that we can determine the remaining timeout if the poll() or select()
+	 * is interrupted.	(On some platforms, select() will update the contents
+	 * of "tv" for us, but unfortunately we can't rely on that.)
+	 */
 	if (wakeEvents & WL_TIMEOUT)
 	{
+		INSTR_TIME_SET_CURRENT(start_time);
 		Assert(timeout >= 0);
+		cur_timeout = timeout;
+
 #ifndef HAVE_POLL
-		tv.tv_sec = timeout / 1000L;
-		tv.tv_usec = (timeout % 1000L) * 1000L;
+		tv.tv_sec = cur_timeout / 1000L;
+		tv.tv_usec = (cur_timeout % 1000L) * 1000L;
 		tvp = &tv;
 #endif
 	}
 	else
 	{
-#ifdef HAVE_POLL
-		/* make sure poll() agrees there is no timeout */
-		timeout = -1;
+		cur_timeout = -1;
+
+#ifndef HAVE_POLL
+		tvp = NULL;
 #endif
 	}

@ -311,54 +320,62 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		}

 		/* Sleep */
-		rc = poll(pfds, nfds, (int) timeout);
+		rc = poll(pfds, nfds, (int) cur_timeout);

 		/* Check return code */
 		if (rc < 0)
 		{
-			if (errno == EINTR)
-				continue;
-			waiting = false;
-			ereport(ERROR,
-					(errcode_for_socket_access(),
-					 errmsg("poll() failed: %m")));
+			/* EINTR is okay, otherwise complain */
+			if (errno != EINTR)
+			{
+				waiting = false;
+				ereport(ERROR,
+						(errcode_for_socket_access(),
+						 errmsg("poll() failed: %m")));
+			}
 		}
-		if (rc == 0 && (wakeEvents & WL_TIMEOUT))
+		else if (rc == 0)
 		{
 			/* timeout exceeded */
-			result |= WL_TIMEOUT;
+			if (wakeEvents & WL_TIMEOUT)
+				result |= WL_TIMEOUT;
 		}
-		if ((wakeEvents & WL_SOCKET_READABLE) &&
-			(pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
+		else
 		{
-			/* data available in socket, or EOF/error condition */
-			result |= WL_SOCKET_READABLE;
-		}
-		if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
-			(pfds[0].revents & POLLOUT))
-		{
-			result |= WL_SOCKET_WRITEABLE;
-		}
+			/* at least one event occurred, so check revents values */
+			if ((wakeEvents & WL_SOCKET_READABLE) &&
+				(pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
+			{
+				/* data available in socket, or EOF/error condition */
+				result |= WL_SOCKET_READABLE;
+			}
+			if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
+				(pfds[0].revents & POLLOUT))
+			{
+				result |= WL_SOCKET_WRITEABLE;
+			}

-		/*
-		 * We expect a POLLHUP when the remote end is closed, but because we
-		 * don't expect the pipe to become readable or to have any errors
-		 * either, treat those as postmaster death, too.
-		 */
-		if ((wakeEvents & WL_POSTMASTER_DEATH) &&
-		  (pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
-		{
 			/*
-			 * According to the select(2) man page on Linux, select(2) may
-			 * spuriously return and report a file descriptor as readable,
-			 * when it's not; and presumably so can poll(2).  It's not clear
-			 * that the relevant cases would ever apply to the postmaster
-			 * pipe, but since the consequences of falsely returning
-			 * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
-			 * trouble to positively verify EOF with PostmasterIsAlive().
+			 * We expect a POLLHUP when the remote end is closed, but because
+			 * we don't expect the pipe to become readable or to have any
+			 * errors either, treat those cases as postmaster death, too.
 			 */
-			if (!PostmasterIsAlive())
-				result |= WL_POSTMASTER_DEATH;
+			if ((wakeEvents & WL_POSTMASTER_DEATH) &&
+				(pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
+			{
+				/*
+				 * According to the select(2) man page on Linux, select(2) may
+				 * spuriously return and report a file descriptor as readable,
+				 * when it's not; and presumably so can poll(2).  It's not
+				 * clear that the relevant cases would ever apply to the
+				 * postmaster pipe, but since the consequences of falsely
+				 * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
+				 * we take the trouble to positively verify EOF with
+				 * PostmasterIsAlive().
+				 */
+				if (!PostmasterIsAlive())
+					result |= WL_POSTMASTER_DEATH;
+			}
 		}
 #else							/* !HAVE_POLL */

@ -395,43 +412,66 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		/* Check return code */
 		if (rc < 0)
 		{
-			if (errno == EINTR)
-				continue;
-			waiting = false;
-			ereport(ERROR,
-					(errcode_for_socket_access(),
-					 errmsg("select() failed: %m")));
+			/* EINTR is okay, otherwise complain */
+			if (errno != EINTR)
+			{
+				waiting = false;
+				ereport(ERROR,
+						(errcode_for_socket_access(),
+						 errmsg("select() failed: %m")));
+			}
 		}
-		if (rc == 0 && (wakeEvents & WL_TIMEOUT))
+		else if (rc == 0)
 		{
 			/* timeout exceeded */
-			result |= WL_TIMEOUT;
+			if (wakeEvents & WL_TIMEOUT)
+				result |= WL_TIMEOUT;
 		}
-		if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
+		else
 		{
-			/* data available in socket, or EOF */
-			result |= WL_SOCKET_READABLE;
-		}
-		if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
-		{
-			result |= WL_SOCKET_WRITEABLE;
-		}
-		if ((wakeEvents & WL_POSTMASTER_DEATH) &&
+			/* at least one event occurred, so check masks */
+			if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
+			{
+				/* data available in socket, or EOF */
+				result |= WL_SOCKET_READABLE;
+			}
+			if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
+			{
+				result |= WL_SOCKET_WRITEABLE;
+			}
+			if ((wakeEvents & WL_POSTMASTER_DEATH) &&
 			FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask))
-		{
-			/*
-			 * According to the select(2) man page on Linux, select(2) may
-			 * spuriously return and report a file descriptor as readable,
-			 * when it's not; and presumably so can poll(2).  It's not clear
-			 * that the relevant cases would ever apply to the postmaster
-			 * pipe, but since the consequences of falsely returning
-			 * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
-			 * trouble to positively verify EOF with PostmasterIsAlive().
-			 */
-			if (!PostmasterIsAlive())
-				result |= WL_POSTMASTER_DEATH;
+			{
+				/*
+				 * According to the select(2) man page on Linux, select(2) may
+				 * spuriously return and report a file descriptor as readable,
+				 * when it's not; and presumably so can poll(2).  It's not
+				 * clear that the relevant cases would ever apply to the
+				 * postmaster pipe, but since the consequences of falsely
+				 * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
+				 * we take the trouble to positively verify EOF with
+				 * PostmasterIsAlive().
+				 */
+				if (!PostmasterIsAlive())
+					result |= WL_POSTMASTER_DEATH;
+			}
 		}
 #endif   /* HAVE_POLL */
+
+		/* If we're not done, update cur_timeout for next iteration */
+		if (result == 0 && cur_timeout >= 0)
+		{
+			INSTR_TIME_SET_CURRENT(cur_time);
+			INSTR_TIME_SUBTRACT(cur_time, start_time);
+			cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
+			if (cur_timeout < 0)
+				cur_timeout = 0;
+
+#ifndef HAVE_POLL
+			tv.tv_sec = cur_timeout / 1000L;
+			tv.tv_usec = (cur_timeout % 1000L) * 1000L;
+#endif
+		}
 	} while (result == 0);
 	waiting = false;

--- a/src/backend/port/win32_latch.c
+++ b/src/backend/port/win32_latch.c
@ -24,6 +24,7 @@
 #include <unistd.h>

 #include "miscadmin.h"
+#include "portability/instr_time.h"
 #include "postmaster/postmaster.h"
 #include "storage/latch.h"
 #include "storage/pmsignal.h"
@ -100,6 +101,9 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 				  long timeout)
 {
 	DWORD		rc;
+	instr_time	start_time,
+				cur_time;
+	long		cur_timeout;
 	HANDLE		events[4];
 	HANDLE		latchevent;
 	HANDLE		sockevent = WSA_INVALID_EVENT;
@ -118,11 +122,19 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 	if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
 		elog(ERROR, "cannot wait on a latch owned by another process");

-	/* Convert timeout to form used by WaitForMultipleObjects() */
+	/*
+	 * Initialize timeout if requested.  We must record the current time so
+	 * that we can determine the remaining timeout if WaitForMultipleObjects
+	 * is interrupted.
+	 */
 	if (wakeEvents & WL_TIMEOUT)
+	{
+		INSTR_TIME_SET_CURRENT(start_time);
 		Assert(timeout >= 0);
+		cur_timeout = timeout;
+	}
 	else
-		timeout = INFINITE;
+		cur_timeout = INFINITE;

 	/*
 	 * Construct an array of event handles for WaitforMultipleObjects().
@ -187,7 +199,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 			break;
 		}

-		rc = WaitForMultipleObjects(numevents, events, FALSE, timeout);
+		rc = WaitForMultipleObjects(numevents, events, FALSE, cur_timeout);

 		if (rc == WAIT_FAILED)
 			elog(ERROR, "WaitForMultipleObjects() failed: error code %lu",
@ -203,7 +215,11 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		}
 		else if (rc == WAIT_OBJECT_0 + 1)
 		{
-			/* Latch is set, we'll handle that on next iteration of loop */
+			/*
+			 * Latch is set.  We'll handle that on next iteration of loop, but
+			 * let's not waste the cycles to update cur_timeout below.
+			 */
+			continue;
 		}
 		else if ((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) &&
 				 rc == WAIT_OBJECT_0 + 2)		/* socket is at event slot 2 */
@ -240,8 +256,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 		}
 		else
 			elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc);
-	}
-	while (result == 0);
+
+		/* If we're not done, update cur_timeout for next iteration */
+		if (result == 0 && cur_timeout != INFINITE)
+		{
+			INSTR_TIME_SET_CURRENT(cur_time);
+			INSTR_TIME_SUBTRACT(cur_time, start_time);
+			cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
+			if (cur_timeout < 0)
+				cur_timeout = 0;
+		}
+	} while (result == 0);

 	/* Clean up the event object we created for the socket */
 	if (sockevent != WSA_INVALID_EVENT)
--- a/src/include/storage/latch.h
+++ b/src/include/storage/latch.h
@ -33,8 +33,8 @@
 * ResetLatch	- Clears the latch, allowing it to be set again
 * WaitLatch	- Waits for the latch to become set
 *
- * WaitLatch includes a provision for timeouts (which should hopefully not
- * be necessary once the code is fully latch-ified) and a provision for
+ * WaitLatch includes a provision for timeouts (which should be avoided
+ * when possible, as they incur extra overhead) and a provision for
 * postmaster child processes to wake up immediately on postmaster death.
 * See unix_latch.c for detailed specifications for the exported functions.
 *
@ -64,14 +64,15 @@
 * will be lifted in future by inserting suitable memory barriers into
 * SetLatch and ResetLatch.
 *
+ * On some platforms, signals will not interrupt the latch wait primitive
+ * by themselves.  Therefore, it is critical that any signal handler that
+ * is meant to terminate a WaitLatch wait calls SetLatch.
+ *
 * Note that use of the process latch (PGPROC.procLatch) is generally better
 * than an ad-hoc shared latch for signaling auxiliary processes.  This is
 * because generic signal handlers will call SetLatch on the process latch
 * only, so using any latch other than the process latch effectively precludes
- * ever registering a generic handler.	Since signals have the potential to
- * invalidate the latch timeout on some platforms, resulting in a
- * denial-of-service, it is important to verify that all signal handlers
- * within all WaitLatch-calling processes call SetLatch.
+ * use of any generic handler.
 *
 *
 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group