From 8ef3d9fae496d80fc1d100d49b46891ae9c2c0fc Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 1 Aug 2016 15:13:53 -0400 Subject: [PATCH] Don't CHECK_FOR_INTERRUPTS between WaitLatch and ResetLatch. This coding pattern creates a race condition, because if an interesting interrupt happens after we've checked InterruptPending but before we reset our latch, the latch-setting done by the signal handler would get lost, and then we might block at WaitLatch in the next iteration without ever noticing the interrupt condition. You can put the CHECK_FOR_INTERRUPTS before WaitLatch or after ResetLatch, but not between them. Aside from fixing the bugs, add some explanatory comments to latch.h to perhaps forestall the next person from making the same mistake. In HEAD, also replace gather_readnext's direct call of HandleParallelMessages with CHECK_FOR_INTERRUPTS. It does not seem clean or useful for this one caller to bypass ProcessInterrupts and go straight to HandleParallelMessages; not least because that fails to consider the InterruptPending flag, resulting in useless work both here (if InterruptPending isn't set) and in the next CHECK_FOR_INTERRUPTS call (if it is). This thinko seems to have been introduced in the initial coding of storage/ipc/shm_mq.c (commit ec9037df2), and then blindly copied into all the subsequent parallel-query support logic. Back-patch relevant hunks to 9.4 to extirpate the error everywhere. Discussion: <1661.1469996911@sss.pgh.pa.us> --- src/backend/libpq/pqmq.c | 2 +- src/backend/storage/ipc/shm_mq.c | 18 +++++++++--------- src/include/storage/latch.h | 16 ++++++++++++++++ src/test/modules/test_shm_mq/setup.c | 6 +++--- src/test/modules/test_shm_mq/test.c | 2 +- 5 files changed, 30 insertions(+), 14 deletions(-) diff --git a/src/backend/libpq/pqmq.c b/src/backend/libpq/pqmq.c index 0a3c2b70cb..549e76dfa0 100644 --- a/src/backend/libpq/pqmq.c +++ b/src/backend/libpq/pqmq.c @@ -172,8 +172,8 @@ mq_putmessage(char msgtype, const char *s, size_t len) break; WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0); - CHECK_FOR_INTERRUPTS(); ResetLatch(&MyProc->procLatch); + CHECK_FOR_INTERRUPTS(); } pq_mq_busy = false; diff --git a/src/backend/storage/ipc/shm_mq.c b/src/backend/storage/ipc/shm_mq.c index f88e624a41..65348dca97 100644 --- a/src/backend/storage/ipc/shm_mq.c +++ b/src/backend/storage/ipc/shm_mq.c @@ -868,11 +868,11 @@ shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, const void *data, */ WaitLatch(MyLatch, WL_LATCH_SET, 0); - /* An interrupt may have occurred while we were waiting. */ - CHECK_FOR_INTERRUPTS(); - /* Reset the latch so we don't spin. */ ResetLatch(MyLatch); + + /* An interrupt may have occurred while we were waiting. */ + CHECK_FOR_INTERRUPTS(); } else { @@ -965,11 +965,11 @@ shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed, bool nowait, */ WaitLatch(MyLatch, WL_LATCH_SET, 0); - /* An interrupt may have occurred while we were waiting. */ - CHECK_FOR_INTERRUPTS(); - /* Reset the latch so we don't spin. */ ResetLatch(MyLatch); + + /* An interrupt may have occurred while we were waiting. */ + CHECK_FOR_INTERRUPTS(); } } @@ -1071,11 +1071,11 @@ shm_mq_wait_internal(volatile shm_mq *mq, PGPROC *volatile * ptr, /* Wait to be signalled. */ WaitLatch(MyLatch, WL_LATCH_SET, 0); - /* An interrupt may have occurred while we were waiting. */ - CHECK_FOR_INTERRUPTS(); - /* Reset the latch so we don't spin. */ ResetLatch(MyLatch); + + /* An interrupt may have occurred while we were waiting. */ + CHECK_FOR_INTERRUPTS(); } } PG_CATCH(); diff --git a/src/include/storage/latch.h b/src/include/storage/latch.h index 28fc684d24..273836d553 100644 --- a/src/include/storage/latch.h +++ b/src/include/storage/latch.h @@ -52,6 +52,22 @@ * do. Otherwise, if someone sets the latch between the check and the * ResetLatch call, you will miss it and Wait will incorrectly block. * + * Another valid coding pattern looks like: + * + * for (;;) + * { + * if (work to do) + * Do Stuff(); // in particular, exit loop if some condition satisfied + * WaitLatch(); + * ResetLatch(); + * } + * + * This is useful to reduce latch traffic if it's expected that the loop's + * termination condition will often be satisfied in the first iteration; + * the cost is an extra loop iteration before blocking when it is not. + * What must be avoided is placing any checks for asynchronous events after + * WaitLatch and before ResetLatch, as that creates a race condition. + * * To wake up the waiter, you must first set a global flag or something * else that the wait loop tests in the "if (work to do)" part, and call * SetLatch *after* that. SetLatch is designed to return quickly if the diff --git a/src/test/modules/test_shm_mq/setup.c b/src/test/modules/test_shm_mq/setup.c index 7f2f5fd3ff..061af8b467 100644 --- a/src/test/modules/test_shm_mq/setup.c +++ b/src/test/modules/test_shm_mq/setup.c @@ -287,11 +287,11 @@ wait_for_workers_to_become_ready(worker_state *wstate, /* Wait to be signalled. */ WaitLatch(MyLatch, WL_LATCH_SET, 0); - /* An interrupt may have occurred while we were waiting. */ - CHECK_FOR_INTERRUPTS(); - /* Reset the latch so we don't spin. */ ResetLatch(MyLatch); + + /* An interrupt may have occurred while we were waiting. */ + CHECK_FOR_INTERRUPTS(); } } PG_CATCH(); diff --git a/src/test/modules/test_shm_mq/test.c b/src/test/modules/test_shm_mq/test.c index 732376daff..b36b4bf129 100644 --- a/src/test/modules/test_shm_mq/test.c +++ b/src/test/modules/test_shm_mq/test.c @@ -231,8 +231,8 @@ test_shm_mq_pipelined(PG_FUNCTION_ARGS) * for us to do. */ WaitLatch(MyLatch, WL_LATCH_SET, 0); - CHECK_FOR_INTERRUPTS(); ResetLatch(MyLatch); + CHECK_FOR_INTERRUPTS(); } }