From d3aa114ac4de9ecc558ba77ed5c85e2ad9ad01d4 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sun, 24 Nov 2019 18:03:39 -0500
Subject: [PATCH] Doc: improve discussion of race conditions involved in
 LISTEN.

The user docs didn't really explain how to use LISTEN safely,
so clarify that.  Also clean up some fuzzy-headed explanations
in comments.  No code changes.

Discussion: https://postgr.es/m/3ac7f397-4d5f-be8e-f354-440020675694@gmail.com
---
 doc/src/sgml/ref/listen.sgml | 31 ++++++++++++----
 src/backend/commands/async.c | 70 +++++++++++++++++++++---------------
 2 files changed, 65 insertions(+), 36 deletions(-)
diff --git a/doc/src/sgml/ref/listen.sgml b/doc/src/sgml/ref/listen.sgml
index ecc1fb82de..2fab9d65a1 100644
--- a/doc/src/sgml/ref/listen.sgml
+++ b/doc/src/sgml/ref/listen.sgml
@@ -63,13 +63,6 @@ LISTEN <replaceable class="parameter">channel</replaceable>
    <command>LISTEN</command> or <command>UNLISTEN</command> directly.  See the
    documentation for the interface you are using for more details.
   </para>
-
-  <para>
-   <xref linkend="sql-notify"/>
-   contains a more extensive
-   discussion of the use of <command>LISTEN</command> and
-   <command>NOTIFY</command>.
-  </para>
  </refsect1>
 
  <refsect1>
@@ -96,10 +89,34 @@ LISTEN <replaceable class="parameter">channel</replaceable>
    within a transaction that later rolls back, the set of notification
    channels being listened to is unchanged.
   </para>
+
   <para>
    A transaction that has executed <command>LISTEN</command> cannot be
    prepared for two-phase commit.
   </para>
+
+  <para>
+   There is a race condition when first setting up a listening session:
+   if concurrently-committing transactions are sending notify events,
+   exactly which of those will the newly listening session receive?
+   The answer is that the session will receive all events committed after
+   an instant during the transaction's commit step.  But that is slightly
+   later than any database state that the transaction could have observed
+   in queries.  This leads to the following rule for
+   using <command>LISTEN</command>: first execute (and commit!) that
+   command, then in a new transaction inspect the database state as needed
+   by the application logic, then rely on notifications to find out about
+   subsequent changes to the database state.  The first few received
+   notifications might refer to updates already observed in the initial
+   database inspection, but this is usually harmless.
+  </para>
+
+  <para>
+   <xref linkend="sql-notify"/>
+   contains a more extensive
+   discussion of the use of <command>LISTEN</command> and
+   <command>NOTIFY</command>.
+  </para>
  </refsect1>
 
  <refsect1>
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 5086775646..6522aa9fcf 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -1112,14 +1112,12 @@ Exec_ListenPreCommit(void)
 	amRegisteredListener = true;
 
 	/*
-	 * Try to move our pointer forward as far as possible. This will skip over
-	 * already-committed notifications. Still, we could get notifications that
-	 * have already committed before we started to LISTEN.
-	 *
-	 * Note that we are not yet listening on anything, so we won't deliver any
-	 * notification to the frontend.  Also, although our transaction might
-	 * have executed NOTIFY, those message(s) aren't queued yet so we can't
-	 * see them in the queue.
+	 * Try to move our pointer forward as far as possible.  This will skip
+	 * over already-committed notifications, which we want to do because they
+	 * might be quite stale.  Note that we are not yet listening on anything,
+	 * so we won't deliver such notifications to our frontend.  Also, although
+	 * our transaction might have executed NOTIFY, those message(s) aren't
+	 * queued yet so we won't skip them here.
 	 */
 	if (!QUEUE_POS_EQUAL(max, head))
 		asyncQueueReadAllNotifications();
@@ -1938,43 +1936,57 @@ asyncQueueReadAllNotifications(void)
 		return;
 	}
 
-	/* Get snapshot we'll use to decide which xacts are still in progress */
-	snapshot = RegisterSnapshot(GetLatestSnapshot());
-
 	/*----------
-	 * Note that we deliver everything that we see in the queue and that
-	 * matches our _current_ listening state.
-	 * Especially we do not take into account different commit times.
+	 * Get snapshot we'll use to decide which xacts are still in progress.
+	 * This is trickier than it might seem, because of race conditions.
 	 * Consider the following example:
 	 *
 	 * Backend 1:					 Backend 2:
 	 *
 	 * transaction starts
+	 * UPDATE foo SET ...;
 	 * NOTIFY foo;
 	 * commit starts
+	 * queue the notify message
 	 *								 transaction starts
-	 *								 LISTEN foo;
-	 *								 commit starts
+	 *								 LISTEN foo;  -- first LISTEN in session
+	 *								 SELECT * FROM foo WHERE ...;
 	 * commit to clog
+	 *								 commit starts
+	 *								 add backend 2 to array of listeners
+	 *								 advance to queue head (this code)
 	 *								 commit to clog
 	 *
-	 * It could happen that backend 2 sees the notification from backend 1 in
-	 * the queue.  Even though the notifying transaction committed before
-	 * the listening transaction, we still deliver the notification.
+	 * Transaction 2's SELECT has not seen the UPDATE's effects, since that
+	 * wasn't committed yet.  Ideally we'd ensure that client 2 would
+	 * eventually get transaction 1's notify message, but there's no way
+	 * to do that; until we're in the listener array, there's no guarantee
+	 * that the notify message doesn't get removed from the queue.
 	 *
-	 * The idea is that an additional notification does not do any harm, we
-	 * just need to make sure that we do not miss a notification.
+	 * Therefore the coding technique transaction 2 is using is unsafe:
+	 * applications must commit a LISTEN before inspecting database state,
+	 * if they want to ensure they will see notifications about subsequent
+	 * changes to that state.
 	 *
-	 * It is possible that we fail while trying to send a message to our
-	 * frontend (for example, because of encoding conversion failure).
-	 * If that happens it is critical that we not try to send the same
-	 * message over and over again.  Therefore, we place a PG_TRY block
-	 * here that will forcibly advance our backend position before we lose
-	 * control to an error.  (We could alternatively retake AsyncQueueLock
-	 * and move the position before handling each individual message, but
-	 * that seems like too much lock traffic.)
+	 * What we do guarantee is that we'll see all notifications from
+	 * transactions committing after the snapshot we take here.
+	 * Exec_ListenPreCommit has already added us to the listener array,
+	 * so no not-yet-committed messages can be removed from the queue
+	 * before we see them.
 	 *----------
 	 */
+	snapshot = RegisterSnapshot(GetLatestSnapshot());
+
+	/*
+	 * It is possible that we fail while trying to send a message to our
+	 * frontend (for example, because of encoding conversion failure).  If
+	 * that happens it is critical that we not try to send the same message
+	 * over and over again.  Therefore, we place a PG_TRY block here that will
+	 * forcibly advance our queue position before we lose control to an error.
+	 * (We could alternatively retake AsyncQueueLock and move the position
+	 * before handling each individual message, but that seems like too much
+	 * lock traffic.)
+	 */
 	PG_TRY();
 	{
 		bool		reachedStop;