From 421685812290406daea58b78dfab0346eb683bbb Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Sat, 11 Apr 2020 10:30:00 -0700 Subject: [PATCH] When WalSndCaughtUp, sleep only in WalSndWaitForWal(). Before sleeping, WalSndWaitForWal() sends a keepalive if MyWalSnd->write < sentPtr. That is important in logical replication. When the latest physical LSN yields no logical replication messages (a common case), that keepalive elicits a reply, and processing the reply updates pg_stat_replication.replay_lsn. WalSndLoop() lacks that; when WalSndLoop() slept, replay_lsn advancement could stall until wal_receiver_status_interval elapsed. This sometimes stalled src/test/subscription/t/001_rep_changes.pl for up to 10s. Discussion: https://postgr.es/m/20200406063649.GA3738151@rfd.leadboat.com --- src/backend/replication/walsender.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 122d884f3e..fc475d144d 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -1428,8 +1428,10 @@ WalSndWaitForWal(XLogRecPtr loc) /* * We only send regular messages to the client for full decoded * transactions, but a synchronous replication and walsender shutdown - * possibly are waiting for a later location. So we send pings - * containing the flush location every now and then. + * possibly are waiting for a later location. So, before sleeping, we + * send a ping containing the flush location. If the receiver is + * otherwise idle, this keepalive will trigger a reply. Processing the + * reply will update these MyWalSnd locations. */ if (MyWalSnd->flush < sentPtr && MyWalSnd->write < sentPtr && @@ -2314,20 +2316,16 @@ WalSndLoop(WalSndSendDataCallback send_data) WalSndKeepaliveIfNecessary(); /* - * We don't block if not caught up, unless there is unsent data - * pending in which case we'd better block until the socket is - * write-ready. This test is only needed for the case where the - * send_data callback handled a subset of the available data but then - * pq_flush_if_writable flushed it all --- we should immediately try - * to send more. + * Block if we have unsent data. Let WalSndWaitForWal() handle any + * other blocking; idle receivers need its additional actions. */ - if ((WalSndCaughtUp && !streamingDoneSending) || pq_is_send_pending()) + if (pq_is_send_pending()) { long sleeptime; int wakeEvents; wakeEvents = WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | WL_TIMEOUT | - WL_SOCKET_READABLE; + WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE; /* * Use fresh timestamp, not last_processing, to reduce the chance @@ -2335,9 +2333,6 @@ WalSndLoop(WalSndSendDataCallback send_data) */ sleeptime = WalSndComputeSleeptime(GetCurrentTimestamp()); - if (pq_is_send_pending()) - wakeEvents |= WL_SOCKET_WRITEABLE; - /* Sleep until something happens or we time out */ (void) WaitLatchOrSocket(MyLatch, wakeEvents, MyProcPort->sock, sleeptime,