mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-09-30 18:51:20 +02:00
Improve replication lag interpolation after idle period
After sitting idle and fully replayed for a while and then encountering a new burst of WAL activity, we interpolated between an ancient sample and the not-yet-reached one for the new traffic. That produced a corner-case lag report after receiving the first new reply from the standby, which could sometimes be a large spike. Correct this by resetting the last_read time when the lag tracker is empty, and handle the resulting new case in which only a future sample is available. Author: Thomas Munro
This commit is contained in:
parent
a79122b061
commit
9ea3c64124
@ -3443,6 +3443,16 @@ LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now)
|
|||||||
(LagTracker.read_heads[head] + 1) % LAG_TRACKER_BUFFER_SIZE;
|
(LagTracker.read_heads[head] + 1) % LAG_TRACKER_BUFFER_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the lag tracker is empty, that means the standby has processed
|
||||||
|
* everything we've ever sent so we should now clear 'last_read'. If we
|
||||||
|
* didn't do that, we'd risk using a stale and irrelevant sample for
|
||||||
|
* interpolation at the beginning of the next burst of WAL after a period
|
||||||
|
* of idleness.
|
||||||
|
*/
|
||||||
|
if (LagTracker.read_heads[head] == LagTracker.write_head)
|
||||||
|
LagTracker.last_read[head].time = 0;
|
||||||
|
|
||||||
if (time > now)
|
if (time > now)
|
||||||
{
|
{
|
||||||
/* If the clock somehow went backwards, treat as not found. */
|
/* If the clock somehow went backwards, treat as not found. */
|
||||||
@ -3459,9 +3469,14 @@ LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now)
|
|||||||
* eventually start moving again and cross one of our samples before
|
* eventually start moving again and cross one of our samples before
|
||||||
* we can show the lag increasing.
|
* we can show the lag increasing.
|
||||||
*/
|
*/
|
||||||
if (LagTracker.read_heads[head] != LagTracker.write_head &&
|
if (LagTracker.read_heads[head] == LagTracker.write_head)
|
||||||
LagTracker.last_read[head].time != 0)
|
|
||||||
{
|
{
|
||||||
|
/* There are no future samples, so we can't interpolate. */
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
else if (LagTracker.last_read[head].time != 0)
|
||||||
|
{
|
||||||
|
/* We can interpolate between last_read and the next sample. */
|
||||||
double fraction;
|
double fraction;
|
||||||
WalTimeSample prev = LagTracker.last_read[head];
|
WalTimeSample prev = LagTracker.last_read[head];
|
||||||
WalTimeSample next = LagTracker.buffer[LagTracker.read_heads[head]];
|
WalTimeSample next = LagTracker.buffer[LagTracker.read_heads[head]];
|
||||||
@ -3494,8 +3509,14 @@ LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Couldn't interpolate due to lack of data. */
|
/*
|
||||||
return -1;
|
* We have only a future sample, implying that we were entirely
|
||||||
|
* caught up, but now there is a new burst of WAL and the
|
||||||
|
* standby hasn't processed the first sample yet. Until the
|
||||||
|
* standby reaches the future sample the best we can do is report
|
||||||
|
* the hypothetical lag if that sample were to be replayed now.
|
||||||
|
*/
|
||||||
|
time = LagTracker.buffer[LagTracker.read_heads[head]].time;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user