mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-09-28 00:11:53 +02:00
Revert 019_replslot_limit.pl related debugging aids.
This reverts most of91c0570a79
,f28bf667f6
,fe0972ee5e
,afdeff1052
. The only thing left is the retry loop in 019_replslot_limit.pl that avoids spurious failures by retrying a couple times. We haven't seen any hard evidence that this is caused by anything but slow process shutdown. We did not find any cases where walsenders did not vanish after waiting for longer. Therefore there's no reason for this debugging code to remain. Discussion: https://postgr.es/m/20220530190155.47wr3x2prdwyciah@alap3.anarazel.de Backpatch: 15-
This commit is contained in:
parent
b9eb0ff09e
commit
3f8148c256
@ -177,10 +177,6 @@ ReplicationSlotInitialize(void)
|
|||||||
static void
|
static void
|
||||||
ReplicationSlotShmemExit(int code, Datum arg)
|
ReplicationSlotShmemExit(int code, Datum arg)
|
||||||
{
|
{
|
||||||
/* temp debugging aid to analyze 019_replslot_limit failures */
|
|
||||||
elog(DEBUG3, "replication slot exit hook, %s active slot",
|
|
||||||
MyReplicationSlot != NULL ? "with" : "without");
|
|
||||||
|
|
||||||
/* Make sure active replication slots are released */
|
/* Make sure active replication slots are released */
|
||||||
if (MyReplicationSlot != NULL)
|
if (MyReplicationSlot != NULL)
|
||||||
ReplicationSlotRelease();
|
ReplicationSlotRelease();
|
||||||
@ -582,9 +578,6 @@ ReplicationSlotCleanup(void)
|
|||||||
Assert(MyReplicationSlot == NULL);
|
Assert(MyReplicationSlot == NULL);
|
||||||
|
|
||||||
restart:
|
restart:
|
||||||
/* temp debugging aid to analyze 019_replslot_limit failures */
|
|
||||||
elog(DEBUG3, "temporary replication slot cleanup: begin");
|
|
||||||
|
|
||||||
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
|
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
|
||||||
for (i = 0; i < max_replication_slots; i++)
|
for (i = 0; i < max_replication_slots; i++)
|
||||||
{
|
{
|
||||||
@ -593,10 +586,6 @@ restart:
|
|||||||
if (!s->in_use)
|
if (!s->in_use)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* unlocked read of active_pid is ok for debugging purposes */
|
|
||||||
elog(DEBUG3, "temporary replication slot cleanup: %d in use, active_pid: %d",
|
|
||||||
i, (int) s->active_pid);
|
|
||||||
|
|
||||||
SpinLockAcquire(&s->mutex);
|
SpinLockAcquire(&s->mutex);
|
||||||
if (s->active_pid == MyProcPid)
|
if (s->active_pid == MyProcPid)
|
||||||
{
|
{
|
||||||
@ -614,8 +603,6 @@ restart:
|
|||||||
}
|
}
|
||||||
|
|
||||||
LWLockRelease(ReplicationSlotControlLock);
|
LWLockRelease(ReplicationSlotControlLock);
|
||||||
|
|
||||||
elog(DEBUG3, "temporary replication slot cleanup: done");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -657,9 +644,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
|
|||||||
char path[MAXPGPATH];
|
char path[MAXPGPATH];
|
||||||
char tmppath[MAXPGPATH];
|
char tmppath[MAXPGPATH];
|
||||||
|
|
||||||
/* temp debugging aid to analyze 019_replslot_limit failures */
|
|
||||||
elog(DEBUG3, "replication slot drop: %s: begin", NameStr(slot->data.name));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If some other backend ran this code concurrently with us, we might try
|
* If some other backend ran this code concurrently with us, we might try
|
||||||
* to delete a slot with a certain name while someone else was trying to
|
* to delete a slot with a certain name while someone else was trying to
|
||||||
@ -710,9 +694,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
|
|||||||
path, tmppath)));
|
path, tmppath)));
|
||||||
}
|
}
|
||||||
|
|
||||||
elog(DEBUG3, "replication slot drop: %s: removed on-disk",
|
|
||||||
NameStr(slot->data.name));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The slot is definitely gone. Lock out concurrent scans of the array
|
* The slot is definitely gone. Lock out concurrent scans of the array
|
||||||
* long enough to kill it. It's OK to clear the active PID here without
|
* long enough to kill it. It's OK to clear the active PID here without
|
||||||
@ -726,13 +707,8 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
|
|||||||
slot->active_pid = 0;
|
slot->active_pid = 0;
|
||||||
slot->in_use = false;
|
slot->in_use = false;
|
||||||
LWLockRelease(ReplicationSlotControlLock);
|
LWLockRelease(ReplicationSlotControlLock);
|
||||||
|
|
||||||
elog(DEBUG3, "replication slot drop: %s: marked as not in use", NameStr(slot->data.name));
|
|
||||||
|
|
||||||
ConditionVariableBroadcast(&slot->active_cv);
|
ConditionVariableBroadcast(&slot->active_cv);
|
||||||
|
|
||||||
elog(DEBUG3, "replication slot drop: %s: notified others", NameStr(slot->data.name));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Slot is dead and doesn't prevent resource removal anymore, recompute
|
* Slot is dead and doesn't prevent resource removal anymore, recompute
|
||||||
* limits.
|
* limits.
|
||||||
@ -740,8 +716,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
|
|||||||
ReplicationSlotsComputeRequiredXmin(false);
|
ReplicationSlotsComputeRequiredXmin(false);
|
||||||
ReplicationSlotsComputeRequiredLSN();
|
ReplicationSlotsComputeRequiredLSN();
|
||||||
|
|
||||||
elog(DEBUG3, "replication slot drop: %s: computed required", NameStr(slot->data.name));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If removing the directory fails, the worst thing that will happen is
|
* If removing the directory fails, the worst thing that will happen is
|
||||||
* that the user won't be able to create a new slot with the same name
|
* that the user won't be able to create a new slot with the same name
|
||||||
@ -751,8 +725,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
|
|||||||
ereport(WARNING,
|
ereport(WARNING,
|
||||||
(errmsg("could not remove directory \"%s\"", tmppath)));
|
(errmsg("could not remove directory \"%s\"", tmppath)));
|
||||||
|
|
||||||
elog(DEBUG3, "replication slot drop: %s: removed directory", NameStr(slot->data.name));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Drop the statistics entry for the replication slot. Do this while
|
* Drop the statistics entry for the replication slot. Do this while
|
||||||
* holding ReplicationSlotAllocationLock so that we don't drop a
|
* holding ReplicationSlotAllocationLock so that we don't drop a
|
||||||
@ -767,9 +739,6 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
|
|||||||
* a slot while we're still cleaning up the detritus of the old one.
|
* a slot while we're still cleaning up the detritus of the old one.
|
||||||
*/
|
*/
|
||||||
LWLockRelease(ReplicationSlotAllocationLock);
|
LWLockRelease(ReplicationSlotAllocationLock);
|
||||||
|
|
||||||
elog(DEBUG3, "replication slot drop: %s: done",
|
|
||||||
NameStr(slot->data.name));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1329,12 +1298,6 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN,
|
|||||||
(void) kill(active_pid, SIGTERM);
|
(void) kill(active_pid, SIGTERM);
|
||||||
last_signaled_pid = active_pid;
|
last_signaled_pid = active_pid;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
/* temp debugging aid to analyze 019_replslot_limit failures */
|
|
||||||
elog(DEBUG3, "not signalling process %d during invalidation of slot \"%s\"",
|
|
||||||
active_pid, NameStr(slotname));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Wait until the slot is released. */
|
/* Wait until the slot is released. */
|
||||||
ConditionVariableSleep(&s->active_cv,
|
ConditionVariableSleep(&s->active_cv,
|
||||||
@ -1398,10 +1361,6 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno)
|
|||||||
XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);
|
XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);
|
||||||
|
|
||||||
restart:
|
restart:
|
||||||
/* temp debugging aid to analyze 019_replslot_limit failures */
|
|
||||||
elog(DEBUG3, "begin invalidating obsolete replication slots older than %X/%X",
|
|
||||||
LSN_FORMAT_ARGS(oldestLSN));
|
|
||||||
|
|
||||||
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
|
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
|
||||||
for (int i = 0; i < max_replication_slots; i++)
|
for (int i = 0; i < max_replication_slots; i++)
|
||||||
{
|
{
|
||||||
@ -1427,8 +1386,6 @@ restart:
|
|||||||
ReplicationSlotsComputeRequiredLSN();
|
ReplicationSlotsComputeRequiredLSN();
|
||||||
}
|
}
|
||||||
|
|
||||||
elog(DEBUG3, "done invalidating obsolete replication slots");
|
|
||||||
|
|
||||||
return invalidated;
|
return invalidated;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1942,10 +1942,3 @@ LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* temp debugging aid to analyze 019_replslot_limit failures */
|
|
||||||
int
|
|
||||||
LWLockHeldCount(void)
|
|
||||||
{
|
|
||||||
return num_held_lwlocks;
|
|
||||||
}
|
|
||||||
|
@ -1231,24 +1231,6 @@ ShutdownPostgres(int code, Datum arg)
|
|||||||
* them explicitly.
|
* them explicitly.
|
||||||
*/
|
*/
|
||||||
LockReleaseAll(USER_LOCKMETHOD, true);
|
LockReleaseAll(USER_LOCKMETHOD, true);
|
||||||
|
|
||||||
/*
|
|
||||||
* temp debugging aid to analyze 019_replslot_limit failures
|
|
||||||
*
|
|
||||||
* If an error were thrown outside of a transaction nothing up to now
|
|
||||||
* would have released lwlocks. We probably will add an
|
|
||||||
* LWLockReleaseAll(). But for now make it easier to understand such cases
|
|
||||||
* by warning if any lwlocks are held.
|
|
||||||
*/
|
|
||||||
#ifdef USE_ASSERT_CHECKING
|
|
||||||
{
|
|
||||||
int held_lwlocks = LWLockHeldCount();
|
|
||||||
|
|
||||||
if (held_lwlocks)
|
|
||||||
elog(WARNING, "holding %d lwlocks at the end of ShutdownPostgres()",
|
|
||||||
held_lwlocks);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -694,16 +694,8 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier,
|
|||||||
bgchild = fork();
|
bgchild = fork();
|
||||||
if (bgchild == 0)
|
if (bgchild == 0)
|
||||||
{
|
{
|
||||||
int ret;
|
|
||||||
|
|
||||||
/* in child process */
|
/* in child process */
|
||||||
ret = LogStreamerMain(param);
|
exit(LogStreamerMain(param));
|
||||||
|
|
||||||
/* temp debugging aid to analyze 019_replslot_limit failures */
|
|
||||||
if (verbose)
|
|
||||||
pg_log_info("log streamer with pid %d exiting", getpid());
|
|
||||||
|
|
||||||
exit(ret);
|
|
||||||
}
|
}
|
||||||
else if (bgchild < 0)
|
else if (bgchild < 0)
|
||||||
pg_fatal("could not create background process: %m");
|
pg_fatal("could not create background process: %m");
|
||||||
|
@ -121,7 +121,6 @@ extern void LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val);
|
|||||||
extern void LWLockReleaseAll(void);
|
extern void LWLockReleaseAll(void);
|
||||||
extern bool LWLockHeldByMe(LWLock *lock);
|
extern bool LWLockHeldByMe(LWLock *lock);
|
||||||
extern bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode);
|
extern bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode);
|
||||||
extern int LWLockHeldCount(void);
|
|
||||||
|
|
||||||
extern bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval);
|
extern bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval);
|
||||||
extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value);
|
extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value);
|
||||||
|
@ -316,16 +316,13 @@ $node_primary3->append_conf(
|
|||||||
max_wal_size = 2MB
|
max_wal_size = 2MB
|
||||||
log_checkpoints = yes
|
log_checkpoints = yes
|
||||||
max_slot_wal_keep_size = 1MB
|
max_slot_wal_keep_size = 1MB
|
||||||
|
|
||||||
# temp debugging aid to analyze 019_replslot_limit failures
|
|
||||||
log_min_messages=debug3
|
|
||||||
));
|
));
|
||||||
$node_primary3->start;
|
$node_primary3->start;
|
||||||
$node_primary3->safe_psql('postgres',
|
$node_primary3->safe_psql('postgres',
|
||||||
"SELECT pg_create_physical_replication_slot('rep3')");
|
"SELECT pg_create_physical_replication_slot('rep3')");
|
||||||
# Take backup
|
# Take backup
|
||||||
$backup_name = 'my_backup';
|
$backup_name = 'my_backup';
|
||||||
$node_primary3->backup($backup_name, backup_options => ['--verbose']);
|
$node_primary3->backup($backup_name);
|
||||||
# Create standby
|
# Create standby
|
||||||
my $node_standby3 = PostgreSQL::Test::Cluster->new('standby_3');
|
my $node_standby3 = PostgreSQL::Test::Cluster->new('standby_3');
|
||||||
$node_standby3->init_from_backup($node_primary3, $backup_name,
|
$node_standby3->init_from_backup($node_primary3, $backup_name,
|
||||||
@ -336,11 +333,9 @@ $node_primary3->wait_for_catchup($node_standby3);
|
|||||||
|
|
||||||
my $senderpid;
|
my $senderpid;
|
||||||
|
|
||||||
# We've seen occasional cases where multiple walsender pids are active. It
|
# We've seen occasional cases where multiple walsender pids are still active
|
||||||
# could be that we're just observing process shutdown being slow. To collect
|
# at this point, apparently just due to process shutdown being slow. To avoid
|
||||||
# more information, retry a couple times, print a bit of debugging information
|
# spurious failures, retry a couple times.
|
||||||
# each iteration. Don't fail the test if retries find just one pid, the
|
|
||||||
# buildfarm failures are too noisy.
|
|
||||||
my $i = 0;
|
my $i = 0;
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user