2022-03-21 20:02:25 +01:00
|
|
|
/* -------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* pgstat_replslot.c
|
|
|
|
* Implementation of replication slot statistics.
|
|
|
|
*
|
|
|
|
* This file contains the implementation of replication slot statistics. It is kept
|
|
|
|
* separate from pgstat.c to enforce the line between the statistics access /
|
|
|
|
* storage implementation and the details about individual types of
|
|
|
|
* statistics.
|
|
|
|
*
|
2022-09-19 22:37:02 +02:00
|
|
|
* Replication slot stats work a bit differently than other variable-numbered
|
|
|
|
* stats. Slots do not have oids (so they can be created on physical
|
|
|
|
* replicas). Use the slot index as object id while running. However, the slot
|
|
|
|
* index can change when restarting. That is addressed by using the name when
|
|
|
|
* (de-)serializing. After a restart it is possible for slots to have been
|
|
|
|
* dropped while shut down, which is addressed by not restoring stats for
|
|
|
|
* slots that cannot be found by name when starting up.
|
2022-04-07 06:29:46 +02:00
|
|
|
*
|
2024-01-04 02:49:05 +01:00
|
|
|
* Copyright (c) 2001-2024, PostgreSQL Global Development Group
|
2022-03-21 20:02:25 +01:00
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
|
|
|
* src/backend/utils/activity/pgstat_replslot.c
|
|
|
|
* -------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include "replication/slot.h"
|
|
|
|
#include "utils/pgstat_internal.h"
|
|
|
|
|
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
static int get_replslot_index(const char *name);
|
|
|
|
|
|
|
|
|
2022-04-04 21:14:34 +02:00
|
|
|
/*
|
2022-04-07 02:56:19 +02:00
|
|
|
* Reset counters for a single replication slot.
|
2022-03-21 20:02:25 +01:00
|
|
|
*
|
2022-04-04 21:14:34 +02:00
|
|
|
* Permission checking for this function is managed through the normal
|
|
|
|
* GRANT system.
|
2022-03-21 20:02:25 +01:00
|
|
|
*/
|
|
|
|
void
|
2022-04-07 02:56:19 +02:00
|
|
|
pgstat_reset_replslot(const char *name)
|
2022-03-21 20:02:25 +01:00
|
|
|
{
|
2022-04-07 02:56:19 +02:00
|
|
|
ReplicationSlot *slot;
|
2022-03-21 20:02:25 +01:00
|
|
|
|
2022-10-28 09:19:06 +02:00
|
|
|
Assert(name != NULL);
|
2022-04-07 02:56:19 +02:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
/* Check if the slot exits with the given name. */
|
2022-04-07 02:56:19 +02:00
|
|
|
slot = SearchNamedReplicationSlot(name, true);
|
2022-03-21 20:02:25 +01:00
|
|
|
|
2022-04-07 02:56:19 +02:00
|
|
|
if (!slot)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("replication slot \"%s\" does not exist",
|
|
|
|
name)));
|
2022-03-21 20:02:25 +01:00
|
|
|
|
2022-04-07 02:56:19 +02:00
|
|
|
/*
|
|
|
|
* Nothing to do for physical slots as we collect stats only for logical
|
|
|
|
* slots.
|
|
|
|
*/
|
|
|
|
if (SlotIsPhysical(slot))
|
|
|
|
return;
|
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
/* reset this one entry */
|
|
|
|
pgstat_reset(PGSTAT_KIND_REPLSLOT, InvalidOid,
|
|
|
|
ReplicationSlotIndex(slot));
|
2022-03-21 20:02:25 +01:00
|
|
|
}
|
|
|
|
|
2022-04-04 21:14:34 +02:00
|
|
|
/*
|
2022-04-06 22:56:06 +02:00
|
|
|
* Report replication slot statistics.
|
pgstat: Prevent stats reset from corrupting slotname by removing slotname
Previously PgStat_StatReplSlotEntry contained the slotname, which was mainly
used when writing out the stats during shutdown, to identify the slot in the
serialized data (at runtime the index in ReplicationSlotCtl->replication_slots
is used, but that can change during a restart). Unfortunately the slotname was
overwritten when the slot's stats were reset.
That turned out to only cause "real" problems if the slot was active during
the reset, triggering an assertion failure at the next
pgstat_report_replslot(). In other paths the stats were re-initialized during
pgstat_acquire_replslot().
Fix this by removing slotname from PgStat_StatReplSlotEntry. Instead we can
get the slot's name from the slot itself. Besides fixing a bug, this also is
architecturally cleaner (a name is not really statistics). This is safe
because stats, for a slot removed while shut down, will not be restored at
startup.
In 15 the slotname is not removed, but renamed, to avoid changing the stats
format. In master, bump PGSTAT_FILE_FORMAT_ID.
This commit does not contain a test for the fix. I think this can only be
tested by a tap test starting pg_recvlogical in the background and checking
pg_recvlogical's output. That type of test is notoriously hard to be reliable,
so committing it shortly before the release is wrapped seems like a bad idea.
Reported-by: Jaime Casanova <jcasanov@systemguards.com.ec>
Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Discussion: https://postgr.es/m/YxfagaTXUNa9ggLb@ahch-to
Backpatch: 15-, where the bug was introduced in 5891c7a8ed8f
2022-10-08 18:33:23 +02:00
|
|
|
*
|
|
|
|
* We can rely on the stats for the slot to exist and to belong to this
|
|
|
|
* slot. We can only get here if pgstat_create_replslot() or
|
|
|
|
* pgstat_acquire_replslot() have already been called.
|
2022-03-21 20:02:25 +01:00
|
|
|
*/
|
|
|
|
void
|
2022-04-07 03:26:17 +02:00
|
|
|
pgstat_report_replslot(ReplicationSlot *slot, const PgStat_StatReplSlotEntry *repSlotStat)
|
2022-03-21 20:02:25 +01:00
|
|
|
{
|
2022-04-07 06:29:46 +02:00
|
|
|
PgStat_EntryRef *entry_ref;
|
|
|
|
PgStatShared_ReplSlot *shstatent;
|
|
|
|
PgStat_StatReplSlotEntry *statent;
|
|
|
|
|
|
|
|
entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_REPLSLOT, InvalidOid,
|
|
|
|
ReplicationSlotIndex(slot), false);
|
|
|
|
shstatent = (PgStatShared_ReplSlot *) entry_ref->shared_stats;
|
|
|
|
statent = &shstatent->stats;
|
2022-03-21 20:02:25 +01:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
/* Update the replication slot statistics */
|
|
|
|
#define REPLSLOT_ACC(fld) statent->fld += repSlotStat->fld
|
|
|
|
REPLSLOT_ACC(spill_txns);
|
|
|
|
REPLSLOT_ACC(spill_count);
|
|
|
|
REPLSLOT_ACC(spill_bytes);
|
|
|
|
REPLSLOT_ACC(stream_txns);
|
|
|
|
REPLSLOT_ACC(stream_count);
|
|
|
|
REPLSLOT_ACC(stream_bytes);
|
|
|
|
REPLSLOT_ACC(total_txns);
|
|
|
|
REPLSLOT_ACC(total_bytes);
|
|
|
|
#undef REPLSLOT_ACC
|
|
|
|
|
|
|
|
pgstat_unlock_entry(entry_ref);
|
2022-03-21 20:02:25 +01:00
|
|
|
}
|
|
|
|
|
2022-04-04 21:14:34 +02:00
|
|
|
/*
|
2022-04-06 22:56:06 +02:00
|
|
|
* Report replication slot creation.
|
2022-04-07 03:26:17 +02:00
|
|
|
*
|
|
|
|
* NB: This gets called with ReplicationSlotAllocationLock already held, be
|
|
|
|
* careful about calling back into slot.c.
|
2022-03-21 20:02:25 +01:00
|
|
|
*/
|
|
|
|
void
|
2022-04-07 03:26:17 +02:00
|
|
|
pgstat_create_replslot(ReplicationSlot *slot)
|
2022-03-21 20:02:25 +01:00
|
|
|
{
|
2022-04-07 06:29:46 +02:00
|
|
|
PgStat_EntryRef *entry_ref;
|
|
|
|
PgStatShared_ReplSlot *shstatent;
|
|
|
|
|
|
|
|
entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_REPLSLOT, InvalidOid,
|
|
|
|
ReplicationSlotIndex(slot), false);
|
|
|
|
shstatent = (PgStatShared_ReplSlot *) entry_ref->shared_stats;
|
2022-03-21 20:02:25 +01:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
/*
|
|
|
|
* NB: need to accept that there might be stats from an older slot, e.g.
|
|
|
|
* if we previously crashed after dropping a slot.
|
|
|
|
*/
|
|
|
|
memset(&shstatent->stats, 0, sizeof(shstatent->stats));
|
|
|
|
|
|
|
|
pgstat_unlock_entry(entry_ref);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Report replication slot has been acquired.
|
pgstat: Prevent stats reset from corrupting slotname by removing slotname
Previously PgStat_StatReplSlotEntry contained the slotname, which was mainly
used when writing out the stats during shutdown, to identify the slot in the
serialized data (at runtime the index in ReplicationSlotCtl->replication_slots
is used, but that can change during a restart). Unfortunately the slotname was
overwritten when the slot's stats were reset.
That turned out to only cause "real" problems if the slot was active during
the reset, triggering an assertion failure at the next
pgstat_report_replslot(). In other paths the stats were re-initialized during
pgstat_acquire_replslot().
Fix this by removing slotname from PgStat_StatReplSlotEntry. Instead we can
get the slot's name from the slot itself. Besides fixing a bug, this also is
architecturally cleaner (a name is not really statistics). This is safe
because stats, for a slot removed while shut down, will not be restored at
startup.
In 15 the slotname is not removed, but renamed, to avoid changing the stats
format. In master, bump PGSTAT_FILE_FORMAT_ID.
This commit does not contain a test for the fix. I think this can only be
tested by a tap test starting pg_recvlogical in the background and checking
pg_recvlogical's output. That type of test is notoriously hard to be reliable,
so committing it shortly before the release is wrapped seems like a bad idea.
Reported-by: Jaime Casanova <jcasanov@systemguards.com.ec>
Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Discussion: https://postgr.es/m/YxfagaTXUNa9ggLb@ahch-to
Backpatch: 15-, where the bug was introduced in 5891c7a8ed8f
2022-10-08 18:33:23 +02:00
|
|
|
*
|
|
|
|
* This guarantees that a stats entry exists during later
|
|
|
|
* pgstat_report_replslot() calls.
|
|
|
|
*
|
|
|
|
* If we previously crashed, no stats data exists. But if we did not crash,
|
|
|
|
* the stats do belong to this slot:
|
|
|
|
* - the stats cannot belong to a dropped slot, pgstat_drop_replslot() would
|
|
|
|
* have been called
|
|
|
|
* - if the slot was removed while shut down,
|
|
|
|
* pgstat_replslot_from_serialized_name_cb() returning false would have
|
|
|
|
* caused the stats to be dropped
|
2022-04-07 06:29:46 +02:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
pgstat_acquire_replslot(ReplicationSlot *slot)
|
|
|
|
{
|
pgstat: Prevent stats reset from corrupting slotname by removing slotname
Previously PgStat_StatReplSlotEntry contained the slotname, which was mainly
used when writing out the stats during shutdown, to identify the slot in the
serialized data (at runtime the index in ReplicationSlotCtl->replication_slots
is used, but that can change during a restart). Unfortunately the slotname was
overwritten when the slot's stats were reset.
That turned out to only cause "real" problems if the slot was active during
the reset, triggering an assertion failure at the next
pgstat_report_replslot(). In other paths the stats were re-initialized during
pgstat_acquire_replslot().
Fix this by removing slotname from PgStat_StatReplSlotEntry. Instead we can
get the slot's name from the slot itself. Besides fixing a bug, this also is
architecturally cleaner (a name is not really statistics). This is safe
because stats, for a slot removed while shut down, will not be restored at
startup.
In 15 the slotname is not removed, but renamed, to avoid changing the stats
format. In master, bump PGSTAT_FILE_FORMAT_ID.
This commit does not contain a test for the fix. I think this can only be
tested by a tap test starting pg_recvlogical in the background and checking
pg_recvlogical's output. That type of test is notoriously hard to be reliable,
so committing it shortly before the release is wrapped seems like a bad idea.
Reported-by: Jaime Casanova <jcasanov@systemguards.com.ec>
Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Discussion: https://postgr.es/m/YxfagaTXUNa9ggLb@ahch-to
Backpatch: 15-, where the bug was introduced in 5891c7a8ed8f
2022-10-08 18:33:23 +02:00
|
|
|
pgstat_get_entry_ref(PGSTAT_KIND_REPLSLOT, InvalidOid,
|
|
|
|
ReplicationSlotIndex(slot), true, NULL);
|
2022-03-21 20:02:25 +01:00
|
|
|
}
|
|
|
|
|
2022-04-04 21:14:34 +02:00
|
|
|
/*
|
2022-04-06 22:56:06 +02:00
|
|
|
* Report replication slot drop.
|
2022-03-21 20:02:25 +01:00
|
|
|
*/
|
|
|
|
void
|
2022-04-07 03:26:17 +02:00
|
|
|
pgstat_drop_replslot(ReplicationSlot *slot)
|
2022-03-21 20:02:25 +01:00
|
|
|
{
|
2022-04-07 06:29:46 +02:00
|
|
|
pgstat_drop_entry(PGSTAT_KIND_REPLSLOT, InvalidOid,
|
|
|
|
ReplicationSlotIndex(slot));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Support function for the SQL-callable pgstat* functions. Returns
|
|
|
|
* a pointer to the replication slot statistics struct.
|
|
|
|
*/
|
|
|
|
PgStat_StatReplSlotEntry *
|
|
|
|
pgstat_fetch_replslot(NameData slotname)
|
|
|
|
{
|
|
|
|
int idx = get_replslot_index(NameStr(slotname));
|
|
|
|
|
|
|
|
if (idx == -1)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return (PgStat_StatReplSlotEntry *)
|
|
|
|
pgstat_fetch_entry(PGSTAT_KIND_REPLSLOT, InvalidOid, idx);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
pgstat: Prevent stats reset from corrupting slotname by removing slotname
Previously PgStat_StatReplSlotEntry contained the slotname, which was mainly
used when writing out the stats during shutdown, to identify the slot in the
serialized data (at runtime the index in ReplicationSlotCtl->replication_slots
is used, but that can change during a restart). Unfortunately the slotname was
overwritten when the slot's stats were reset.
That turned out to only cause "real" problems if the slot was active during
the reset, triggering an assertion failure at the next
pgstat_report_replslot(). In other paths the stats were re-initialized during
pgstat_acquire_replslot().
Fix this by removing slotname from PgStat_StatReplSlotEntry. Instead we can
get the slot's name from the slot itself. Besides fixing a bug, this also is
architecturally cleaner (a name is not really statistics). This is safe
because stats, for a slot removed while shut down, will not be restored at
startup.
In 15 the slotname is not removed, but renamed, to avoid changing the stats
format. In master, bump PGSTAT_FILE_FORMAT_ID.
This commit does not contain a test for the fix. I think this can only be
tested by a tap test starting pg_recvlogical in the background and checking
pg_recvlogical's output. That type of test is notoriously hard to be reliable,
so committing it shortly before the release is wrapped seems like a bad idea.
Reported-by: Jaime Casanova <jcasanov@systemguards.com.ec>
Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Discussion: https://postgr.es/m/YxfagaTXUNa9ggLb@ahch-to
Backpatch: 15-, where the bug was introduced in 5891c7a8ed8f
2022-10-08 18:33:23 +02:00
|
|
|
pgstat_replslot_to_serialized_name_cb(const PgStat_HashKey *key, const PgStatShared_Common *header, NameData *name)
|
2022-04-07 06:29:46 +02:00
|
|
|
{
|
pgstat: Prevent stats reset from corrupting slotname by removing slotname
Previously PgStat_StatReplSlotEntry contained the slotname, which was mainly
used when writing out the stats during shutdown, to identify the slot in the
serialized data (at runtime the index in ReplicationSlotCtl->replication_slots
is used, but that can change during a restart). Unfortunately the slotname was
overwritten when the slot's stats were reset.
That turned out to only cause "real" problems if the slot was active during
the reset, triggering an assertion failure at the next
pgstat_report_replslot(). In other paths the stats were re-initialized during
pgstat_acquire_replslot().
Fix this by removing slotname from PgStat_StatReplSlotEntry. Instead we can
get the slot's name from the slot itself. Besides fixing a bug, this also is
architecturally cleaner (a name is not really statistics). This is safe
because stats, for a slot removed while shut down, will not be restored at
startup.
In 15 the slotname is not removed, but renamed, to avoid changing the stats
format. In master, bump PGSTAT_FILE_FORMAT_ID.
This commit does not contain a test for the fix. I think this can only be
tested by a tap test starting pg_recvlogical in the background and checking
pg_recvlogical's output. That type of test is notoriously hard to be reliable,
so committing it shortly before the release is wrapped seems like a bad idea.
Reported-by: Jaime Casanova <jcasanov@systemguards.com.ec>
Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Discussion: https://postgr.es/m/YxfagaTXUNa9ggLb@ahch-to
Backpatch: 15-, where the bug was introduced in 5891c7a8ed8f
2022-10-08 18:33:23 +02:00
|
|
|
/*
|
|
|
|
* This is only called late during shutdown. The set of existing slots
|
|
|
|
* isn't allowed to change at this point, we can assume that a slot exists
|
|
|
|
* at the offset.
|
|
|
|
*/
|
|
|
|
if (!ReplicationSlotName(key->objoid, name))
|
|
|
|
elog(ERROR, "could not find name for replication slot index %u",
|
|
|
|
key->objoid);
|
2022-04-07 06:29:46 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
pgstat_replslot_from_serialized_name_cb(const NameData *name, PgStat_HashKey *key)
|
|
|
|
{
|
|
|
|
int idx = get_replslot_index(NameStr(*name));
|
|
|
|
|
|
|
|
/* slot might have been deleted */
|
|
|
|
if (idx == -1)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
key->kind = PGSTAT_KIND_REPLSLOT;
|
|
|
|
key->dboid = InvalidOid;
|
|
|
|
key->objoid = idx;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
pgstat_replslot_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts)
|
|
|
|
{
|
|
|
|
((PgStatShared_ReplSlot *) header)->stats.stat_reset_timestamp = ts;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
get_replslot_index(const char *name)
|
|
|
|
{
|
|
|
|
ReplicationSlot *slot;
|
|
|
|
|
2022-10-28 09:19:06 +02:00
|
|
|
Assert(name != NULL);
|
2022-04-07 06:29:46 +02:00
|
|
|
|
|
|
|
slot = SearchNamedReplicationSlot(name, true);
|
|
|
|
|
|
|
|
if (!slot)
|
|
|
|
return -1;
|
2022-03-21 20:02:25 +01:00
|
|
|
|
2022-04-07 06:29:46 +02:00
|
|
|
return ReplicationSlotIndex(slot);
|
2022-03-21 20:02:25 +01:00
|
|
|
}
|