/*-------------------------------------------------------------------------
 *
 * slotfuncs.c
 *	   Support functions for replication slots
 *
 * Copyright (c) 2012-2021, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	   src/backend/replication/slotfuncs.c
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "postgres.h"
|
|
|
|
|
2019-04-05 19:52:45 +02:00
|
|
|
#include "access/htup_details.h"
|
|
|
|
#include "access/xlog_internal.h"
|
2020-03-17 22:18:01 +01:00
|
|
|
#include "access/xlogutils.h"
|
2014-02-01 04:45:17 +01:00
|
|
|
#include "funcapi.h"
|
|
|
|
#include "miscadmin.h"
|
2018-01-17 12:38:34 +01:00
|
|
|
#include "replication/decode.h"
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
#include "replication/logical.h"
|
2019-11-12 04:00:16 +01:00
|
|
|
#include "replication/slot.h"
|
2014-02-01 04:45:17 +01:00
|
|
|
#include "utils/builtins.h"
|
2018-01-17 12:38:34 +01:00
|
|
|
#include "utils/inval.h"
|
2014-02-19 17:13:44 +01:00
|
|
|
#include "utils/pg_lsn.h"
|
2018-01-17 12:38:34 +01:00
|
|
|
#include "utils/resowner.h"
|
2014-02-01 04:45:17 +01:00
|
|
|
|
|
|
|
static void
|
|
|
|
check_permissions(void)
|
|
|
|
{
|
2014-12-23 19:35:49 +01:00
|
|
|
if (!superuser() && !has_rolreplication(GetUserId()))
|
2014-02-01 04:45:17 +01:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
|
2020-01-30 17:32:04 +01:00
|
|
|
errmsg("must be superuser or replication role to use replication slots")));
|
2014-02-01 04:45:17 +01:00
|
|
|
}
|
|
|
|
|
2019-04-05 19:52:45 +02:00
|
|
|
/*
|
|
|
|
* Helper function for creating a new physical replication slot with
|
|
|
|
* given arguments. Note that this function doesn't release the created
|
|
|
|
* slot.
|
|
|
|
*
|
|
|
|
* If restart_lsn is a valid value, we use it without WAL reservation
|
|
|
|
* routine. So the caller must guarantee that WAL is available.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
create_physical_replication_slot(char *name, bool immediately_reserve,
|
|
|
|
bool temporary, XLogRecPtr restart_lsn)
|
|
|
|
{
|
|
|
|
Assert(!MyReplicationSlot);
|
|
|
|
|
|
|
|
/* acquire replication slot, this will check for conflicting names */
|
|
|
|
ReplicationSlotCreate(name, false,
|
|
|
|
temporary ? RS_TEMPORARY : RS_PERSISTENT);
|
|
|
|
|
|
|
|
if (immediately_reserve)
|
|
|
|
{
|
|
|
|
/* Reserve WAL as the user asked for it */
|
|
|
|
if (XLogRecPtrIsInvalid(restart_lsn))
|
|
|
|
ReplicationSlotReserveWal();
|
|
|
|
else
|
|
|
|
MyReplicationSlot->data.restart_lsn = restart_lsn;
|
|
|
|
|
|
|
|
/* Write this slot to disk */
|
|
|
|
ReplicationSlotMarkDirty();
|
|
|
|
ReplicationSlotSave();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-01 04:45:17 +01:00
|
|
|
/*
|
|
|
|
* SQL function for creating a new physical (streaming replication)
|
|
|
|
* replication slot.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_create_physical_replication_slot(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
Name name = PG_GETARG_NAME(0);
|
2016-06-10 00:02:36 +02:00
|
|
|
bool immediately_reserve = PG_GETARG_BOOL(1);
|
2016-12-08 18:00:00 +01:00
|
|
|
bool temporary = PG_GETARG_BOOL(2);
|
2014-02-01 04:45:17 +01:00
|
|
|
Datum values[2];
|
|
|
|
bool nulls[2];
|
|
|
|
TupleDesc tupdesc;
|
|
|
|
HeapTuple tuple;
|
|
|
|
Datum result;
|
|
|
|
|
|
|
|
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
|
|
|
elog(ERROR, "return type must be a row type");
|
|
|
|
|
2014-06-12 13:23:46 +02:00
|
|
|
check_permissions();
|
|
|
|
|
|
|
|
CheckSlotRequirements();
|
|
|
|
|
2019-04-05 19:52:45 +02:00
|
|
|
create_physical_replication_slot(NameStr(*name),
|
|
|
|
immediately_reserve,
|
|
|
|
temporary,
|
|
|
|
InvalidXLogRecPtr);
|
2014-02-01 04:45:17 +01:00
|
|
|
|
2014-03-03 13:16:54 +01:00
|
|
|
values[0] = NameGetDatum(&MyReplicationSlot->data.name);
|
2014-02-01 04:45:17 +01:00
|
|
|
nulls[0] = false;
|
2015-08-11 12:34:31 +02:00
|
|
|
|
|
|
|
if (immediately_reserve)
|
|
|
|
{
|
|
|
|
values[1] = LSNGetDatum(MyReplicationSlot->data.restart_lsn);
|
|
|
|
nulls[1] = false;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
nulls[1] = true;
|
2014-02-01 04:45:17 +01:00
|
|
|
|
|
|
|
tuple = heap_form_tuple(tupdesc, values, nulls);
|
|
|
|
result = HeapTupleGetDatum(tuple);
|
|
|
|
|
|
|
|
ReplicationSlotRelease();
|
|
|
|
|
|
|
|
PG_RETURN_DATUM(result);
|
|
|
|
}
|
|
|
|
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
|
|
|
/*
|
2019-04-05 19:52:45 +02:00
|
|
|
* Helper function for creating a new logical replication slot with
|
|
|
|
* given arguments. Note that this function doesn't release the created
|
|
|
|
* slot.
|
2020-03-17 20:13:18 +01:00
|
|
|
*
|
|
|
|
* When find_startpoint is false, the slot's confirmed_flush is not set; it's
|
|
|
|
* caller's responsibility to ensure it's set to something sensible.
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
*/
|
2019-04-05 19:52:45 +02:00
|
|
|
static void
|
|
|
|
create_logical_replication_slot(char *name, char *plugin,
|
2020-03-17 20:13:18 +01:00
|
|
|
bool temporary, XLogRecPtr restart_lsn,
|
|
|
|
bool find_startpoint)
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
{
|
|
|
|
LogicalDecodingContext *ctx = NULL;
|
|
|
|
|
2014-06-12 13:23:46 +02:00
|
|
|
Assert(!MyReplicationSlot);
|
|
|
|
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
/*
|
2015-05-24 03:35:49 +02:00
|
|
|
* Acquire a logical decoding slot, this will check for conflicting names.
|
2017-05-17 22:31:56 +02:00
|
|
|
* Initially create persistent slot as ephemeral - that allows us to
|
|
|
|
* nicely handle errors during initialization because it'll get dropped if
|
|
|
|
* this transaction fails. We'll make it persistent at the end. Temporary
|
|
|
|
* slots can be created as temporary from beginning as they get dropped on
|
|
|
|
* error as well.
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
*/
|
2019-04-05 19:52:45 +02:00
|
|
|
ReplicationSlotCreate(name, true,
|
2016-12-08 18:00:00 +01:00
|
|
|
temporary ? RS_TEMPORARY : RS_EPHEMERAL);
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
|
|
|
/*
|
2020-03-17 20:13:18 +01:00
|
|
|
* Create logical decoding context to find start point or, if we don't
|
|
|
|
* need it, to 1) bump slot's restart_lsn and xmin 2) check plugin sanity.
|
|
|
|
*
|
|
|
|
* Note: when !find_startpoint this is still important, because it's at
|
|
|
|
* this point that the output plugin is validated.
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
*/
|
2019-04-05 19:52:45 +02:00
|
|
|
ctx = CreateInitDecodingContext(plugin, NIL,
|
2020-03-17 20:13:18 +01:00
|
|
|
false, /* just catalogs is OK */
|
2019-04-05 19:52:45 +02:00
|
|
|
restart_lsn,
|
2020-05-08 21:30:34 +02:00
|
|
|
XL_ROUTINE(.page_read = read_local_xlog_page,
|
|
|
|
.segment_open = wal_segment_open,
|
|
|
|
.segment_close = wal_segment_close),
|
|
|
|
NULL, NULL, NULL);
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
2020-03-17 20:13:18 +01:00
|
|
|
/*
|
|
|
|
* If caller needs us to determine the decoding start point, do so now.
|
|
|
|
* This might take a while.
|
|
|
|
*/
|
|
|
|
if (find_startpoint)
|
|
|
|
DecodingContextFindStartpoint(ctx);
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
|
|
|
/* don't need the decoding context anymore */
|
|
|
|
FreeDecodingContext(ctx);
|
2019-04-05 19:52:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SQL function for creating a new logical replication slot.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
Name name = PG_GETARG_NAME(0);
|
|
|
|
Name plugin = PG_GETARG_NAME(1);
|
|
|
|
bool temporary = PG_GETARG_BOOL(2);
|
|
|
|
Datum result;
|
|
|
|
TupleDesc tupdesc;
|
|
|
|
HeapTuple tuple;
|
|
|
|
Datum values[2];
|
|
|
|
bool nulls[2];
|
|
|
|
|
|
|
|
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
|
|
|
elog(ERROR, "return type must be a row type");
|
|
|
|
|
|
|
|
check_permissions();
|
|
|
|
|
|
|
|
CheckLogicalDecodingRequirements();
|
|
|
|
|
|
|
|
create_logical_replication_slot(NameStr(*name),
|
|
|
|
NameStr(*plugin),
|
|
|
|
temporary,
|
2020-03-17 20:13:18 +01:00
|
|
|
InvalidXLogRecPtr,
|
|
|
|
true);
|
2019-04-05 19:52:45 +02:00
|
|
|
|
|
|
|
values[0] = NameGetDatum(&MyReplicationSlot->data.name);
|
|
|
|
values[1] = LSNGetDatum(MyReplicationSlot->data.confirmed_flush);
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
|
|
|
memset(nulls, 0, sizeof(nulls));
|
|
|
|
|
|
|
|
tuple = heap_form_tuple(tupdesc, values, nulls);
|
|
|
|
result = HeapTupleGetDatum(tuple);
|
|
|
|
|
2016-12-08 18:00:00 +01:00
|
|
|
/* ok, slot is now fully created, mark it as persistent if needed */
|
|
|
|
if (!temporary)
|
|
|
|
ReplicationSlotPersist();
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
ReplicationSlotRelease();
|
|
|
|
|
|
|
|
PG_RETURN_DATUM(result);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-02-01 04:45:17 +01:00
|
|
|
/*
|
|
|
|
* SQL function for dropping a replication slot.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_drop_replication_slot(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
Name name = PG_GETARG_NAME(0);
|
|
|
|
|
|
|
|
check_permissions();
|
|
|
|
|
|
|
|
CheckSlotRequirements();
|
|
|
|
|
2017-09-01 13:44:14 +02:00
|
|
|
ReplicationSlotDrop(NameStr(*name), true);
|
2014-02-01 04:45:17 +01:00
|
|
|
|
|
|
|
PG_RETURN_VOID();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pg_get_replication_slots - SQL SRF showing active replication slots.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_get_replication_slots(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2020-04-08 00:35:00 +02:00
|
|
|
#define PG_GET_REPLICATION_SLOTS_COLS 13
|
2014-02-01 04:45:17 +01:00
|
|
|
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
|
|
|
|
TupleDesc tupdesc;
|
|
|
|
Tuplestorestate *tupstore;
|
|
|
|
MemoryContext per_query_ctx;
|
|
|
|
MemoryContext oldcontext;
|
2020-07-07 19:08:00 +02:00
|
|
|
XLogRecPtr currlsn;
|
2014-02-01 04:45:17 +01:00
|
|
|
int slotno;
|
|
|
|
|
|
|
|
/* check to see if caller supports us returning a tuplestore */
|
|
|
|
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("set-valued function called in context that cannot accept a set")));
|
|
|
|
if (!(rsinfo->allowedModes & SFRM_Materialize))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2019-12-24 16:37:13 +01:00
|
|
|
errmsg("materialize mode required, but it is not allowed in this context")));
|
2014-02-01 04:45:17 +01:00
|
|
|
|
|
|
|
/* Build a tuple descriptor for our result type */
|
|
|
|
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
|
|
|
elog(ERROR, "return type must be a row type");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't require any special permission to see this function's data
|
|
|
|
* because nothing should be sensitive. The most critical being the slot
|
|
|
|
* name, which shouldn't contain anything particularly sensitive.
|
|
|
|
*/
|
|
|
|
|
|
|
|
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
|
|
|
|
oldcontext = MemoryContextSwitchTo(per_query_ctx);
|
|
|
|
|
|
|
|
tupstore = tuplestore_begin_heap(true, false, work_mem);
|
|
|
|
rsinfo->returnMode = SFRM_Materialize;
|
|
|
|
rsinfo->setResult = tupstore;
|
|
|
|
rsinfo->setDesc = tupdesc;
|
|
|
|
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
|
|
|
2020-07-07 19:08:00 +02:00
|
|
|
currlsn = GetXLogWriteRecPtr();
|
|
|
|
|
2017-07-25 19:26:49 +02:00
|
|
|
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
|
2014-02-01 04:45:17 +01:00
|
|
|
for (slotno = 0; slotno < max_replication_slots; slotno++)
|
|
|
|
{
|
|
|
|
ReplicationSlot *slot = &ReplicationSlotCtl->replication_slots[slotno];
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
ReplicationSlot slot_contents;
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
Datum values[PG_GET_REPLICATION_SLOTS_COLS];
|
|
|
|
bool nulls[PG_GET_REPLICATION_SLOTS_COLS];
|
2020-04-08 00:35:00 +02:00
|
|
|
WALAvailability walstate;
|
2014-02-01 04:45:17 +01:00
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!slot->in_use)
|
|
|
|
continue;
|
2017-07-25 19:26:49 +02:00
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
/* Copy slot contents while holding spinlock, then examine at leisure */
|
2017-07-25 19:26:49 +02:00
|
|
|
SpinLockAcquire(&slot->mutex);
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
slot_contents = *slot;
|
2014-02-01 04:45:17 +01:00
|
|
|
SpinLockRelease(&slot->mutex);
|
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
memset(values, 0, sizeof(values));
|
2014-02-01 04:45:17 +01:00
|
|
|
memset(nulls, 0, sizeof(nulls));
|
|
|
|
|
|
|
|
i = 0;
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
values[i++] = NameGetDatum(&slot_contents.data.name);
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the affected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Geoghegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
if (slot_contents.data.database == InvalidOid)
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the affected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Geoghegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
nulls[i++] = true;
|
|
|
|
else
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
values[i++] = NameGetDatum(&slot_contents.data.plugin);
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the affected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Geoghegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
if (slot_contents.data.database == InvalidOid)
|
2014-02-01 04:45:17 +01:00
|
|
|
values[i++] = CStringGetTextDatum("physical");
|
|
|
|
else
|
|
|
|
values[i++] = CStringGetTextDatum("logical");
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the affected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Geoghegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
if (slot_contents.data.database == InvalidOid)
|
2014-03-03 13:16:54 +01:00
|
|
|
nulls[i++] = true;
|
|
|
|
else
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
values[i++] = ObjectIdGetDatum(slot_contents.data.database);
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the affected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Geoghegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
values[i++] = BoolGetDatum(slot_contents.data.persistency == RS_TEMPORARY);
|
|
|
|
values[i++] = BoolGetDatum(slot_contents.active_pid != 0);
|
2015-04-21 11:51:06 +02:00
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
if (slot_contents.active_pid != 0)
|
|
|
|
values[i++] = Int32GetDatum(slot_contents.active_pid);
|
2015-04-21 11:51:06 +02:00
|
|
|
else
|
|
|
|
nulls[i++] = true;
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the affected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Geoghegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
if (slot_contents.data.xmin != InvalidTransactionId)
|
|
|
|
values[i++] = TransactionIdGetDatum(slot_contents.data.xmin);
|
2014-02-01 04:45:17 +01:00
|
|
|
else
|
|
|
|
nulls[i++] = true;
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Geoghegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
if (slot_contents.data.catalog_xmin != InvalidTransactionId)
|
|
|
|
values[i++] = TransactionIdGetDatum(slot_contents.data.catalog_xmin);
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the affected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Geoghegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
else
|
|
|
|
nulls[i++] = true;
|
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
if (slot_contents.data.restart_lsn != InvalidXLogRecPtr)
|
|
|
|
values[i++] = LSNGetDatum(slot_contents.data.restart_lsn);
|
2014-02-01 04:45:17 +01:00
|
|
|
else
|
|
|
|
nulls[i++] = true;
|
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
if (slot_contents.data.confirmed_flush != InvalidXLogRecPtr)
|
|
|
|
values[i++] = LSNGetDatum(slot_contents.data.confirmed_flush);
|
2015-08-10 13:28:18 +02:00
|
|
|
else
|
|
|
|
nulls[i++] = true;
|
|
|
|
|
2020-06-24 20:15:17 +02:00
|
|
|
/*
|
2020-06-27 02:41:29 +02:00
|
|
|
* If invalidated_at is valid and restart_lsn is invalid, we know for
|
|
|
|
* certain that the slot has been invalidated. Otherwise, test
|
|
|
|
* availability from restart_lsn.
|
2020-06-24 20:15:17 +02:00
|
|
|
*/
|
2020-06-27 02:41:29 +02:00
|
|
|
if (XLogRecPtrIsInvalid(slot_contents.data.restart_lsn) &&
|
|
|
|
!XLogRecPtrIsInvalid(slot_contents.data.invalidated_at))
|
|
|
|
walstate = WALAVAIL_REMOVED;
|
|
|
|
else
|
|
|
|
walstate = GetWALAvailability(slot_contents.data.restart_lsn);
|
2020-04-08 00:35:00 +02:00
|
|
|
|
|
|
|
switch (walstate)
|
|
|
|
{
|
|
|
|
case WALAVAIL_INVALID_LSN:
|
|
|
|
nulls[i++] = true;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case WALAVAIL_RESERVED:
|
|
|
|
values[i++] = CStringGetTextDatum("reserved");
|
|
|
|
break;
|
|
|
|
|
2020-06-24 20:23:39 +02:00
|
|
|
case WALAVAIL_EXTENDED:
|
|
|
|
values[i++] = CStringGetTextDatum("extended");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case WALAVAIL_UNRESERVED:
|
|
|
|
values[i++] = CStringGetTextDatum("unreserved");
|
|
|
|
break;
|
|
|
|
|
2020-04-08 00:35:00 +02:00
|
|
|
case WALAVAIL_REMOVED:
|
2020-06-24 20:23:39 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we read the restart_lsn long enough ago, maybe that file
|
|
|
|
* has been removed by now. However, the walsender could have
|
|
|
|
* moved forward enough that it jumped to another file after
|
|
|
|
* we looked. If checkpointer signalled the process to
|
|
|
|
* terminate, then it's definitely lost; but if a process is
|
|
|
|
* still alive, then "unreserved" seems more appropriate.
|
2020-07-07 19:08:00 +02:00
|
|
|
*
|
|
|
|
* If we do change it, save the state for safe_wal_size below.
|
2020-06-24 20:23:39 +02:00
|
|
|
*/
|
|
|
|
if (!XLogRecPtrIsInvalid(slot_contents.data.restart_lsn))
|
|
|
|
{
|
|
|
|
int pid;
|
|
|
|
|
|
|
|
SpinLockAcquire(&slot->mutex);
|
|
|
|
pid = slot->active_pid;
|
2020-07-07 19:08:00 +02:00
|
|
|
slot_contents.data.restart_lsn = slot->data.restart_lsn;
|
2020-06-24 20:23:39 +02:00
|
|
|
SpinLockRelease(&slot->mutex);
|
|
|
|
if (pid != 0)
|
|
|
|
{
|
|
|
|
values[i++] = CStringGetTextDatum("unreserved");
|
2020-07-07 19:08:00 +02:00
|
|
|
walstate = WALAVAIL_UNRESERVED;
|
2020-06-24 20:23:39 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2020-04-08 00:35:00 +02:00
|
|
|
values[i++] = CStringGetTextDatum("lost");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2020-07-07 19:08:00 +02:00
|
|
|
/*
|
|
|
|
* safe_wal_size is only computed for slots that have not been lost,
|
|
|
|
* and only if there's a configured maximum size.
|
|
|
|
*/
|
|
|
|
if (walstate == WALAVAIL_REMOVED || max_slot_wal_keep_size_mb < 0)
|
|
|
|
nulls[i++] = true;
|
|
|
|
else
|
2020-04-08 00:35:00 +02:00
|
|
|
{
|
2020-07-07 19:08:00 +02:00
|
|
|
XLogSegNo targetSeg;
|
2020-07-20 06:30:18 +02:00
|
|
|
uint64 slotKeepSegs;
|
|
|
|
uint64 keepSegs;
|
2020-07-07 19:08:00 +02:00
|
|
|
XLogSegNo failSeg;
|
|
|
|
XLogRecPtr failLSN;
|
2020-04-08 00:35:00 +02:00
|
|
|
|
2020-07-07 19:08:00 +02:00
|
|
|
XLByteToSeg(slot_contents.data.restart_lsn, targetSeg, wal_segment_size);
|
|
|
|
|
2020-07-20 06:30:18 +02:00
|
|
|
/* determine how many segments can be kept by slots */
|
|
|
|
slotKeepSegs = XLogMBVarToSegs(max_slot_wal_keep_size_mb, wal_segment_size);
|
|
|
|
/* ditto for wal_keep_size */
|
|
|
|
keepSegs = XLogMBVarToSegs(wal_keep_size_mb, wal_segment_size);
|
2020-07-07 19:08:00 +02:00
|
|
|
|
|
|
|
/* if currpos reaches failLSN, we lose our segment */
|
2020-07-20 06:30:18 +02:00
|
|
|
failSeg = targetSeg + Max(slotKeepSegs, keepSegs) + 1;
|
2020-07-07 19:08:00 +02:00
|
|
|
XLogSegNoOffsetToRecPtr(failSeg, 0, wal_segment_size, failLSN);
|
|
|
|
|
|
|
|
values[i++] = Int64GetDatum(failLSN - currlsn);
|
2020-04-08 00:35:00 +02:00
|
|
|
}
|
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
Assert(i == PG_GET_REPLICATION_SLOTS_COLS);
|
|
|
|
|
2014-02-01 04:45:17 +01:00
|
|
|
tuplestore_putvalues(tupstore, tupdesc, values, nulls);
|
|
|
|
}
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
|
2017-07-25 19:26:49 +02:00
|
|
|
LWLockRelease(ReplicationSlotControlLock);
|
2014-02-01 04:45:17 +01:00
|
|
|
|
|
|
|
tuplestore_donestoring(tupstore);
|
|
|
|
|
|
|
|
return (Datum) 0;
|
|
|
|
}
|
2018-01-17 12:38:34 +01:00
|
|
|
|
|
|
|
/*
|
2018-07-19 20:15:44 +02:00
|
|
|
* Helper function for advancing our physical replication slot forward.
|
|
|
|
*
|
|
|
|
* The LSN position to move to is compared simply to the slot's restart_lsn,
|
|
|
|
* knowing that any position older than that would be removed by successive
|
|
|
|
* checkpoints.
|
2018-01-17 12:38:34 +01:00
|
|
|
*/
|
|
|
|
static XLogRecPtr
|
Fix a couple of bugs with replication slot advancing feature
A review of the code has showed up a couple of issues fixed by this
commit:
- Physical slots have been using the confirmed LSN position as a start
comparison point which is always 0/0, instead use the restart LSN
position (logical slots need to use the confirmed LSN position, which
was correct).
- The actual slot update was incorrect for both physical and logical
slots. Physical slots need to use their restart_lsn as base comparison
point (confirmed_flush was used because of previous point), and logical
slots need to begin reading WAL from restart_lsn (confirmed_flush was
used as well), while confirmed_flush is compiled depending on the
decoding context and record read, and is the LSN position returned back
to the caller.
- Never return 0/0 if a slot cannot be advanced. This way, if a slot is
advanced while the activity is idle, then the same position is returned
to the caller over and over without raising an error. Instead return
the LSN the slot has been advanced to. With repetitive calls, the same
position is returned hence caller can directly monitor the difference in
progress in bytes by doing simply LSN difference calculations, which
should be monotonic.
Note that as the slot is owned by the backend advancing it, then the
read of those fields is fine lock-less, while updates need to happen
while the slot mutex is held, so fix that on the way as well. Other
locks for in-memory data of replication slots have been already fixed
previously.
Some of those issues have been pointed out by Petr and Simon during the
patch, while I noticed some of them after looking at the code. This
also visibly takes care of a recently-discovered bug causing assertion
failures which can be triggered by a two-step slot forwarding which
first advanced the slot to a WAL page boundary and secondly advanced it
to the latest position, say 'FF/FFFFFFF' to make sure that the newest
LSN is used as forward point. It would have been nice to add a test
for that, but the set of operators working on pg_lsn limits it, so this
is left for a future exercise.
Author: Michael Paquier
Reviewed-by: Petr Jelinek, Simon Riggs
Discussion: https://postgr.es/m/CANP8+jLyS=X-CAk59BJnsxKQfjwrmKicHQykyn52Qj-Q=9GLCw@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/2840048a-1184-417a-9da8-3299d207a1d7%40postgrespro.ru
2018-06-11 02:26:13 +02:00
|
|
|
pg_physical_replication_slot_advance(XLogRecPtr moveto)
|
2018-01-17 12:38:34 +01:00
|
|
|
{
|
Fix a couple of bugs with replication slot advancing feature
A review of the code has showed up a couple of issues fixed by this
commit:
- Physical slots have been using the confirmed LSN position as a start
comparison point which is always 0/0, instead use the restart LSN
position (logical slots need to use the confirmed LSN position, which
was correct).
- The actual slot update was incorrect for both physical and logical
slots. Physical slots need to use their restart_lsn as base comparison
point (confirmed_flush was used because of previous point), and logical
slots need to begin reading WAL from restart_lsn (confirmed_flush was
used as well), while confirmed_flush is compiled depending on the
decoding context and record read, and is the LSN position returned back
to the caller.
- Never return 0/0 if a slot cannot be advanced. This way, if a slot is
advanced while the activity is idle, then the same position is returned
to the caller over and over without raising an error. Instead return
the LSN the slot has been advanced to. With repetitive calls, the same
position is returned hence caller can directly monitor the difference in
progress in bytes by doing simply LSN difference calculations, which
should be monotonic.
Note that as the slot is owned by the backend advancing it, then the
read of those fields is fine lock-less, while updates need to happen
while the slot mutex is held, so fix that on the way as well. Other
locks for in-memory data of replication slots have been already fixed
previously.
Some of those issues have been pointed out by Petr and Simon during the
patch, while I noticed some of them after looking at the code. This
also visibly takes care of a recently-discovered bug causing assertion
failures which can be triggered by a two-step slot forwarding which
first advanced the slot to a WAL page boundary and secondly advanced it
to the latest position, say 'FF/FFFFFFF' to make sure that the newest
LSN is used as forward point. It would have been nice to add a test
for that, but the set of operators working on pg_lsn limits it, so this
is left for a future exercise.
Author: Michael Paquier
Reviewed-by: Petr Jelinek, Simon Riggs
Discussion: https://postgr.es/m/CANP8+jLyS=X-CAk59BJnsxKQfjwrmKicHQykyn52Qj-Q=9GLCw@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/2840048a-1184-417a-9da8-3299d207a1d7%40postgrespro.ru
2018-06-11 02:26:13 +02:00
|
|
|
XLogRecPtr startlsn = MyReplicationSlot->data.restart_lsn;
|
|
|
|
XLogRecPtr retlsn = startlsn;
|
2018-01-17 12:38:34 +01:00
|
|
|
|
2020-04-08 00:35:00 +02:00
|
|
|
Assert(moveto != InvalidXLogRecPtr);
|
|
|
|
|
Fix a couple of bugs with replication slot advancing feature
A review of the code has showed up a couple of issues fixed by this
commit:
- Physical slots have been using the confirmed LSN position as a start
comparison point which is always 0/0, instead use the restart LSN
position (logical slots need to use the confirmed LSN position, which
was correct).
- The actual slot update was incorrect for both physical and logical
slots. Physical slots need to use their restart_lsn as base comparison
point (confirmed_flush was used because of previous point), and logical
slots need to begin reading WAL from restart_lsn (confirmed_flush was
used as well), while confirmed_flush is compiled depending on the
decoding context and record read, and is the LSN position returned back
to the caller.
- Never return 0/0 if a slot cannot be advanced. This way, if a slot is
advanced while the activity is idle, then the same position is returned
to the caller over and over without raising an error. Instead return
the LSN the slot has been advanced to. With repetitive calls, the same
position is returned hence caller can directly monitor the difference in
progress in bytes by doing simply LSN difference calculations, which
should be monotonic.
Note that as the slot is owned by the backend advancing it, then the
read of those fields is fine lock-less, while updates need to happen
while the slot mutex is held, so fix that on the way as well. Other
locks for in-memory data of replication slots have been already fixed
previously.
Some of those issues have been pointed out by Petr and Simon during the
patch, while I noticed some of them after looking at the code. This
also visibly takes care of a recently-discovered bug causing assertion
failures which can be triggered by a two-step slot forwarding which
first advanced the slot to a WAL page boundary and secondly advanced it
to the latest position, say 'FF/FFFFFFF' to make sure that the newest
LSN is used as forward point. It would have been nice to add a test
for that, but the set of operators working on pg_lsn limits it, so this
is left for a future exercise.
Author: Michael Paquier
Reviewed-by: Petr Jelinek, Simon Riggs
Discussion: https://postgr.es/m/CANP8+jLyS=X-CAk59BJnsxKQfjwrmKicHQykyn52Qj-Q=9GLCw@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/2840048a-1184-417a-9da8-3299d207a1d7%40postgrespro.ru
2018-06-11 02:26:13 +02:00
|
|
|
if (startlsn < moveto)
|
2018-01-17 12:38:34 +01:00
|
|
|
{
|
Fix a couple of bugs with replication slot advancing feature
A review of the code has showed up a couple of issues fixed by this
commit:
- Physical slots have been using the confirmed LSN position as a start
comparison point which is always 0/0, instead use the restart LSN
position (logical slots need to use the confirmed LSN position, which
was correct).
- The actual slot update was incorrect for both physical and logical
slots. Physical slots need to use their restart_lsn as base comparison
point (confirmed_flush was used because of previous point), and logical
slots need to begin reading WAL from restart_lsn (confirmed_flush was
used as well), while confirmed_flush is compiled depending on the
decoding context and record read, and is the LSN position returned back
to the caller.
- Never return 0/0 if a slot cannot be advanced. This way, if a slot is
advanced while the activity is idle, then the same position is returned
to the caller over and over without raising an error. Instead return
the LSN the slot has been advanced to. With repetitive calls, the same
position is returned hence caller can directly monitor the difference in
progress in bytes by doing simply LSN difference calculations, which
should be monotonic.
Note that as the slot is owned by the backend advancing it, then the
read of those fields is fine lock-less, while updates need to happen
while the slot mutex is held, so fix that on the way as well. Other
locks for in-memory data of replication slots have been already fixed
previously.
Some of those issues have been pointed out by Petr and Simon during the
patch, while I noticed some of them after looking at the code. This
also visibly takes care of a recently-discovered bug causing assertion
failures which can be triggered by a two-step slot forwarding which
first advanced the slot to a WAL page boundary and secondly advanced it
to the latest position, say 'FF/FFFFFFF' to make sure that the newest
LSN is used as forward point. It would have been nice to add a test
for that, but the set of operators working on pg_lsn limits it, so this
is left for a future exercise.
Author: Michael Paquier
Reviewed-by: Petr Jelinek, Simon Riggs
Discussion: https://postgr.es/m/CANP8+jLyS=X-CAk59BJnsxKQfjwrmKicHQykyn52Qj-Q=9GLCw@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/2840048a-1184-417a-9da8-3299d207a1d7%40postgrespro.ru
2018-06-11 02:26:13 +02:00
|
|
|
SpinLockAcquire(&MyReplicationSlot->mutex);
|
2018-01-17 12:38:34 +01:00
|
|
|
MyReplicationSlot->data.restart_lsn = moveto;
|
Fix a couple of bugs with replication slot advancing feature
A review of the code has showed up a couple of issues fixed by this
commit:
- Physical slots have been using the confirmed LSN position as a start
comparison point which is always 0/0, instead use the restart LSN
position (logical slots need to use the confirmed LSN position, which
was correct).
- The actual slot update was incorrect for both physical and logical
slots. Physical slots need to use their restart_lsn as base comparison
point (confirmed_flush was used because of previous point), and logical
slots need to begin reading WAL from restart_lsn (confirmed_flush was
used as well), while confirmed_flush is compiled depending on the
decoding context and record read, and is the LSN position returned back
to the caller.
- Never return 0/0 if a slot cannot be advanced. This way, if a slot is
advanced while the activity is idle, then the same position is returned
to the caller over and over without raising an error. Instead return
the LSN the slot has been advanced to. With repetitive calls, the same
position is returned hence caller can directly monitor the difference in
progress in bytes by doing simply LSN difference calculations, which
should be monotonic.
Note that as the slot is owned by the backend advancing it, then the
read of those fields is fine lock-less, while updates need to happen
while the slot mutex is held, so fix that on the way as well. Other
locks for in-memory data of replication slots have been already fixed
previously.
Some of those issues have been pointed out by Petr and Simon during the
patch, while I noticed some of them after looking at the code. This
also visibly takes care of a recently-discovered bug causing assertion
failures which can be triggered by a two-step slot forwarding which
first advanced the slot to a WAL page boundary and secondly advanced it
to the latest position, say 'FF/FFFFFFF' to make sure that the newest
LSN is used as forward point. It would have been nice to add a test
for that, but the set of operators working on pg_lsn limits it, so this
is left for a future exercise.
Author: Michael Paquier
Reviewed-by: Petr Jelinek, Simon Riggs
Discussion: https://postgr.es/m/CANP8+jLyS=X-CAk59BJnsxKQfjwrmKicHQykyn52Qj-Q=9GLCw@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/2840048a-1184-417a-9da8-3299d207a1d7%40postgrespro.ru
2018-06-11 02:26:13 +02:00
|
|
|
SpinLockRelease(&MyReplicationSlot->mutex);
|
2018-01-17 12:38:34 +01:00
|
|
|
retlsn = moveto;
|
2020-01-30 03:14:02 +01:00
|
|
|
|
|
|
|
/*
|
2020-05-14 19:06:38 +02:00
|
|
|
* Dirty the slot so as it is written out at the next checkpoint. Note
|
|
|
|
* that the LSN position advanced may still be lost in the event of a
|
|
|
|
* crash, but this makes the data consistent after a clean shutdown.
|
2020-01-30 03:14:02 +01:00
|
|
|
*/
|
|
|
|
ReplicationSlotMarkDirty();
|
2018-01-17 12:38:34 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return retlsn;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2018-07-19 20:15:44 +02:00
|
|
|
* Helper function for advancing our logical replication slot forward.
|
|
|
|
*
|
2019-12-26 14:26:09 +01:00
|
|
|
* The slot's restart_lsn is used as start point for reading records, while
|
|
|
|
* confirmed_flush is used as base point for the decoding context.
|
2018-07-19 20:15:44 +02:00
|
|
|
*
|
|
|
|
* We cannot just do LogicalConfirmReceivedLocation to update confirmed_flush,
|
|
|
|
* because we need to digest WAL to advance restart_lsn allowing to recycle
|
|
|
|
* WAL and removal of old catalog tuples. As decoding is done in fast_forward
|
|
|
|
* mode, no changes are generated anyway.
|
2018-01-17 12:38:34 +01:00
|
|
|
*/
|
|
|
|
static XLogRecPtr
|
Fix a couple of bugs with replication slot advancing feature
A review of the code has showed up a couple of issues fixed by this
commit:
- Physical slots have been using the confirmed LSN position as a start
comparison point which is always 0/0, instead use the restart LSN
position (logical slots need to use the confirmed LSN position, which
was correct).
- The actual slot update was incorrect for both physical and logical
slots. Physical slots need to use their restart_lsn as base comparison
point (confirmed_flush was used because of previous point), and logical
slots need to begin reading WAL from restart_lsn (confirmed_flush was
used as well), while confirmed_flush is compiled depending on the
decoding context and record read, and is the LSN position returned back
to the caller.
- Never return 0/0 if a slot cannot be advanced. This way, if a slot is
advanced while the activity is idle, then the same position is returned
to the caller over and over without raising an error. Instead return
the LSN the slot has been advanced to. With repetitive calls, the same
position is returned hence caller can directly monitor the difference in
progress in bytes by doing simply LSN difference calculations, which
should be monotonic.
Note that as the slot is owned by the backend advancing it, then the
read of those fields is fine lock-less, while updates need to happen
while the slot mutex is held, so fix that on the way as well. Other
locks for in-memory data of replication slots have been already fixed
previously.
Some of those issues have been pointed out by Petr and Simon during the
patch, while I noticed some of them after looking at the code. This
also visibly takes care of a recently-discovered bug causing assertion
failures which can be triggered by a two-step slot forwarding which
first advanced the slot to a WAL page boundary and secondly advanced it
to the latest position, say 'FF/FFFFFFF' to make sure that the newest
LSN is used as forward point. It would have been nice to add a test
for that, but the set of operators working on pg_lsn limits it, so this
is left for a future exercise.
Author: Michael Paquier
Reviewed-by: Petr Jelinek, Simon Riggs
Discussion: https://postgr.es/m/CANP8+jLyS=X-CAk59BJnsxKQfjwrmKicHQykyn52Qj-Q=9GLCw@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/2840048a-1184-417a-9da8-3299d207a1d7%40postgrespro.ru
2018-06-11 02:26:13 +02:00
|
|
|
pg_logical_replication_slot_advance(XLogRecPtr moveto)
{
	/*
	 * Advance the currently-acquired logical slot (MyReplicationSlot) up to
	 * 'moveto' by reading WAL through the logical decoding machinery in
	 * fast_forward mode, then return the slot's resulting confirmed_flush
	 * LSN.  No output-plugin data callbacks fire in fast_forward mode; only
	 * snapshot-builder/slot state is maintained.
	 *
	 * NOTE(review): the storage class and return type are declared just
	 * above this chunk (presumably "static XLogRecPtr") -- confirm against
	 * the full file.  Caller must already own MyReplicationSlot.
	 */
	LogicalDecodingContext *ctx;
	ResourceOwner old_resowner = CurrentResourceOwner;
	XLogRecPtr	retlsn;

	/* Callers are expected to have validated the target LSN already. */
	Assert(moveto != InvalidXLogRecPtr);

	PG_TRY();
	{
		/*
		 * Create our decoding context in fast_forward mode, passing start_lsn
		 * as InvalidXLogRecPtr, so that we start processing from my slot's
		 * confirmed_flush.
		 */
		ctx = CreateDecodingContext(InvalidXLogRecPtr,
									NIL,
									true,	/* fast_forward */
									XL_ROUTINE(.page_read = read_local_xlog_page,
											   .segment_open = wal_segment_open,
											   .segment_close = wal_segment_close),
									NULL, NULL, NULL);

		/*
		 * Start reading at the slot's restart_lsn, which we know to point to
		 * a valid record.
		 */
		XLogBeginRead(ctx->reader, MyReplicationSlot->data.restart_lsn);

		/* invalidate non-timetravel entries */
		InvalidateSystemCaches();

		/* Decode at least one record, until we run out of records */
		while (ctx->reader->EndRecPtr < moveto)
		{
			char	   *errm = NULL;
			XLogRecord *record;

			/*
			 * Read records. No changes are generated in fast_forward mode,
			 * but snapbuilder/slot statuses are updated properly.
			 */
			record = XLogReadRecord(ctx->reader, &errm);
			if (errm)
				elog(ERROR, "%s", errm);

			/*
			 * Process the record.  Storage-level changes are ignored in
			 * fast_forward mode, but other modules (such as snapbuilder)
			 * might still have critical updates to do.
			 */
			if (record)
				LogicalDecodingProcessRecord(ctx, ctx->reader);

			/* Stop once the requested target has been reached */
			if (moveto <= ctx->reader->EndRecPtr)
				break;

			CHECK_FOR_INTERRUPTS();
		}

		/*
		 * Logical decoding could have clobbered CurrentResourceOwner during
		 * transaction management, so restore the executor's value.  (This is
		 * a kluge, but it's not worth cleaning up right now.)
		 */
		CurrentResourceOwner = old_resowner;

		/*
		 * Only confirm a new location if we actually read something; the
		 * reader's EndRecPtr stays invalid when no record was processed.
		 */
		if (ctx->reader->EndRecPtr != InvalidXLogRecPtr)
		{
			LogicalConfirmReceivedLocation(moveto);

			/*
			 * If only the confirmed_flush LSN has changed the slot won't get
			 * marked as dirty by the above. Callers on the walsender
			 * interface are expected to keep track of their own progress and
			 * don't need it written out. But SQL-interface users cannot
			 * specify their own start positions and it's harder for them to
			 * keep track of their progress, so we should make more of an
			 * effort to save it for them.
			 *
			 * Dirty the slot so it is written out at the next checkpoint. The
			 * LSN position advanced to may still be lost on a crash but this
			 * makes the data consistent after a clean shutdown.
			 */
			ReplicationSlotMarkDirty();
		}

		/* Report the position actually recorded in the slot. */
		retlsn = MyReplicationSlot->data.confirmed_flush;

		/* free context, call shutdown callback */
		FreeDecodingContext(ctx);

		InvalidateSystemCaches();
	}
	PG_CATCH();
	{
		/* clear all timetravel entries */
		InvalidateSystemCaches();

		PG_RE_THROW();
	}
	PG_END_TRY();

	return retlsn;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SQL function for moving the position in a replication slot.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
pg_replication_slot_advance(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
Name slotname = PG_GETARG_NAME(0);
|
|
|
|
XLogRecPtr moveto = PG_GETARG_LSN(1);
|
|
|
|
XLogRecPtr endlsn;
|
Fix a couple of bugs with replication slot advancing feature
A review of the code has showed up a couple of issues fixed by this
commit:
- Physical slots have been using the confirmed LSN position as a start
comparison point which is always 0/0, instead use the restart LSN
position (logical slots need to use the confirmed LSN position, which
was correct).
- The actual slot update was incorrect for both physical and logical
slots. Physical slots need to use their restart_lsn as base comparison
point (confirmed_flush was used because of previous point), and logical
slots need to begin reading WAL from restart_lsn (confirmed_flush was
used as well), while confirmed_flush is compiled depending on the
decoding context and record read, and is the LSN position returned back
to the caller.
- Never return 0/0 if a slot cannot be advanced. This way, if a slot is
advanced while the activity is idle, then the same position is returned
to the caller over and over without raising an error. Instead return
the LSN the slot has been advanced to. With repetitive calls, the same
position is returned hence caller can directly monitor the difference in
progress in bytes by doing simply LSN difference calculations, which
should be monotonic.
Note that as the slot is owned by the backend advancing it, then the
read of those fields is fine lock-less, while updates need to happen
while the slot mutex is held, so fix that on the way as well. Other
locks for in-memory data of replication slots have been already fixed
previously.
Some of those issues have been pointed out by Petr and Simon during the
patch, while I noticed some of them after looking at the code. This
also visibly takes of a recently-discovered bug causing assertion
failures which can be triggered by a two-step slot forwarding which
first advanced the slot to a WAL page boundary and secondly advanced it
to the latest position, say 'FF/FFFFFFF' to make sure that the newest
LSN is used as forward point. It would have been nice to drop a test
for that, but the set of operators working on pg_lsn limits it, so this
is left for a future exercise.
Author: Michael Paquier
Reviewed-by: Petr Jelinek, Simon Riggs
Discussion: https://postgr.es/m/CANP8+jLyS=X-CAk59BJnsxKQfjwrmKicHQykyn52Qj-Q=9GLCw@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/2840048a-1184-417a-9da8-3299d207a1d7%40postgrespro.ru
2018-06-11 02:26:13 +02:00
|
|
|
XLogRecPtr minlsn;
|
2018-01-17 12:38:34 +01:00
|
|
|
TupleDesc tupdesc;
|
|
|
|
Datum values[2];
|
|
|
|
bool nulls[2];
|
|
|
|
HeapTuple tuple;
|
|
|
|
Datum result;
|
|
|
|
|
|
|
|
Assert(!MyReplicationSlot);
|
|
|
|
|
|
|
|
check_permissions();
|
|
|
|
|
|
|
|
if (XLogRecPtrIsInvalid(moveto))
|
|
|
|
ereport(ERROR,
|
2019-09-23 13:37:33 +02:00
|
|
|
(errmsg("invalid target WAL LSN")));
|
2018-01-17 12:38:34 +01:00
|
|
|
|
|
|
|
/* Build a tuple descriptor for our result type */
|
|
|
|
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
|
|
|
elog(ERROR, "return type must be a row type");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We can't move slot past what's been flushed/replayed so clamp the
|
2018-01-19 07:36:17 +01:00
|
|
|
* target position accordingly.
|
2018-01-17 12:38:34 +01:00
|
|
|
*/
|
|
|
|
if (!RecoveryInProgress())
|
|
|
|
moveto = Min(moveto, GetFlushRecPtr());
|
|
|
|
else
|
|
|
|
moveto = Min(moveto, GetXLogReplayRecPtr(&ThisTimeLineID));
|
|
|
|
|
|
|
|
/* Acquire the slot so we "own" it */
|
2020-04-08 00:35:00 +02:00
|
|
|
(void) ReplicationSlotAcquire(NameStr(*slotname), SAB_Error);
|
2018-01-17 12:38:34 +01:00
|
|
|
|
2018-07-11 01:56:24 +02:00
|
|
|
/* A slot whose restart_lsn has never been reserved cannot be advanced */
|
|
|
|
if (XLogRecPtrIsInvalid(MyReplicationSlot->data.restart_lsn))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
2020-04-29 02:39:04 +02:00
|
|
|
errmsg("replication slot \"%s\" cannot be advanced",
|
|
|
|
NameStr(*slotname)),
|
|
|
|
errdetail("This slot has never previously reserved WAL, or has been invalidated.")));
|
2018-07-11 01:56:24 +02:00
|
|
|
|
Fix a couple of bugs with replication slot advancing feature
A review of the code has showed up a couple of issues fixed by this
commit:
- Physical slots have been using the confirmed LSN position as a start
comparison point which is always 0/0, instead use the restart LSN
position (logical slots need to use the confirmed LSN position, which
was correct).
- The actual slot update was incorrect for both physical and logical
slots. Physical slots need to use their restart_lsn as base comparison
point (confirmed_flush was used because of previous point), and logical
slots need to begin reading WAL from restart_lsn (confirmed_flush was
used as well), while confirmed_flush is compiled depending on the
decoding context and record read, and is the LSN position returned back
to the caller.
- Never return 0/0 if a slot cannot be advanced. This way, if a slot is
advanced while the activity is idle, then the same position is returned
to the caller over and over without raising an error. Instead return
the LSN the slot has been advanced to. With repetitive calls, the same
position is returned hence caller can directly monitor the difference in
progress in bytes by doing simply LSN difference calculations, which
should be monotonic.
Note that as the slot is owned by the backend advancing it, then the
read of those fields is fine lock-less, while updates need to happen
while the slot mutex is held, so fix that on the way as well. Other
locks for in-memory data of replication slots have been already fixed
previously.
Some of those issues have been pointed out by Petr and Simon during the
patch, while I noticed some of them after looking at the code. This
also visibly takes of a recently-discovered bug causing assertion
failures which can be triggered by a two-step slot forwarding which
first advanced the slot to a WAL page boundary and secondly advanced it
to the latest position, say 'FF/FFFFFFF' to make sure that the newest
LSN is used as forward point. It would have been nice to drop a test
for that, but the set of operators working on pg_lsn limits it, so this
is left for a future exercise.
Author: Michael Paquier
Reviewed-by: Petr Jelinek, Simon Riggs
Discussion: https://postgr.es/m/CANP8+jLyS=X-CAk59BJnsxKQfjwrmKicHQykyn52Qj-Q=9GLCw@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/2840048a-1184-417a-9da8-3299d207a1d7%40postgrespro.ru
2018-06-11 02:26:13 +02:00
|
|
|
/*
|
|
|
|
* Check if the slot is not moving backwards. Physical slots rely simply
|
|
|
|
* on restart_lsn as a minimum point, while logical slots have confirmed
|
2019-12-26 14:26:09 +01:00
|
|
|
* consumption up to confirmed_flush, meaning that in both cases data
|
|
|
|
* older than that is not available anymore.
|
Fix a couple of bugs with replication slot advancing feature
A review of the code has showed up a couple of issues fixed by this
commit:
- Physical slots have been using the confirmed LSN position as a start
comparison point which is always 0/0, instead use the restart LSN
position (logical slots need to use the confirmed LSN position, which
was correct).
- The actual slot update was incorrect for both physical and logical
slots. Physical slots need to use their restart_lsn as base comparison
point (confirmed_flush was used because of previous point), and logical
slots need to begin reading WAL from restart_lsn (confirmed_flush was
used as well), while confirmed_flush is compiled depending on the
decoding context and record read, and is the LSN position returned back
to the caller.
- Never return 0/0 if a slot cannot be advanced. This way, if a slot is
advanced while the activity is idle, then the same position is returned
to the caller over and over without raising an error. Instead return
the LSN the slot has been advanced to. With repetitive calls, the same
position is returned hence caller can directly monitor the difference in
progress in bytes by doing simply LSN difference calculations, which
should be monotonic.
Note that as the slot is owned by the backend advancing it, then the
read of those fields is fine lock-less, while updates need to happen
while the slot mutex is held, so fix that on the way as well. Other
locks for in-memory data of replication slots have been already fixed
previously.
Some of those issues have been pointed out by Petr and Simon during the
patch, while I noticed some of them after looking at the code. This
also visibly takes of a recently-discovered bug causing assertion
failures which can be triggered by a two-step slot forwarding which
first advanced the slot to a WAL page boundary and secondly advanced it
to the latest position, say 'FF/FFFFFFF' to make sure that the newest
LSN is used as forward point. It would have been nice to drop a test
for that, but the set of operators working on pg_lsn limits it, so this
is left for a future exercise.
Author: Michael Paquier
Reviewed-by: Petr Jelinek, Simon Riggs
Discussion: https://postgr.es/m/CANP8+jLyS=X-CAk59BJnsxKQfjwrmKicHQykyn52Qj-Q=9GLCw@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/2840048a-1184-417a-9da8-3299d207a1d7%40postgrespro.ru
2018-06-11 02:26:13 +02:00
|
|
|
*/
|
|
|
|
if (OidIsValid(MyReplicationSlot->data.database))
|
|
|
|
minlsn = MyReplicationSlot->data.confirmed_flush;
|
|
|
|
else
|
|
|
|
minlsn = MyReplicationSlot->data.restart_lsn;
|
|
|
|
|
|
|
|
if (moveto < minlsn)
|
2018-01-17 12:38:34 +01:00
|
|
|
ereport(ERROR,
|
2018-07-11 01:56:24 +02:00
|
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
|
|
|
errmsg("cannot advance replication slot to %X/%X, minimum is %X/%X",
|
2021-02-23 10:14:38 +01:00
|
|
|
LSN_FORMAT_ARGS(moveto), LSN_FORMAT_ARGS(minlsn))));
|
2018-01-17 12:38:34 +01:00
|
|
|
|
Fix a couple of bugs with replication slot advancing feature
A review of the code has showed up a couple of issues fixed by this
commit:
- Physical slots have been using the confirmed LSN position as a start
comparison point which is always 0/0, instead use the restart LSN
position (logical slots need to use the confirmed LSN position, which
was correct).
- The actual slot update was incorrect for both physical and logical
slots. Physical slots need to use their restart_lsn as base comparison
point (confirmed_flush was used because of previous point), and logical
slots need to begin reading WAL from restart_lsn (confirmed_flush was
used as well), while confirmed_flush is compiled depending on the
decoding context and record read, and is the LSN position returned back
to the caller.
- Never return 0/0 if a slot cannot be advanced. This way, if a slot is
advanced while the activity is idle, then the same position is returned
to the caller over and over without raising an error. Instead return
the LSN the slot has been advanced to. With repetitive calls, the same
position is returned hence caller can directly monitor the difference in
progress in bytes by doing simply LSN difference calculations, which
should be monotonic.
Note that as the slot is owned by the backend advancing it, then the
read of those fields is fine lock-less, while updates need to happen
while the slot mutex is held, so fix that on the way as well. Other
locks for in-memory data of replication slots have been already fixed
previously.
Some of those issues have been pointed out by Petr and Simon during the
patch, while I noticed some of them after looking at the code. This
also visibly takes of a recently-discovered bug causing assertion
failures which can be triggered by a two-step slot forwarding which
first advanced the slot to a WAL page boundary and secondly advanced it
to the latest position, say 'FF/FFFFFFF' to make sure that the newest
LSN is used as forward point. It would have been nice to drop a test
for that, but the set of operators working on pg_lsn limits it, so this
is left for a future exercise.
Author: Michael Paquier
Reviewed-by: Petr Jelinek, Simon Riggs
Discussion: https://postgr.es/m/CANP8+jLyS=X-CAk59BJnsxKQfjwrmKicHQykyn52Qj-Q=9GLCw@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/2840048a-1184-417a-9da8-3299d207a1d7%40postgrespro.ru
2018-06-11 02:26:13 +02:00
|
|
|
/* Do the actual slot update, depending on the slot type */
|
2018-01-17 12:38:34 +01:00
|
|
|
if (OidIsValid(MyReplicationSlot->data.database))
|
Fix a couple of bugs with replication slot advancing feature
A review of the code has showed up a couple of issues fixed by this
commit:
- Physical slots have been using the confirmed LSN position as a start
comparison point which is always 0/0, instead use the restart LSN
position (logical slots need to use the confirmed LSN position, which
was correct).
- The actual slot update was incorrect for both physical and logical
slots. Physical slots need to use their restart_lsn as base comparison
point (confirmed_flush was used because of previous point), and logical
slots need to begin reading WAL from restart_lsn (confirmed_flush was
used as well), while confirmed_flush is compiled depending on the
decoding context and record read, and is the LSN position returned back
to the caller.
- Never return 0/0 if a slot cannot be advanced. This way, if a slot is
advanced while the activity is idle, then the same position is returned
to the caller over and over without raising an error. Instead return
the LSN the slot has been advanced to. With repetitive calls, the same
position is returned hence caller can directly monitor the difference in
progress in bytes by doing simply LSN difference calculations, which
should be monotonic.
Note that as the slot is owned by the backend advancing it, then the
read of those fields is fine lock-less, while updates need to happen
while the slot mutex is held, so fix that on the way as well. Other
locks for in-memory data of replication slots have been already fixed
previously.
Some of those issues have been pointed out by Petr and Simon during the
patch, while I noticed some of them after looking at the code. This
also visibly takes of a recently-discovered bug causing assertion
failures which can be triggered by a two-step slot forwarding which
first advanced the slot to a WAL page boundary and secondly advanced it
to the latest position, say 'FF/FFFFFFF' to make sure that the newest
LSN is used as forward point. It would have been nice to drop a test
for that, but the set of operators working on pg_lsn limits it, so this
is left for a future exercise.
Author: Michael Paquier
Reviewed-by: Petr Jelinek, Simon Riggs
Discussion: https://postgr.es/m/CANP8+jLyS=X-CAk59BJnsxKQfjwrmKicHQykyn52Qj-Q=9GLCw@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/2840048a-1184-417a-9da8-3299d207a1d7%40postgrespro.ru
2018-06-11 02:26:13 +02:00
|
|
|
endlsn = pg_logical_replication_slot_advance(moveto);
|
2018-01-17 12:38:34 +01:00
|
|
|
else
|
Fix a couple of bugs with replication slot advancing feature
A review of the code has showed up a couple of issues fixed by this
commit:
- Physical slots have been using the confirmed LSN position as a start
comparison point which is always 0/0, instead use the restart LSN
position (logical slots need to use the confirmed LSN position, which
was correct).
- The actual slot update was incorrect for both physical and logical
slots. Physical slots need to use their restart_lsn as base comparison
point (confirmed_flush was used because of previous point), and logical
slots need to begin reading WAL from restart_lsn (confirmed_flush was
used as well), while confirmed_flush is compiled depending on the
decoding context and record read, and is the LSN position returned back
to the caller.
- Never return 0/0 if a slot cannot be advanced. This way, if a slot is
advanced while the activity is idle, then the same position is returned
to the caller over and over without raising an error. Instead return
the LSN the slot has been advanced to. With repetitive calls, the same
position is returned hence caller can directly monitor the difference in
progress in bytes by doing simply LSN difference calculations, which
should be monotonic.
Note that as the slot is owned by the backend advancing it, then the
read of those fields is fine lock-less, while updates need to happen
while the slot mutex is held, so fix that on the way as well. Other
locks for in-memory data of replication slots have been already fixed
previously.
Some of those issues have been pointed out by Petr and Simon during the
patch, while I noticed some of them after looking at the code. This
also visibly takes of a recently-discovered bug causing assertion
failures which can be triggered by a two-step slot forwarding which
first advanced the slot to a WAL page boundary and secondly advanced it
to the latest position, say 'FF/FFFFFFF' to make sure that the newest
LSN is used as forward point. It would have been nice to drop a test
for that, but the set of operators working on pg_lsn limits it, so this
is left for a future exercise.
Author: Michael Paquier
Reviewed-by: Petr Jelinek, Simon Riggs
Discussion: https://postgr.es/m/CANP8+jLyS=X-CAk59BJnsxKQfjwrmKicHQykyn52Qj-Q=9GLCw@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/2840048a-1184-417a-9da8-3299d207a1d7%40postgrespro.ru
2018-06-11 02:26:13 +02:00
|
|
|
endlsn = pg_physical_replication_slot_advance(moveto);
|
2018-01-17 12:38:34 +01:00
|
|
|
|
|
|
|
values[0] = NameGetDatum(&MyReplicationSlot->data.name);
|
|
|
|
nulls[0] = false;
|
|
|
|
|
2020-06-18 09:34:59 +02:00
|
|
|
/*
|
|
|
|
* Recompute the minimum LSN and xmin across all slots to adjust with the
|
|
|
|
* advancing potentially done.
|
|
|
|
*/
|
|
|
|
ReplicationSlotsComputeRequiredXmin(false);
|
|
|
|
ReplicationSlotsComputeRequiredLSN();
|
|
|
|
|
2018-01-17 12:38:34 +01:00
|
|
|
ReplicationSlotRelease();
|
|
|
|
|
|
|
|
/* Return the reached position. */
|
|
|
|
values[1] = LSNGetDatum(endlsn);
|
|
|
|
nulls[1] = false;
|
|
|
|
|
|
|
|
tuple = heap_form_tuple(tupdesc, values, nulls);
|
|
|
|
result = HeapTupleGetDatum(tuple);
|
|
|
|
|
|
|
|
PG_RETURN_DATUM(result);
|
|
|
|
}
|
2019-04-05 19:52:45 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Helper function of copying a replication slot.
|
|
|
|
*/
|
|
|
|
static Datum
|
|
|
|
copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot)
|
|
|
|
{
|
|
|
|
Name src_name = PG_GETARG_NAME(0);
|
|
|
|
Name dst_name = PG_GETARG_NAME(1);
|
|
|
|
ReplicationSlot *src = NULL;
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
ReplicationSlot first_slot_contents;
|
|
|
|
ReplicationSlot second_slot_contents;
|
2019-04-05 19:52:45 +02:00
|
|
|
XLogRecPtr src_restart_lsn;
|
|
|
|
bool src_islogical;
|
|
|
|
bool temporary;
|
|
|
|
char *plugin;
|
|
|
|
Datum values[2];
|
|
|
|
bool nulls[2];
|
|
|
|
Datum result;
|
|
|
|
TupleDesc tupdesc;
|
|
|
|
HeapTuple tuple;
|
|
|
|
|
|
|
|
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
|
|
|
|
elog(ERROR, "return type must be a row type");
|
|
|
|
|
|
|
|
check_permissions();
|
|
|
|
|
|
|
|
if (logical_slot)
|
|
|
|
CheckLogicalDecodingRequirements();
|
|
|
|
else
|
|
|
|
CheckSlotRequirements();
|
|
|
|
|
|
|
|
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We need to prevent the source slot's reserved WAL from being removed,
|
|
|
|
* but we don't want to lock that slot for very long, and it can advance
|
|
|
|
* in the meantime. So obtain the source slot's data, and create a new
|
|
|
|
* slot using its restart_lsn. Afterwards we lock the source slot again
|
|
|
|
* and verify that the data we copied (name, type) has not changed
|
|
|
|
* incompatibly. No inconvenient WAL removal can occur once the new slot
|
|
|
|
* is created -- but since WAL removal could have occurred before we
|
|
|
|
* managed to create the new slot, we advance the new slot's restart_lsn
|
|
|
|
* to the source slot's updated restart_lsn the second time we lock it.
|
|
|
|
*/
|
|
|
|
for (int i = 0; i < max_replication_slots; i++)
|
|
|
|
{
|
|
|
|
ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
|
|
|
|
|
|
|
|
if (s->in_use && strcmp(NameStr(s->data.name), NameStr(*src_name)) == 0)
|
|
|
|
{
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
/* Copy the slot contents while holding spinlock */
|
2019-04-05 19:52:45 +02:00
|
|
|
SpinLockAcquire(&s->mutex);
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
first_slot_contents = *s;
|
2019-04-05 19:52:45 +02:00
|
|
|
SpinLockRelease(&s->mutex);
|
|
|
|
src = s;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
LWLockRelease(ReplicationSlotControlLock);
|
|
|
|
|
|
|
|
if (src == NULL)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_OBJECT),
|
|
|
|
errmsg("replication slot \"%s\" does not exist", NameStr(*src_name))));
|
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
src_islogical = SlotIsLogical(&first_slot_contents);
|
|
|
|
src_restart_lsn = first_slot_contents.data.restart_lsn;
|
|
|
|
temporary = (first_slot_contents.data.persistency == RS_TEMPORARY);
|
|
|
|
plugin = logical_slot ? NameStr(first_slot_contents.data.plugin) : NULL;
|
|
|
|
|
2019-04-05 19:52:45 +02:00
|
|
|
/* Check type of replication slot */
|
|
|
|
if (src_islogical != logical_slot)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
src_islogical ?
|
|
|
|
errmsg("cannot copy physical replication slot \"%s\" as a logical replication slot",
|
|
|
|
NameStr(*src_name)) :
|
|
|
|
errmsg("cannot copy logical replication slot \"%s\" as a physical replication slot",
|
|
|
|
NameStr(*src_name))));
|
|
|
|
|
|
|
|
/* Copying non-reserved slot doesn't make sense */
|
|
|
|
if (XLogRecPtrIsInvalid(src_restart_lsn))
|
|
|
|
ereport(ERROR,
|
2020-06-25 04:13:13 +02:00
|
|
|
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
2020-01-30 17:32:04 +01:00
|
|
|
errmsg("cannot copy a replication slot that doesn't reserve WAL")));
|
2019-04-05 19:52:45 +02:00
|
|
|
|
|
|
|
/* Overwrite params from optional arguments */
|
|
|
|
if (PG_NARGS() >= 3)
|
|
|
|
temporary = PG_GETARG_BOOL(2);
|
|
|
|
if (PG_NARGS() >= 4)
|
|
|
|
{
|
|
|
|
Assert(logical_slot);
|
|
|
|
plugin = NameStr(*(PG_GETARG_NAME(3)));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Create new slot and acquire it */
|
|
|
|
if (logical_slot)
|
2020-03-17 20:13:18 +01:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We must not try to read WAL, since we haven't reserved it yet --
|
|
|
|
* hence pass find_startpoint false. confirmed_flush will be set
|
|
|
|
* below, by copying from the source slot.
|
|
|
|
*/
|
2019-04-05 19:52:45 +02:00
|
|
|
create_logical_replication_slot(NameStr(*dst_name),
|
|
|
|
plugin,
|
|
|
|
temporary,
|
2020-03-17 20:13:18 +01:00
|
|
|
src_restart_lsn,
|
|
|
|
false);
|
|
|
|
}
|
2019-04-05 19:52:45 +02:00
|
|
|
else
|
|
|
|
create_physical_replication_slot(NameStr(*dst_name),
|
|
|
|
true,
|
|
|
|
temporary,
|
|
|
|
src_restart_lsn);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update the destination slot to current values of the source slot;
|
|
|
|
* recheck that the source slot is still the one we saw previously.
|
|
|
|
*/
|
|
|
|
{
|
|
|
|
TransactionId copy_effective_xmin;
|
|
|
|
TransactionId copy_effective_catalog_xmin;
|
|
|
|
TransactionId copy_xmin;
|
|
|
|
TransactionId copy_catalog_xmin;
|
|
|
|
XLogRecPtr copy_restart_lsn;
|
2020-03-17 20:13:18 +01:00
|
|
|
XLogRecPtr copy_confirmed_flush;
|
2019-04-05 19:52:45 +02:00
|
|
|
bool copy_islogical;
|
|
|
|
char *copy_name;
|
|
|
|
|
|
|
|
/* Copy data of source slot again */
|
|
|
|
SpinLockAcquire(&src->mutex);
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
second_slot_contents = *src;
|
|
|
|
SpinLockRelease(&src->mutex);
|
2019-04-05 19:52:45 +02:00
|
|
|
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
copy_effective_xmin = second_slot_contents.effective_xmin;
|
|
|
|
copy_effective_catalog_xmin = second_slot_contents.effective_catalog_xmin;
|
|
|
|
|
|
|
|
copy_xmin = second_slot_contents.data.xmin;
|
|
|
|
copy_catalog_xmin = second_slot_contents.data.catalog_xmin;
|
|
|
|
copy_restart_lsn = second_slot_contents.data.restart_lsn;
|
|
|
|
copy_confirmed_flush = second_slot_contents.data.confirmed_flush;
|
2019-04-05 19:52:45 +02:00
|
|
|
|
|
|
|
/* for existence check */
|
Don't call palloc() while holding a spinlock, either.
Fix some more violations of the "only straight-line code inside a
spinlock" rule. These are hazardous not only because they risk
holding the lock for an excessively long time, but because it's
possible for palloc to throw elog(ERROR), leaving a stuck spinlock
behind.
copy_replication_slot() had two separate places that did pallocs
while holding a spinlock. We can make the code simpler and safer
by copying the whole ReplicationSlot struct into a local variable
while holding the spinlock, and then referencing that copy.
(While that's arguably more cycles than we really need to spend
holding the lock, the struct isn't all that big, and this way seems
far more maintainable than copying fields piecemeal. Anyway this
is surely much cheaper than a palloc.) That bug goes back to v12.
InvalidateObsoleteReplicationSlots() not only did a palloc while
holding a spinlock, but for extra sloppiness then leaked the memory
--- probably for the lifetime of the checkpointer process, though
I didn't try to verify that. Fortunately that silliness is new
in HEAD.
pg_get_replication_slots() had a cosmetic violation of the rule,
in that it only assumed it's safe to call namecpy() while holding
a spinlock. Still, that's a hazard waiting to bite somebody, and
there were some other cosmetic coding-rule violations in the same
function, so clean it up. I back-patched this as far as v10; the
code exists before that but it looks different, and this didn't
seem important enough to adapt the patch further back.
Discussion: https://postgr.es/m/20200602.161518.1399689010416646074.horikyota.ntt@gmail.com
2020-06-03 18:36:00 +02:00
|
|
|
copy_name = NameStr(second_slot_contents.data.name);
|
|
|
|
copy_islogical = SlotIsLogical(&second_slot_contents);
|
2019-04-05 19:52:45 +02:00
|
|
|
|
|
|
|
/*
|
2019-05-22 18:55:34 +02:00
|
|
|
* Check if the source slot still exists and is valid. We regard it as
|
|
|
|
* invalid if the type of replication slot or name has been changed,
|
|
|
|
* or the restart_lsn either is invalid or has gone backward. (The
|
|
|
|
* restart_lsn could go backwards if the source slot is dropped and
|
|
|
|
* copied from an older slot during installation.)
|
2019-04-05 19:52:45 +02:00
|
|
|
*
|
|
|
|
* Since erroring out will release and drop the destination slot we
|
|
|
|
* don't need to release it here.
|
|
|
|
*/
|
|
|
|
if (copy_restart_lsn < src_restart_lsn ||
|
|
|
|
src_islogical != copy_islogical ||
|
|
|
|
strcmp(copy_name, NameStr(*src_name)) != 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errmsg("could not copy replication slot \"%s\"",
|
|
|
|
NameStr(*src_name)),
|
|
|
|
errdetail("The source replication slot was modified incompatibly during the copy operation.")));
|
|
|
|
|
2020-03-17 20:13:18 +01:00
|
|
|
/* The source slot must have a consistent snapshot */
|
|
|
|
if (src_islogical && XLogRecPtrIsInvalid(copy_confirmed_flush))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("cannot copy unfinished logical replication slot \"%s\"",
|
|
|
|
NameStr(*src_name)),
|
|
|
|
errhint("Retry when the source replication slot's confirmed_flush_lsn is valid.")));
|
|
|
|
|
2019-04-05 19:52:45 +02:00
|
|
|
/* Install copied values again */
|
|
|
|
SpinLockAcquire(&MyReplicationSlot->mutex);
|
|
|
|
MyReplicationSlot->effective_xmin = copy_effective_xmin;
|
|
|
|
MyReplicationSlot->effective_catalog_xmin = copy_effective_catalog_xmin;
|
|
|
|
|
|
|
|
MyReplicationSlot->data.xmin = copy_xmin;
|
|
|
|
MyReplicationSlot->data.catalog_xmin = copy_catalog_xmin;
|
|
|
|
MyReplicationSlot->data.restart_lsn = copy_restart_lsn;
|
2020-03-17 20:13:18 +01:00
|
|
|
MyReplicationSlot->data.confirmed_flush = copy_confirmed_flush;
|
2019-04-05 19:52:45 +02:00
|
|
|
SpinLockRelease(&MyReplicationSlot->mutex);
|
|
|
|
|
|
|
|
ReplicationSlotMarkDirty();
|
|
|
|
ReplicationSlotsComputeRequiredXmin(false);
|
|
|
|
ReplicationSlotsComputeRequiredLSN();
|
|
|
|
ReplicationSlotSave();
|
|
|
|
|
|
|
|
#ifdef USE_ASSERT_CHECKING
|
|
|
|
/* Check that the restart_lsn is available */
|
|
|
|
{
|
|
|
|
XLogSegNo segno;
|
|
|
|
|
|
|
|
XLByteToSeg(copy_restart_lsn, segno, wal_segment_size);
|
|
|
|
Assert(XLogGetLastRemovedSegno() < segno);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/* target slot fully created, mark as persistent if needed */
|
|
|
|
if (logical_slot && !temporary)
|
|
|
|
ReplicationSlotPersist();
|
|
|
|
|
|
|
|
/* All done. Set up the return values */
|
|
|
|
values[0] = NameGetDatum(dst_name);
|
|
|
|
nulls[0] = false;
|
|
|
|
if (!XLogRecPtrIsInvalid(MyReplicationSlot->data.confirmed_flush))
|
|
|
|
{
|
|
|
|
values[1] = LSNGetDatum(MyReplicationSlot->data.confirmed_flush);
|
|
|
|
nulls[1] = false;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
nulls[1] = true;
|
|
|
|
|
|
|
|
tuple = heap_form_tuple(tupdesc, values, nulls);
|
|
|
|
result = HeapTupleGetDatum(tuple);
|
|
|
|
|
|
|
|
ReplicationSlotRelease();
|
|
|
|
|
|
|
|
PG_RETURN_DATUM(result);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The wrappers below are all to appease opr_sanity */
|
|
|
|
Datum
|
|
|
|
pg_copy_logical_replication_slot_a(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return copy_replication_slot(fcinfo, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
pg_copy_logical_replication_slot_b(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return copy_replication_slot(fcinfo, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
pg_copy_logical_replication_slot_c(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return copy_replication_slot(fcinfo, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
pg_copy_physical_replication_slot_a(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return copy_replication_slot(fcinfo, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
Datum
|
|
|
|
pg_copy_physical_replication_slot_b(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return copy_replication_slot(fcinfo, false);
|
|
|
|
}
|