Fix bogus tuple-slot management in logical replication UPDATE handling.

slot_modify_cstrings seriously abused the TupleTableSlot API by relying
on a slot's underlying data to stay valid across ExecClearTuple.  Since
this abuse was also quite undocumented, it's little surprise that the
case got broken during the v12 slot rewrites.  As reported in bug #16129
from Ondřej Jirman, this could lead to crashes or data corruption when
a logical replication subscriber processes a row update.  Problems would
only arise if the subscriber's table contained columns of pass-by-ref
types that were not being copied from the publisher.

Fix by explicitly copying the datum/isnull arrays from the source slot
that the old row was in already.  This ends up being about the same
thing that happened pre-v12, but hopefully in a less opaque and
fragile way.

We might've caught the problem sooner if there were any test cases
dealing with updates involving non-replicated or dropped columns.
Now there are.

Back-patch to v10 where this code came in.  Even though the failure
does not manifest before v12, IMO this code is too fragile to leave
as-is.  In any case we certainly want the additional test coverage.

Patch by me; thanks to Tomas Vondra for initial investigation.

Discussion: https://postgr.es/m/16129-a0c0f48e71741e5f@postgresql.org
This commit is contained in:
Tom Lane 2019-11-22 11:31:19 -05:00
parent 8959a5e0fa
commit 4d9ceb0018
2 changed files with 70 additions and 17 deletions

View File

@ -363,13 +363,19 @@ slot_store_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
}
/*
* Modify slot with user data provided as C strings.
* Replace selected columns with user data provided as C strings.
* This is somewhat similar to heap_modify_tuple but also calls the type
* input function on the user data as the input is the text representation
* of the types.
* input functions on the user data.
* "slot" is filled with a copy of the tuple in "srcslot", with
* columns selected by the "replaces" array replaced with data values
* from "values".
* Caution: unreplaced pass-by-ref columns in "slot" will point into the
* storage for "srcslot". This is OK for current usage, but someday we may
* need to materialize "slot" at the end to make it independent of "srcslot".
*/
static void
slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
slot_modify_cstrings(TupleTableSlot *slot, TupleTableSlot *srcslot,
LogicalRepRelMapEntry *rel,
char **values, bool *replaces)
{
int natts = slot->tts_tupleDescriptor->natts;
@ -377,10 +383,19 @@ slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
SlotErrCallbackArg errarg;
ErrorContextCallback errcallback;
slot_getallattrs(slot);
/* We'll fill "slot" with a virtual tuple, so we must start with ... */
ExecClearTuple(slot);
/* Push callback + info on the error context stack */
/*
* Copy all the column data from srcslot, so that we'll have valid values
* for unreplaced columns.
*/
Assert(natts == srcslot->tts_tupleDescriptor->natts);
slot_getallattrs(srcslot);
memcpy(slot->tts_values, srcslot->tts_values, natts * sizeof(Datum));
memcpy(slot->tts_isnull, srcslot->tts_isnull, natts * sizeof(bool));
/* For error reporting, push callback + info on the error context stack */
errarg.rel = rel;
errarg.local_attnum = -1;
errarg.remote_attnum = -1;
@ -428,6 +443,7 @@ slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
/* Pop the error context stack */
error_context_stack = errcallback.previous;
/* And finally, declare that "slot" contains a valid virtual tuple */
ExecStoreVirtualTuple(slot);
}
@ -740,8 +756,8 @@ apply_handle_update(StringInfo s)
{
/* Process and store remote tuple in the slot */
oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
ExecCopySlot(remoteslot, localslot);
slot_modify_cstrings(remoteslot, rel, newtup.values, newtup.changed);
slot_modify_cstrings(remoteslot, localslot, rel,
newtup.values, newtup.changed);
MemoryContextSwitchTo(oldctx);
EvalPlanQualSetSlot(&epqstate, remoteslot);

View File

@ -3,7 +3,7 @@ use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 17;
use Test::More tests => 20;
# Initialize publisher node
my $node_publisher = get_new_node('publisher');
@ -28,9 +28,9 @@ $node_publisher->safe_psql('postgres',
$node_publisher->safe_psql('postgres',
"CREATE TABLE tab_rep (a int primary key)");
$node_publisher->safe_psql('postgres',
"CREATE TABLE tab_mixed (a int primary key, b text)");
"CREATE TABLE tab_mixed (a int primary key, b text, c numeric)");
$node_publisher->safe_psql('postgres',
"INSERT INTO tab_mixed (a, b) VALUES (1, 'foo')");
"INSERT INTO tab_mixed (a, b, c) VALUES (1, 'foo', 1.1)");
$node_publisher->safe_psql('postgres',
"CREATE TABLE tab_include (a int, b text, CONSTRAINT covering PRIMARY KEY(a) INCLUDE(b))"
);
@ -45,7 +45,8 @@ $node_subscriber->safe_psql('postgres',
# different column count and order than on publisher
$node_subscriber->safe_psql('postgres',
"CREATE TABLE tab_mixed (c text, b text, a int primary key)");
"CREATE TABLE tab_mixed (d text default 'local', c numeric, b text, a int primary key)"
);
# replication of the table with included index
$node_subscriber->safe_psql('postgres',
@ -94,7 +95,7 @@ $node_publisher->safe_psql('postgres', "DELETE FROM tab_rep WHERE a > 20");
$node_publisher->safe_psql('postgres', "UPDATE tab_rep SET a = -a");
$node_publisher->safe_psql('postgres',
"INSERT INTO tab_mixed VALUES (2, 'bar')");
"INSERT INTO tab_mixed VALUES (2, 'bar', 2.2)");
$node_publisher->safe_psql('postgres',
"INSERT INTO tab_include SELECT generate_series(1,50)");
@ -112,10 +113,9 @@ $result = $node_subscriber->safe_psql('postgres',
"SELECT count(*), min(a), max(a) FROM tab_rep");
is($result, qq(20|-20|-1), 'check replicated changes on subscriber');
$result =
$node_subscriber->safe_psql('postgres', "SELECT c, b, a FROM tab_mixed");
is( $result, qq(|foo|1
|bar|2), 'check replicated changes with different column order');
$result = $node_subscriber->safe_psql('postgres', "SELECT * FROM tab_mixed");
is( $result, qq(local|1.1|foo|1
local|2.2|bar|2), 'check replicated changes with different column order');
$result = $node_subscriber->safe_psql('postgres',
"SELECT count(*), min(a), max(a) FROM tab_include");
@ -139,11 +139,14 @@ $node_publisher->safe_psql('postgres',
"ALTER TABLE tab_ins REPLICA IDENTITY FULL");
$node_subscriber->safe_psql('postgres',
"ALTER TABLE tab_ins REPLICA IDENTITY FULL");
# tab_mixed can use DEFAULT, since it has a primary key
# and do the updates
$node_publisher->safe_psql('postgres', "UPDATE tab_full SET a = a * a");
$node_publisher->safe_psql('postgres',
"UPDATE tab_full2 SET x = 'bb' WHERE x = 'b'");
$node_publisher->safe_psql('postgres',
"UPDATE tab_mixed SET b = 'baz' WHERE a = 1");
$node_publisher->wait_for_catchup('tap_sub');
@ -159,6 +162,40 @@ bb
bb),
'update works with REPLICA IDENTITY FULL and text datums');
$result = $node_subscriber->safe_psql('postgres',
"SELECT * FROM tab_mixed ORDER BY a");
is( $result, qq(local|1.1|baz|1
local|2.2|bar|2),
'update works with different column order and subscriber local values');
# check behavior with dropped columns
$node_publisher->safe_psql('postgres', "ALTER TABLE tab_mixed DROP COLUMN b");
$node_publisher->safe_psql('postgres',
"UPDATE tab_mixed SET c = 11.11 WHERE a = 1");
$node_publisher->wait_for_catchup('tap_sub');
$result = $node_subscriber->safe_psql('postgres',
"SELECT * FROM tab_mixed ORDER BY a");
is( $result, qq(local|11.11|baz|1
local|2.2|bar|2),
'update works with dropped publisher column');
$node_subscriber->safe_psql('postgres',
"ALTER TABLE tab_mixed DROP COLUMN d");
$node_publisher->safe_psql('postgres',
"UPDATE tab_mixed SET c = 22.22 WHERE a = 2");
$node_publisher->wait_for_catchup('tap_sub');
$result = $node_subscriber->safe_psql('postgres',
"SELECT * FROM tab_mixed ORDER BY a");
is( $result, qq(11.11|baz|1
22.22|bar|2),
'update works with dropped subscriber column');
# check that change of connection string and/or publication list causes
# restart of subscription workers. Not all of these are registered as tests
# as we need to poll for a change but the test suite will fail none the less