Reorder subskiplsn in pg_subscription to avoid alignment issues.

The column 'subskiplsn' uses TYPALIGN_DOUBLE (which has 4 bytes alignment
on AIX) for storage. But the C Struct (Form_pg_subscription) has 8-byte
alignment for this field, so retrieving it from storage causes an
unaligned read.

To fix this, we rearranged the 'subskiplsn' column in the catalog so that
it naturally comes at an 8-byte boundary.

We have fixed a similar problem in commit f3b421da5f. This patch adds a
test to avoid a similar mistake in the future.

Reported-by: Noah Misch
Diagnosed-by: Noah Misch, Masahiko Sawada, Amit Kapila
Author: Masahiko Sawada
Reviewed-by: Noah Misch, Amit Kapila
Discussion: https://postgr.es/m/20220401074423.GC3682158@rfd.leadboat.com
	    https://postgr.es/m/CAD21AoDeScrsHhLyEPYqN3sydg6PxAPVBboK=30xJfUVihNZDA@mail.gmail.com
This commit is contained in:
Amit Kapila 2022-04-07 09:39:25 +05:30
parent e41aed674f
commit 79b716cfb7
11 changed files with 130 additions and 20 deletions

View File

@ -7823,6 +7823,16 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>subskiplsn</structfield> <type>pg_lsn</type>
</para>
<para>
Finish LSN of the transaction whose changes are to be skipped, if a valid
LSN; otherwise <literal>0/0</literal>.
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>subname</structfield> <type>name</type>
@ -7893,16 +7903,6 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>subskiplsn</structfield> <type>pg_lsn</type>
</para>
<para>
Finish LSN of the transaction whose changes are to be skipped, if a valid
LSN; otherwise <literal>0/0</literal>.
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>subconninfo</structfield> <type>text</type>

View File

@ -63,6 +63,7 @@ GetSubscription(Oid subid, bool missing_ok)
sub = (Subscription *) palloc(sizeof(Subscription));
sub->oid = subid;
sub->dbid = subform->subdbid;
sub->skiplsn = subform->subskiplsn;
sub->name = pstrdup(NameStr(subform->subname));
sub->owner = subform->subowner;
sub->enabled = subform->subenabled;
@ -70,7 +71,6 @@ GetSubscription(Oid subid, bool missing_ok)
sub->stream = subform->substream;
sub->twophasestate = subform->subtwophasestate;
sub->disableonerr = subform->subdisableonerr;
sub->skiplsn = subform->subskiplsn;
/* Get conninfo */
datum = SysCacheGetAttr(SUBSCRIPTIONOID,

View File

@ -1285,8 +1285,8 @@ REVOKE ALL ON pg_replication_origin_status FROM public;
-- All columns of pg_subscription except subconninfo are publicly readable.
REVOKE ALL ON pg_subscription FROM public;
GRANT SELECT (oid, subdbid, subname, subowner, subenabled, subbinary,
substream, subtwophasestate, subdisableonerr, subskiplsn, subslotname,
GRANT SELECT (oid, subdbid, subskiplsn, subname, subowner, subenabled,
subbinary, substream, subtwophasestate, subdisableonerr, subslotname,
subsynccommit, subpublications)
ON pg_subscription TO public;

View File

@ -596,6 +596,7 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt,
Anum_pg_subscription_oid);
values[Anum_pg_subscription_oid - 1] = ObjectIdGetDatum(subid);
values[Anum_pg_subscription_subdbid - 1] = ObjectIdGetDatum(MyDatabaseId);
values[Anum_pg_subscription_subskiplsn - 1] = LSNGetDatum(InvalidXLogRecPtr);
values[Anum_pg_subscription_subname - 1] =
DirectFunctionCall1(namein, CStringGetDatum(stmt->subname));
values[Anum_pg_subscription_subowner - 1] = ObjectIdGetDatum(owner);
@ -607,7 +608,6 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt,
LOGICALREP_TWOPHASE_STATE_PENDING :
LOGICALREP_TWOPHASE_STATE_DISABLED);
values[Anum_pg_subscription_subdisableonerr - 1] = BoolGetDatum(opts.disableonerr);
values[Anum_pg_subscription_subskiplsn - 1] = LSNGetDatum(InvalidXLogRecPtr);
values[Anum_pg_subscription_subconninfo - 1] =
CStringGetTextDatum(conninfo);
if (opts.slot_name)

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202204062
#define CATALOG_VERSION_NO 202204071
#endif

View File

@ -54,6 +54,10 @@ CATALOG(pg_subscription,6100,SubscriptionRelationId) BKI_SHARED_RELATION BKI_ROW
Oid subdbid BKI_LOOKUP(pg_database); /* Database the
* subscription is in. */
XLogRecPtr subskiplsn; /* All changes finished at this LSN are
* skipped */
NameData subname; /* Name of the subscription */
Oid subowner BKI_LOOKUP(pg_authid); /* Owner of the subscription */
@ -71,9 +75,6 @@ CATALOG(pg_subscription,6100,SubscriptionRelationId) BKI_SHARED_RELATION BKI_ROW
bool subdisableonerr; /* True if a worker error should cause the
* subscription to be disabled */
XLogRecPtr subskiplsn; /* All changes finished at this LSN are
* skipped */
#ifdef CATALOG_VARLEN /* variable-length fields start here */
/* Connection string to the publisher */
text subconninfo BKI_FORCE_NOT_NULL;
@ -103,6 +104,8 @@ typedef struct Subscription
Oid oid; /* Oid of the subscription */
Oid dbid; /* Oid of the database which subscription is
* in */
XLogRecPtr skiplsn; /* All changes finished at this LSN are
* skipped */
char *name; /* Name of the subscription */
Oid owner; /* Oid of the subscription owner */
bool enabled; /* Indicates if the subscription is enabled */
@ -113,8 +116,6 @@ typedef struct Subscription
bool disableonerr; /* Indicates if the subscription should be
* automatically disabled if a worker error
* occurs */
XLogRecPtr skiplsn; /* All changes finished at this LSN are
* skipped */
char *conninfo; /* Connection string to the publisher */
char *slotname; /* Name of the replication slot */
char *synccommit; /* Synchronous commit setting for worker */

View File

@ -25,3 +25,32 @@ SELECT relname, relkind
---------+---------
(0 rows)
--
-- When ALIGNOF_DOUBLE==4 (e.g. AIX), the C ABI may impose 8-byte alignment on
-- some of the C types that correspond to TYPALIGN_DOUBLE SQL types. To ensure
-- catalog C struct layout matches catalog tuple layout, arrange for the tuple
-- offset of each fixed-width, attalign='d' catalog column to be divisible by 8
-- unconditionally. Keep such columns before the first NameData column of the
-- catalog, since packagers can override NAMEDATALEN to an odd number.
--
WITH check_columns AS (
SELECT relname, attname,
array(
SELECT t.oid
FROM pg_type t JOIN pg_attribute pa ON t.oid = pa.atttypid
WHERE pa.attrelid = a.attrelid AND
pa.attnum > 0 AND pa.attnum <= a.attnum
ORDER BY pa.attnum) AS coltypes
FROM pg_attribute a JOIN pg_class c ON c.oid = attrelid
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE attalign = 'd' AND relkind = 'r' AND
attnotnull AND attlen <> -1 AND n.nspname = 'pg_catalog'
)
SELECT relname, attname, coltypes, get_column_offset(coltypes)
FROM check_columns
WHERE get_column_offset(coltypes) % 8 != 0 OR
'name'::regtype::oid = ANY(coltypes);
relname | attname | coltypes | get_column_offset
---------+---------+----------+-------------------
(0 rows)

View File

@ -206,6 +206,10 @@ CREATE FUNCTION ttdummy ()
RETURNS trigger
AS :'regresslib'
LANGUAGE C;
CREATE FUNCTION get_column_offset (oid[])
RETURNS int
AS :'regresslib'
LANGUAGE C STRICT STABLE PARALLEL SAFE;
-- Use hand-rolled hash functions and operator classes to get predictable
-- result on different machines. The hash function for int4 simply returns
-- the sum of the values passed to it and the one for text returns the length

View File

@ -41,6 +41,7 @@
#include "storage/spin.h"
#include "utils/builtins.h"
#include "utils/geo_decls.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/typcache.h"
@ -1216,3 +1217,47 @@ binary_coercible(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(IsBinaryCoercible(srctype, targettype));
}
/*
* Return the column offset of the last data in the given array of
* data types. The input data types must be fixed-length data types.
*/
PG_FUNCTION_INFO_V1(get_column_offset);
Datum
get_column_offset(PG_FUNCTION_ARGS)
{
ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
Oid *type_oids;
int ntypes;
int column_offset = 0;
if (ARR_HASNULL(ta) && array_contains_nulls(ta))
elog(ERROR, "argument must not contain nulls");
if (ARR_NDIM(ta) > 1)
elog(ERROR, "argument must be empty or one-dimensional array");
type_oids = (Oid *) ARR_DATA_PTR(ta);
ntypes = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
for (int i = 0; i < ntypes; i++)
{
Oid typeoid = type_oids[i];
int16 typlen;
bool typbyval;
char typalign;
get_typlenbyvalalign(typeoid, &typlen, &typbyval, &typalign);
/* the data type must be fixed-length */
if (!(typbyval || (typlen > 0)))
elog(ERROR, "type %u is not fixed-length data type", typeoid);
column_offset = att_align_nominal(column_offset, typalign);
/* not include the last type size */
if (i != (ntypes - 1))
column_offset += typlen;
}
PG_RETURN_INT32(column_offset);
}

View File

@ -19,3 +19,29 @@ SELECT relname, relkind
FROM pg_class
WHERE relkind IN ('v', 'c', 'f', 'p', 'I')
AND relfilenode <> 0;
--
-- When ALIGNOF_DOUBLE==4 (e.g. AIX), the C ABI may impose 8-byte alignment on
-- some of the C types that correspond to TYPALIGN_DOUBLE SQL types. To ensure
-- catalog C struct layout matches catalog tuple layout, arrange for the tuple
-- offset of each fixed-width, attalign='d' catalog column to be divisible by 8
-- unconditionally. Keep such columns before the first NameData column of the
-- catalog, since packagers can override NAMEDATALEN to an odd number.
--
WITH check_columns AS (
SELECT relname, attname,
array(
SELECT t.oid
FROM pg_type t JOIN pg_attribute pa ON t.oid = pa.atttypid
WHERE pa.attrelid = a.attrelid AND
pa.attnum > 0 AND pa.attnum <= a.attnum
ORDER BY pa.attnum) AS coltypes
FROM pg_attribute a JOIN pg_class c ON c.oid = attrelid
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE attalign = 'd' AND relkind = 'r' AND
attnotnull AND attlen <> -1 AND n.nspname = 'pg_catalog'
)
SELECT relname, attname, coltypes, get_column_offset(coltypes)
FROM check_columns
WHERE get_column_offset(coltypes) % 8 != 0 OR
'name'::regtype::oid = ANY(coltypes);

View File

@ -253,6 +253,11 @@ CREATE FUNCTION ttdummy ()
AS :'regresslib'
LANGUAGE C;
CREATE FUNCTION get_column_offset (oid[])
RETURNS int
AS :'regresslib'
LANGUAGE C STRICT STABLE PARALLEL SAFE;
-- Use hand-rolled hash functions and operator classes to get predictable
-- result on different machines. The hash function for int4 simply returns
-- the sum of the values passed to it and the one for text returns the length