From 79b716cfb7a1be2a61ebb4418099db1258f35e30 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Thu, 7 Apr 2022 09:39:25 +0530 Subject: [PATCH] Reorder subskiplsn in pg_subscription to avoid alignment issues. The column 'subskiplsn' uses TYPALIGN_DOUBLE (which has 4-byte alignment on AIX) for storage. But the C struct (Form_pg_subscription) has 8-byte alignment for this field, so retrieving it from storage causes an unaligned read. To fix this, we rearranged the 'subskiplsn' column in the catalog so that it naturally comes at an 8-byte boundary. We have fixed a similar problem in commit f3b421da5f. This patch adds a test to avoid a similar mistake in the future. Reported-by: Noah Misch Diagnosed-by: Noah Misch, Masahiko Sawada, Amit Kapila Author: Masahiko Sawada Reviewed-by: Noah Misch, Amit Kapila Discussion: https://postgr.es/m/20220401074423.GC3682158@rfd.leadboat.com https://postgr.es/m/CAD21AoDeScrsHhLyEPYqN3sydg6PxAPVBboK=30xJfUVihNZDA@mail.gmail.com --- doc/src/sgml/catalogs.sgml | 20 +++++----- src/backend/catalog/pg_subscription.c | 2 +- src/backend/catalog/system_views.sql | 4 +- src/backend/commands/subscriptioncmds.c | 2 +- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_subscription.h | 11 +++--- src/test/regress/expected/sanity_check.out | 29 ++++++++++++++ src/test/regress/expected/test_setup.out | 4 ++ src/test/regress/regress.c | 45 ++++++++++++++++++++++ src/test/regress/sql/sanity_check.sql | 26 +++++++++++++ src/test/regress/sql/test_setup.sql | 5 +++ 11 files changed, 130 insertions(+), 20 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 298de74af4..646ab74d04 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -7823,6 +7823,16 @@ SCRAM-SHA-256$<iteration count>:&l + + + subskiplsn pg_lsn + + + Finish LSN of the transaction whose changes are to be skipped, if a valid + LSN; otherwise 0/0. 
+ + + subname name @@ -7893,16 +7903,6 @@ SCRAM-SHA-256$<iteration count>:&l - - - subskiplsn pg_lsn - - - Finish LSN of the transaction whose changes are to be skipped, if a valid - LSN; otherwise 0/0. - - - subconninfo text diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c index 0ff0982f7b..add51caadf 100644 --- a/src/backend/catalog/pg_subscription.c +++ b/src/backend/catalog/pg_subscription.c @@ -63,6 +63,7 @@ GetSubscription(Oid subid, bool missing_ok) sub = (Subscription *) palloc(sizeof(Subscription)); sub->oid = subid; sub->dbid = subform->subdbid; + sub->skiplsn = subform->subskiplsn; sub->name = pstrdup(NameStr(subform->subname)); sub->owner = subform->subowner; sub->enabled = subform->subenabled; @@ -70,7 +71,6 @@ GetSubscription(Oid subid, bool missing_ok) sub->stream = subform->substream; sub->twophasestate = subform->subtwophasestate; sub->disableonerr = subform->subdisableonerr; - sub->skiplsn = subform->subskiplsn; /* Get conninfo */ datum = SysCacheGetAttr(SUBSCRIPTIONOID, diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 9eaa51df29..e701d1c676 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1285,8 +1285,8 @@ REVOKE ALL ON pg_replication_origin_status FROM public; -- All columns of pg_subscription except subconninfo are publicly readable. 
REVOKE ALL ON pg_subscription FROM public; -GRANT SELECT (oid, subdbid, subname, subowner, subenabled, subbinary, - substream, subtwophasestate, subdisableonerr, subskiplsn, subslotname, +GRANT SELECT (oid, subdbid, subskiplsn, subname, subowner, subenabled, + subbinary, substream, subtwophasestate, subdisableonerr, subslotname, subsynccommit, subpublications) ON pg_subscription TO public; diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index 83192dbd51..057ab4b6a3 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -596,6 +596,7 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, Anum_pg_subscription_oid); values[Anum_pg_subscription_oid - 1] = ObjectIdGetDatum(subid); values[Anum_pg_subscription_subdbid - 1] = ObjectIdGetDatum(MyDatabaseId); + values[Anum_pg_subscription_subskiplsn - 1] = LSNGetDatum(InvalidXLogRecPtr); values[Anum_pg_subscription_subname - 1] = DirectFunctionCall1(namein, CStringGetDatum(stmt->subname)); values[Anum_pg_subscription_subowner - 1] = ObjectIdGetDatum(owner); @@ -607,7 +608,6 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, LOGICALREP_TWOPHASE_STATE_PENDING : LOGICALREP_TWOPHASE_STATE_DISABLED); values[Anum_pg_subscription_subdisableonerr - 1] = BoolGetDatum(opts.disableonerr); - values[Anum_pg_subscription_subskiplsn - 1] = LSNGetDatum(InvalidXLogRecPtr); values[Anum_pg_subscription_subconninfo - 1] = CStringGetTextDatum(conninfo); if (opts.slot_name) diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 9cf5ffb6ff..b6742b12c5 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202204062 +#define CATALOG_VERSION_NO 202204071 #endif diff --git a/src/include/catalog/pg_subscription.h b/src/include/catalog/pg_subscription.h index 599c2e4422..f006a92612 100644 --- 
a/src/include/catalog/pg_subscription.h +++ b/src/include/catalog/pg_subscription.h @@ -54,6 +54,10 @@ CATALOG(pg_subscription,6100,SubscriptionRelationId) BKI_SHARED_RELATION BKI_ROW Oid subdbid BKI_LOOKUP(pg_database); /* Database the * subscription is in. */ + + XLogRecPtr subskiplsn; /* All changes finished at this LSN are + * skipped */ + NameData subname; /* Name of the subscription */ Oid subowner BKI_LOOKUP(pg_authid); /* Owner of the subscription */ @@ -71,9 +75,6 @@ CATALOG(pg_subscription,6100,SubscriptionRelationId) BKI_SHARED_RELATION BKI_ROW bool subdisableonerr; /* True if a worker error should cause the * subscription to be disabled */ - XLogRecPtr subskiplsn; /* All changes finished at this LSN are - * skipped */ - #ifdef CATALOG_VARLEN /* variable-length fields start here */ /* Connection string to the publisher */ text subconninfo BKI_FORCE_NOT_NULL; @@ -103,6 +104,8 @@ typedef struct Subscription Oid oid; /* Oid of the subscription */ Oid dbid; /* Oid of the database which subscription is * in */ + XLogRecPtr skiplsn; /* All changes finished at this LSN are + * skipped */ char *name; /* Name of the subscription */ Oid owner; /* Oid of the subscription owner */ bool enabled; /* Indicates if the subscription is enabled */ @@ -113,8 +116,6 @@ typedef struct Subscription bool disableonerr; /* Indicates if the subscription should be * automatically disabled if a worker error * occurs */ - XLogRecPtr skiplsn; /* All changes finished at this LSN are - * skipped */ char *conninfo; /* Connection string to the publisher */ char *slotname; /* Name of the replication slot */ char *synccommit; /* Synchronous commit setting for worker */ diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index 8370c1561c..a2faefb4c0 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -25,3 +25,32 @@ SELECT relname, relkind ---------+--------- (0 rows) +-- +-- When 
ALIGNOF_DOUBLE==4 (e.g. AIX), the C ABI may impose 8-byte alignment on +-- some of the C types that correspond to TYPALIGN_DOUBLE SQL types. To ensure +-- catalog C struct layout matches catalog tuple layout, arrange for the tuple +-- offset of each fixed-width, attalign='d' catalog column to be divisible by 8 +-- unconditionally. Keep such columns before the first NameData column of the +-- catalog, since packagers can override NAMEDATALEN to an odd number. +-- +WITH check_columns AS ( + SELECT relname, attname, + array( + SELECT t.oid + FROM pg_type t JOIN pg_attribute pa ON t.oid = pa.atttypid + WHERE pa.attrelid = a.attrelid AND + pa.attnum > 0 AND pa.attnum <= a.attnum + ORDER BY pa.attnum) AS coltypes + FROM pg_attribute a JOIN pg_class c ON c.oid = attrelid + JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE attalign = 'd' AND relkind = 'r' AND + attnotnull AND attlen <> -1 AND n.nspname = 'pg_catalog' +) +SELECT relname, attname, coltypes, get_column_offset(coltypes) + FROM check_columns + WHERE get_column_offset(coltypes) % 8 != 0 OR + 'name'::regtype::oid = ANY(coltypes); + relname | attname | coltypes | get_column_offset +---------+---------+----------+------------------- +(0 rows) + diff --git a/src/test/regress/expected/test_setup.out b/src/test/regress/expected/test_setup.out index a9d0de3dea..8b8ba7d778 100644 --- a/src/test/regress/expected/test_setup.out +++ b/src/test/regress/expected/test_setup.out @@ -206,6 +206,10 @@ CREATE FUNCTION ttdummy () RETURNS trigger AS :'regresslib' LANGUAGE C; +CREATE FUNCTION get_column_offset (oid[]) + RETURNS int + AS :'regresslib' + LANGUAGE C STRICT STABLE PARALLEL SAFE; -- Use hand-rolled hash functions and operator classes to get predictable -- result on different machines. 
The hash function for int4 simply returns -- the sum of the values passed to it and the one for text returns the length diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index 0802fb9136..8b0c2d9d68 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -41,6 +41,7 @@ #include "storage/spin.h" #include "utils/builtins.h" #include "utils/geo_decls.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" #include "utils/typcache.h" @@ -1216,3 +1217,47 @@ binary_coercible(PG_FUNCTION_ARGS) PG_RETURN_BOOL(IsBinaryCoercible(srctype, targettype)); } + +/* + * Return the column offset of the last data in the given array of + * data types. The input data types must be fixed-length data types. + */ +PG_FUNCTION_INFO_V1(get_column_offset); +Datum +get_column_offset(PG_FUNCTION_ARGS) +{ + ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); + Oid *type_oids; + int ntypes; + int column_offset = 0; + + if (ARR_HASNULL(ta) && array_contains_nulls(ta)) + elog(ERROR, "argument must not contain nulls"); + + if (ARR_NDIM(ta) > 1) + elog(ERROR, "argument must be empty or one-dimensional array"); + + type_oids = (Oid *) ARR_DATA_PTR(ta); + ntypes = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta)); + for (int i = 0; i < ntypes; i++) + { + Oid typeoid = type_oids[i]; + int16 typlen; + bool typbyval; + char typalign; + + get_typlenbyvalalign(typeoid, &typlen, &typbyval, &typalign); + + /* the data type must be fixed-length */ + if (!(typbyval || (typlen > 0))) + elog(ERROR, "type %u is not fixed-length data type", typeoid); + + column_offset = att_align_nominal(column_offset, typalign); + + /* not include the last type size */ + if (i != (ntypes - 1)) + column_offset += typlen; + } + + PG_RETURN_INT32(column_offset); +} diff --git a/src/test/regress/sql/sanity_check.sql b/src/test/regress/sql/sanity_check.sql index 162e5324b5..c70ff781fa 100644 --- a/src/test/regress/sql/sanity_check.sql +++ b/src/test/regress/sql/sanity_check.sql @@ 
-19,3 +19,29 @@ SELECT relname, relkind FROM pg_class WHERE relkind IN ('v', 'c', 'f', 'p', 'I') AND relfilenode <> 0; + +-- +-- When ALIGNOF_DOUBLE==4 (e.g. AIX), the C ABI may impose 8-byte alignment on +-- some of the C types that correspond to TYPALIGN_DOUBLE SQL types. To ensure +-- catalog C struct layout matches catalog tuple layout, arrange for the tuple +-- offset of each fixed-width, attalign='d' catalog column to be divisible by 8 +-- unconditionally. Keep such columns before the first NameData column of the +-- catalog, since packagers can override NAMEDATALEN to an odd number. +-- +WITH check_columns AS ( + SELECT relname, attname, + array( + SELECT t.oid + FROM pg_type t JOIN pg_attribute pa ON t.oid = pa.atttypid + WHERE pa.attrelid = a.attrelid AND + pa.attnum > 0 AND pa.attnum <= a.attnum + ORDER BY pa.attnum) AS coltypes + FROM pg_attribute a JOIN pg_class c ON c.oid = attrelid + JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE attalign = 'd' AND relkind = 'r' AND + attnotnull AND attlen <> -1 AND n.nspname = 'pg_catalog' +) +SELECT relname, attname, coltypes, get_column_offset(coltypes) + FROM check_columns + WHERE get_column_offset(coltypes) % 8 != 0 OR + 'name'::regtype::oid = ANY(coltypes); diff --git a/src/test/regress/sql/test_setup.sql b/src/test/regress/sql/test_setup.sql index 1f3f2f1724..fbceb8cb46 100644 --- a/src/test/regress/sql/test_setup.sql +++ b/src/test/regress/sql/test_setup.sql @@ -253,6 +253,11 @@ CREATE FUNCTION ttdummy () AS :'regresslib' LANGUAGE C; +CREATE FUNCTION get_column_offset (oid[]) + RETURNS int + AS :'regresslib' + LANGUAGE C STRICT STABLE PARALLEL SAFE; + -- Use hand-rolled hash functions and operator classes to get predictable -- result on different machines. The hash function for int4 simply returns -- the sum of the values passed to it and the one for text returns the length